BitcodeReader.h 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293
  1. #pragma once
  2. #ifdef __GNUC__
  3. #pragma GCC diagnostic push
  4. #pragma GCC diagnostic ignored "-Wunused-parameter"
  5. #endif
  6. //===- llvm/Bitcode/BitcodeReader.h - Bitcode reader ------------*- C++ -*-===//
  7. //
  8. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  9. // See https://llvm.org/LICENSE.txt for license information.
  10. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  11. //
  12. //===----------------------------------------------------------------------===//
  13. //
  14. // This header defines interfaces to read LLVM bitcode files/streams.
  15. //
  16. //===----------------------------------------------------------------------===//
  17. #ifndef LLVM_BITCODE_BITCODEREADER_H
  18. #define LLVM_BITCODE_BITCODEREADER_H
  19. #include "llvm/ADT/ArrayRef.h"
  20. #include "llvm/ADT/StringRef.h"
  21. #include "llvm/Bitstream/BitCodes.h"
  22. #include "llvm/IR/ModuleSummaryIndex.h"
  23. #include "llvm/Support/Endian.h"
  24. #include "llvm/Support/Error.h"
  25. #include "llvm/Support/ErrorOr.h"
  26. #include "llvm/Support/MemoryBuffer.h"
  27. #include <cstdint>
  28. #include <memory>
  29. #include <string>
  30. #include <system_error>
  31. #include <vector>
  32. namespace llvm {
  33. class LLVMContext;
  34. class Module;
  35. typedef llvm::function_ref<Optional<std::string>(StringRef)>
  36. DataLayoutCallbackTy;
  37. // These functions are for converting Expected/Error values to
  38. // ErrorOr/std::error_code for compatibility with legacy clients. FIXME:
  39. // Remove these functions once no longer needed by the C and libLTO APIs.
  40. std::error_code errorToErrorCodeAndEmitErrors(LLVMContext &Ctx, Error Err);
  41. template <typename T>
  42. ErrorOr<T> expectedToErrorOrAndEmitErrors(LLVMContext &Ctx, Expected<T> Val) {
  43. if (!Val)
  44. return errorToErrorCodeAndEmitErrors(Ctx, Val.takeError());
  45. return std::move(*Val);
  46. }
  struct BitcodeFileContents;

  /// Basic information extracted from a bitcode module to be used for LTO.
  struct BitcodeLTOInfo {
    /// Whether the module should be compiled with ThinLTO.
    bool IsThinLTO;
    /// Whether the module has a summary.
    bool HasSummary;
    /// NOTE(review): presumably mirrors the module's split-LTO-unit setting --
    /// confirm against the code that fills this struct in.
    bool EnableSplitLTOUnit;
  };
  54. /// Represents a module in a bitcode file.
  55. class BitcodeModule {
  56. // This covers the identification (if present) and module blocks.
  57. ArrayRef<uint8_t> Buffer;
  58. StringRef ModuleIdentifier;
  59. // The string table used to interpret this module.
  60. StringRef Strtab;
  61. // The bitstream location of the IDENTIFICATION_BLOCK.
  62. uint64_t IdentificationBit;
  63. // The bitstream location of this module's MODULE_BLOCK.
  64. uint64_t ModuleBit;
  65. BitcodeModule(ArrayRef<uint8_t> Buffer, StringRef ModuleIdentifier,
  66. uint64_t IdentificationBit, uint64_t ModuleBit)
  67. : Buffer(Buffer), ModuleIdentifier(ModuleIdentifier),
  68. IdentificationBit(IdentificationBit), ModuleBit(ModuleBit) {}
  69. // Calls the ctor.
  70. friend Expected<BitcodeFileContents>
  71. getBitcodeFileContents(MemoryBufferRef Buffer);
  72. Expected<std::unique_ptr<Module>>
  73. getModuleImpl(LLVMContext &Context, bool MaterializeAll,
  74. bool ShouldLazyLoadMetadata, bool IsImporting,
  75. DataLayoutCallbackTy DataLayoutCallback);
  76. public:
  77. StringRef getBuffer() const {
  78. return StringRef((const char *)Buffer.begin(), Buffer.size());
  79. }
  80. StringRef getStrtab() const { return Strtab; }
  81. StringRef getModuleIdentifier() const { return ModuleIdentifier; }
  82. /// Read the bitcode module and prepare for lazy deserialization of function
  83. /// bodies. If ShouldLazyLoadMetadata is true, lazily load metadata as well.
  84. /// If IsImporting is true, this module is being parsed for ThinLTO
  85. /// importing into another module.
  86. Expected<std::unique_ptr<Module>> getLazyModule(LLVMContext &Context,
  87. bool ShouldLazyLoadMetadata,
  88. bool IsImporting);
  89. /// Read the entire bitcode module and return it.
  90. Expected<std::unique_ptr<Module>> parseModule(
  91. LLVMContext &Context, DataLayoutCallbackTy DataLayoutCallback =
  92. [](StringRef) { return None; });
  93. /// Returns information about the module to be used for LTO: whether to
  94. /// compile with ThinLTO, and whether it has a summary.
  95. Expected<BitcodeLTOInfo> getLTOInfo();
  96. /// Parse the specified bitcode buffer, returning the module summary index.
  97. Expected<std::unique_ptr<ModuleSummaryIndex>> getSummary();
  98. /// Parse the specified bitcode buffer and merge its module summary index
  99. /// into CombinedIndex.
  100. Error readSummary(ModuleSummaryIndex &CombinedIndex, StringRef ModulePath,
  101. uint64_t ModuleId);
  102. };
  103. struct BitcodeFileContents {
  104. std::vector<BitcodeModule> Mods;
  105. StringRef Symtab, StrtabForSymtab;
  106. };
  107. /// Returns the contents of a bitcode file. This includes the raw contents of
  108. /// the symbol table embedded in the bitcode file. Clients which require a
  109. /// symbol table should prefer to use irsymtab::read instead of this function
  110. /// because it creates a reader for the irsymtab and handles upgrading bitcode
  111. /// files without a symbol table or with an old symbol table.
  112. Expected<BitcodeFileContents> getBitcodeFileContents(MemoryBufferRef Buffer);
  113. /// Returns a list of modules in the specified bitcode buffer.
  114. Expected<std::vector<BitcodeModule>>
  115. getBitcodeModuleList(MemoryBufferRef Buffer);
  116. /// Read the header of the specified bitcode buffer and prepare for lazy
  117. /// deserialization of function bodies. If ShouldLazyLoadMetadata is true,
  118. /// lazily load metadata as well. If IsImporting is true, this module is
  119. /// being parsed for ThinLTO importing into another module.
  120. Expected<std::unique_ptr<Module>>
  121. getLazyBitcodeModule(MemoryBufferRef Buffer, LLVMContext &Context,
  122. bool ShouldLazyLoadMetadata = false,
  123. bool IsImporting = false);
  124. /// Like getLazyBitcodeModule, except that the module takes ownership of
  125. /// the memory buffer if successful. If successful, this moves Buffer. On
  126. /// error, this *does not* move Buffer. If IsImporting is true, this module is
  127. /// being parsed for ThinLTO importing into another module.
  128. Expected<std::unique_ptr<Module>> getOwningLazyBitcodeModule(
  129. std::unique_ptr<MemoryBuffer> &&Buffer, LLVMContext &Context,
  130. bool ShouldLazyLoadMetadata = false, bool IsImporting = false);
  131. /// Read the header of the specified bitcode buffer and extract just the
  132. /// triple information. If successful, this returns a string. On error, this
  133. /// returns "".
  134. Expected<std::string> getBitcodeTargetTriple(MemoryBufferRef Buffer);
  135. /// Return true if \p Buffer contains a bitcode file with ObjC code (category
  136. /// or class) in it.
  137. Expected<bool> isBitcodeContainingObjCCategory(MemoryBufferRef Buffer);
  138. /// Read the header of the specified bitcode buffer and extract just the
  139. /// producer string information. If successful, this returns a string. On
  140. /// error, this returns "".
  141. Expected<std::string> getBitcodeProducerString(MemoryBufferRef Buffer);
  142. /// Read the specified bitcode file, returning the module.
  143. Expected<std::unique_ptr<Module>> parseBitcodeFile(
  144. MemoryBufferRef Buffer, LLVMContext &Context,
  145. DataLayoutCallbackTy DataLayoutCallback = [](StringRef) {
  146. return None;
  147. });
  148. /// Returns LTO information for the specified bitcode file.
  149. Expected<BitcodeLTOInfo> getBitcodeLTOInfo(MemoryBufferRef Buffer);
  150. /// Parse the specified bitcode buffer, returning the module summary index.
  151. Expected<std::unique_ptr<ModuleSummaryIndex>>
  152. getModuleSummaryIndex(MemoryBufferRef Buffer);
  153. /// Parse the specified bitcode buffer and merge the index into CombinedIndex.
  154. Error readModuleSummaryIndex(MemoryBufferRef Buffer,
  155. ModuleSummaryIndex &CombinedIndex,
  156. uint64_t ModuleId);
  157. /// Parse the module summary index out of an IR file and return the module
  158. /// summary index object if found, or an empty summary if not. If Path refers
  159. /// to an empty file and IgnoreEmptyThinLTOIndexFile is true, then
  160. /// this function will return nullptr.
  161. Expected<std::unique_ptr<ModuleSummaryIndex>>
  162. getModuleSummaryIndexForFile(StringRef Path,
  163. bool IgnoreEmptyThinLTOIndexFile = false);
  /// isBitcodeWrapper - Return true if the given bytes are the magic bytes
  /// for an LLVM IR bitcode wrapper.
  ///
  /// \param BufPtr start of the bytes to inspect.
  /// \param BufEnd one past the last available byte.
  /// \returns true only when at least four bytes are available in
  /// [BufPtr, BufEnd) and they equal DE C0 17 0B; shorter ranges return false
  /// without being read.
  inline bool isBitcodeWrapper(const unsigned char *BufPtr,
                               const unsigned char *BufEnd) {
    // The magic is four bytes long, so never index past a shorter buffer.
    if (BufEnd - BufPtr < 4)
      return false;
    // See if you can find the hidden message in the magic bytes :-).
    // (Hint: it's a little-endian encoding.)
    return BufPtr[0] == 0xDE &&
           BufPtr[1] == 0xC0 &&
           BufPtr[2] == 0x17 &&
           BufPtr[3] == 0x0B;
  }
  /// isRawBitcode - Return true if the given bytes are the magic bytes for
  /// raw LLVM IR bitcode (without a wrapper).
  ///
  /// \param BufPtr start of the bytes to inspect.
  /// \param BufEnd one past the last available byte.
  /// \returns true only when at least four bytes are available in
  /// [BufPtr, BufEnd) and they equal 'B' 'C' C0 DE; shorter ranges return
  /// false without being read.
  inline bool isRawBitcode(const unsigned char *BufPtr,
                           const unsigned char *BufEnd) {
    // The magic is four bytes long, so never index past a shorter buffer.
    if (BufEnd - BufPtr < 4)
      return false;
    // These bytes sort of have a hidden message, but it's not in
    // little-endian this time, and it's a little redundant.
    return BufPtr[0] == 'B' &&
           BufPtr[1] == 'C' &&
           BufPtr[2] == 0xc0 &&
           BufPtr[3] == 0xde;
  }
  188. /// isBitcode - Return true if the given bytes are the magic bytes for
  189. /// LLVM IR bitcode, either with or without a wrapper.
  190. inline bool isBitcode(const unsigned char *BufPtr,
  191. const unsigned char *BufEnd) {
  192. return isBitcodeWrapper(BufPtr, BufEnd) ||
  193. isRawBitcode(BufPtr, BufEnd);
  194. }
  195. /// SkipBitcodeWrapperHeader - Some systems wrap bc files with a special
  196. /// header for padding or other reasons. The format of this header is:
  197. ///
  198. /// struct bc_header {
  199. /// uint32_t Magic; // 0x0B17C0DE
  200. /// uint32_t Version; // Version, currently always 0.
  201. /// uint32_t BitcodeOffset; // Offset to traditional bitcode file.
  202. /// uint32_t BitcodeSize; // Size of traditional bitcode file.
  203. /// ... potentially other gunk ...
  204. /// };
  205. ///
  206. /// This function is called when we find a file with a matching magic number.
  207. /// In this case, skip down to the subsection of the file that is actually a
  208. /// BC file.
  209. /// If 'VerifyBufferSize' is true, check that the buffer is large enough to
  210. /// contain the whole bitcode file.
  211. inline bool SkipBitcodeWrapperHeader(const unsigned char *&BufPtr,
  212. const unsigned char *&BufEnd,
  213. bool VerifyBufferSize) {
  214. // Must contain the offset and size field!
  215. if (unsigned(BufEnd - BufPtr) < BWH_SizeField + 4)
  216. return true;
  217. unsigned Offset = support::endian::read32le(&BufPtr[BWH_OffsetField]);
  218. unsigned Size = support::endian::read32le(&BufPtr[BWH_SizeField]);
  219. uint64_t BitcodeOffsetEnd = (uint64_t)Offset + (uint64_t)Size;
  220. // Verify that Offset+Size fits in the file.
  221. if (VerifyBufferSize && BitcodeOffsetEnd > uint64_t(BufEnd-BufPtr))
  222. return true;
  223. BufPtr += Offset;
  224. BufEnd = BufPtr+Size;
  225. return false;
  226. }
  227. APInt readWideAPInt(ArrayRef<uint64_t> Vals, unsigned TypeBits);
  /// Error category paired with BitcodeError values (defined out of line).
  const std::error_category &BitcodeErrorCategory();

  /// Error values belonging to the bitcode error category.
  enum class BitcodeError { CorruptedBitcode = 1 };

  /// Wraps \p E in a std::error_code tied to BitcodeErrorCategory().
  inline std::error_code make_error_code(BitcodeError E) {
    const int Value = static_cast<int>(E);
    return std::error_code(Value, BitcodeErrorCategory());
  }
  233. } // end namespace llvm
  234. namespace std {
  235. template <> struct is_error_code_enum<llvm::BitcodeError> : std::true_type {};
  236. } // end namespace std
  237. #endif // LLVM_BITCODE_BITCODEREADER_H
  238. #ifdef __GNUC__
  239. #pragma GCC diagnostic pop
  240. #endif