BitcodeReader.h 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334
  1. #pragma once
  2. #ifdef __GNUC__
  3. #pragma GCC diagnostic push
  4. #pragma GCC diagnostic ignored "-Wunused-parameter"
  5. #endif
  6. //===- llvm/Bitcode/BitcodeReader.h - Bitcode reader ------------*- C++ -*-===//
  7. //
  8. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  9. // See https://llvm.org/LICENSE.txt for license information.
  10. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  11. //
  12. //===----------------------------------------------------------------------===//
  13. //
  14. // This header defines interfaces to read LLVM bitcode files/streams.
  15. //
  16. //===----------------------------------------------------------------------===//
  17. #ifndef LLVM_BITCODE_BITCODEREADER_H
  18. #define LLVM_BITCODE_BITCODEREADER_H
  #include "llvm/ADT/ArrayRef.h"
  #include "llvm/ADT/StringRef.h"
  #include "llvm/Bitstream/BitCodeEnums.h"
  #include "llvm/IR/GlobalValue.h"
  #include "llvm/Support/Endian.h"
  #include "llvm/Support/Error.h"
  #include "llvm/Support/ErrorOr.h"
  #include "llvm/Support/MemoryBufferRef.h"
  #include <cstdint>
  #include <functional>
  #include <memory>
  #include <optional>
  #include <string>
  #include <system_error>
  #include <utility>
  #include <vector>
  33. namespace llvm {
  34. class LLVMContext;
  35. class Module;
  36. class MemoryBuffer;
  37. class Metadata;
  38. class ModuleSummaryIndex;
  39. class Type;
  40. class Value;
  41. // Callback to override the data layout string of an imported bitcode module.
  42. // The first argument is the target triple, the second argument the data layout
  43. // string from the input, or a default string. It will be used if the callback
  44. // returns std::nullopt.
  45. typedef std::function<std::optional<std::string>(StringRef, StringRef)>
  46. DataLayoutCallbackFuncTy;
  47. typedef std::function<Type *(unsigned)> GetTypeByIDTy;
  48. typedef std::function<unsigned(unsigned, unsigned)> GetContainedTypeIDTy;
  49. typedef std::function<void(Value *, unsigned, GetTypeByIDTy,
  50. GetContainedTypeIDTy)>
  51. ValueTypeCallbackTy;
  52. typedef std::function<void(Metadata **, unsigned, GetTypeByIDTy,
  53. GetContainedTypeIDTy)>
  54. MDTypeCallbackTy;
  55. // These functions are for converting Expected/Error values to
  56. // ErrorOr/std::error_code for compatibility with legacy clients. FIXME:
  57. // Remove these functions once no longer needed by the C and libLTO APIs.
  58. std::error_code errorToErrorCodeAndEmitErrors(LLVMContext &Ctx, Error Err);
  59. template <typename T>
  60. ErrorOr<T> expectedToErrorOrAndEmitErrors(LLVMContext &Ctx, Expected<T> Val) {
  61. if (!Val)
  62. return errorToErrorCodeAndEmitErrors(Ctx, Val.takeError());
  63. return std::move(*Val);
  64. }
  65. struct ParserCallbacks {
  66. std::optional<DataLayoutCallbackFuncTy> DataLayout;
  67. /// The ValueType callback is called for every function definition or
  68. /// declaration and allows accessing the type information, also behind
  69. /// pointers. This can be useful, when the opaque pointer upgrade cleans all
  70. /// type information behind pointers.
  71. /// The second argument to ValueTypeCallback is the type ID of the
  72. /// function, the two passed functions can be used to extract type
  73. /// information.
  74. std::optional<ValueTypeCallbackTy> ValueType;
  75. /// The MDType callback is called for every value in metadata.
  76. std::optional<MDTypeCallbackTy> MDType;
  77. ParserCallbacks() = default;
  78. explicit ParserCallbacks(DataLayoutCallbackFuncTy DataLayout)
  79. : DataLayout(DataLayout) {}
  80. };
  81. struct BitcodeFileContents;
  /// Basic information extracted from a bitcode module to be used for LTO.
  ///
  /// Every flag has a default member initializer so that a default-initialized
  /// object (`BitcodeLTOInfo Info;`) never holds indeterminate values. The
  /// struct remains an aggregate, so brace initialization keeps working.
  struct BitcodeLTOInfo {
    /// Whether to compile with ThinLTO.
    bool IsThinLTO = false;
    /// Whether the module has a summary.
    bool HasSummary = false;
    /// NOTE(review): presumably mirrors the module's "split LTO unit"
    /// setting - confirm against the reader implementation.
    bool EnableSplitLTOUnit = false;
  };
  88. /// Represents a module in a bitcode file.
  89. class BitcodeModule {
  90. // This covers the identification (if present) and module blocks.
  91. ArrayRef<uint8_t> Buffer;
  92. StringRef ModuleIdentifier;
  93. // The string table used to interpret this module.
  94. StringRef Strtab;
  95. // The bitstream location of the IDENTIFICATION_BLOCK.
  96. uint64_t IdentificationBit;
  97. // The bitstream location of this module's MODULE_BLOCK.
  98. uint64_t ModuleBit;
  99. BitcodeModule(ArrayRef<uint8_t> Buffer, StringRef ModuleIdentifier,
  100. uint64_t IdentificationBit, uint64_t ModuleBit)
  101. : Buffer(Buffer), ModuleIdentifier(ModuleIdentifier),
  102. IdentificationBit(IdentificationBit), ModuleBit(ModuleBit) {}
  103. // Calls the ctor.
  104. friend Expected<BitcodeFileContents>
  105. getBitcodeFileContents(MemoryBufferRef Buffer);
  106. Expected<std::unique_ptr<Module>>
  107. getModuleImpl(LLVMContext &Context, bool MaterializeAll,
  108. bool ShouldLazyLoadMetadata, bool IsImporting,
  109. ParserCallbacks Callbacks = {});
  110. public:
  111. StringRef getBuffer() const {
  112. return StringRef((const char *)Buffer.begin(), Buffer.size());
  113. }
  114. StringRef getStrtab() const { return Strtab; }
  115. StringRef getModuleIdentifier() const { return ModuleIdentifier; }
  116. /// Read the bitcode module and prepare for lazy deserialization of function
  117. /// bodies. If ShouldLazyLoadMetadata is true, lazily load metadata as well.
  118. /// If IsImporting is true, this module is being parsed for ThinLTO
  119. /// importing into another module.
  120. Expected<std::unique_ptr<Module>>
  121. getLazyModule(LLVMContext &Context, bool ShouldLazyLoadMetadata,
  122. bool IsImporting, ParserCallbacks Callbacks = {});
  123. /// Read the entire bitcode module and return it.
  124. Expected<std::unique_ptr<Module>>
  125. parseModule(LLVMContext &Context, ParserCallbacks Callbacks = {});
  126. /// Returns information about the module to be used for LTO: whether to
  127. /// compile with ThinLTO, and whether it has a summary.
  128. Expected<BitcodeLTOInfo> getLTOInfo();
  129. /// Parse the specified bitcode buffer, returning the module summary index.
  130. Expected<std::unique_ptr<ModuleSummaryIndex>> getSummary();
  131. /// Parse the specified bitcode buffer and merge its module summary index
  132. /// into CombinedIndex.
  133. Error
  134. readSummary(ModuleSummaryIndex &CombinedIndex, StringRef ModulePath,
  135. uint64_t ModuleId,
  136. std::function<bool(GlobalValue::GUID)> IsPrevailing = nullptr);
  137. };
  138. struct BitcodeFileContents {
  139. std::vector<BitcodeModule> Mods;
  140. StringRef Symtab, StrtabForSymtab;
  141. };
  142. /// Returns the contents of a bitcode file. This includes the raw contents of
  143. /// the symbol table embedded in the bitcode file. Clients which require a
  144. /// symbol table should prefer to use irsymtab::read instead of this function
  145. /// because it creates a reader for the irsymtab and handles upgrading bitcode
  146. /// files without a symbol table or with an old symbol table.
  147. Expected<BitcodeFileContents> getBitcodeFileContents(MemoryBufferRef Buffer);
  148. /// Returns a list of modules in the specified bitcode buffer.
  149. Expected<std::vector<BitcodeModule>>
  150. getBitcodeModuleList(MemoryBufferRef Buffer);
  151. /// Read the header of the specified bitcode buffer and prepare for lazy
  152. /// deserialization of function bodies. If ShouldLazyLoadMetadata is true,
  153. /// lazily load metadata as well. If IsImporting is true, this module is
  154. /// being parsed for ThinLTO importing into another module.
  155. Expected<std::unique_ptr<Module>>
  156. getLazyBitcodeModule(MemoryBufferRef Buffer, LLVMContext &Context,
  157. bool ShouldLazyLoadMetadata = false,
  158. bool IsImporting = false,
  159. ParserCallbacks Callbacks = {});
  160. /// Like getLazyBitcodeModule, except that the module takes ownership of
  161. /// the memory buffer if successful. If successful, this moves Buffer. On
  162. /// error, this *does not* move Buffer. If IsImporting is true, this module is
  163. /// being parsed for ThinLTO importing into another module.
  164. Expected<std::unique_ptr<Module>> getOwningLazyBitcodeModule(
  165. std::unique_ptr<MemoryBuffer> &&Buffer, LLVMContext &Context,
  166. bool ShouldLazyLoadMetadata = false, bool IsImporting = false,
  167. ParserCallbacks Callbacks = {});
  168. /// Read the header of the specified bitcode buffer and extract just the
  169. /// triple information. If successful, this returns a string. On error, this
  170. /// returns "".
  171. Expected<std::string> getBitcodeTargetTriple(MemoryBufferRef Buffer);
  172. /// Return true if \p Buffer contains a bitcode file with ObjC code (category
  173. /// or class) in it.
  174. Expected<bool> isBitcodeContainingObjCCategory(MemoryBufferRef Buffer);
  175. /// Read the header of the specified bitcode buffer and extract just the
  176. /// producer string information. If successful, this returns a string. On
  177. /// error, this returns "".
  178. Expected<std::string> getBitcodeProducerString(MemoryBufferRef Buffer);
  179. /// Read the specified bitcode file, returning the module.
  180. Expected<std::unique_ptr<Module>>
  181. parseBitcodeFile(MemoryBufferRef Buffer, LLVMContext &Context,
  182. ParserCallbacks Callbacks = {});
  183. /// Returns LTO information for the specified bitcode file.
  184. Expected<BitcodeLTOInfo> getBitcodeLTOInfo(MemoryBufferRef Buffer);
  185. /// Parse the specified bitcode buffer, returning the module summary index.
  186. Expected<std::unique_ptr<ModuleSummaryIndex>>
  187. getModuleSummaryIndex(MemoryBufferRef Buffer);
  188. /// Parse the specified bitcode buffer and merge the index into CombinedIndex.
  189. Error readModuleSummaryIndex(MemoryBufferRef Buffer,
  190. ModuleSummaryIndex &CombinedIndex,
  191. uint64_t ModuleId);
  192. /// Parse the module summary index out of an IR file and return the module
  193. /// summary index object if found, or an empty summary if not. If Path refers
  194. /// to an empty file and IgnoreEmptyThinLTOIndexFile is true, then
  195. /// this function will return nullptr.
  196. Expected<std::unique_ptr<ModuleSummaryIndex>>
  197. getModuleSummaryIndexForFile(StringRef Path,
  198. bool IgnoreEmptyThinLTOIndexFile = false);
  /// isBitcodeWrapper - Return true if the given bytes are the magic bytes
  /// for an LLVM IR bitcode wrapper.
  ///
  /// \param BufPtr first byte of the buffer to inspect.
  /// \param BufEnd one past the last byte of the buffer.
  /// \returns false when fewer than four bytes are available, otherwise
  ///          whether the first four bytes are DE C0 17 0B.
  inline bool isBitcodeWrapper(const unsigned char *BufPtr,
                               const unsigned char *BufEnd) {
    // All four magic bytes must lie inside [BufPtr, BufEnd) before any of
    // them is read; a non-empty check alone would let BufPtr[1..3] run past
    // the end of a 1-3 byte buffer.
    if (BufEnd - BufPtr < 4)
      return false;
    // See if you can find the hidden message in the magic bytes :-).
    // (Hint: it's a little-endian encoding.)
    return BufPtr[0] == 0xDE && BufPtr[1] == 0xC0 && BufPtr[2] == 0x17 &&
           BufPtr[3] == 0x0B;
  }
  /// isRawBitcode - Return true if the given bytes are the magic bytes for
  /// raw LLVM IR bitcode (without a wrapper).
  ///
  /// \param BufPtr first byte of the buffer to inspect.
  /// \param BufEnd one past the last byte of the buffer.
  /// \returns false when fewer than four bytes are available, otherwise
  ///          whether the first four bytes are 'B' 'C' C0 DE.
  inline bool isRawBitcode(const unsigned char *BufPtr,
                           const unsigned char *BufEnd) {
    // All four magic bytes must lie inside [BufPtr, BufEnd) before any of
    // them is read; a non-empty check alone would let BufPtr[1..3] run past
    // the end of a 1-3 byte buffer.
    if (BufEnd - BufPtr < 4)
      return false;
    // These bytes sort of have a hidden message, but it's not in
    // little-endian this time, and it's a little redundant.
    return BufPtr[0] == 'B' && BufPtr[1] == 'C' && BufPtr[2] == 0xc0 &&
           BufPtr[3] == 0xde;
  }
  223. /// isBitcode - Return true if the given bytes are the magic bytes for
  224. /// LLVM IR bitcode, either with or without a wrapper.
  225. inline bool isBitcode(const unsigned char *BufPtr,
  226. const unsigned char *BufEnd) {
  227. return isBitcodeWrapper(BufPtr, BufEnd) ||
  228. isRawBitcode(BufPtr, BufEnd);
  229. }
  230. /// SkipBitcodeWrapperHeader - Some systems wrap bc files with a special
  231. /// header for padding or other reasons. The format of this header is:
  232. ///
  233. /// struct bc_header {
  234. /// uint32_t Magic; // 0x0B17C0DE
  235. /// uint32_t Version; // Version, currently always 0.
  236. /// uint32_t BitcodeOffset; // Offset to traditional bitcode file.
  237. /// uint32_t BitcodeSize; // Size of traditional bitcode file.
  238. /// ... potentially other gunk ...
  239. /// };
  240. ///
  241. /// This function is called when we find a file with a matching magic number.
  242. /// In this case, skip down to the subsection of the file that is actually a
  243. /// BC file.
  244. /// If 'VerifyBufferSize' is true, check that the buffer is large enough to
  245. /// contain the whole bitcode file.
  246. inline bool SkipBitcodeWrapperHeader(const unsigned char *&BufPtr,
  247. const unsigned char *&BufEnd,
  248. bool VerifyBufferSize) {
  249. // Must contain the offset and size field!
  250. if (unsigned(BufEnd - BufPtr) < BWH_SizeField + 4)
  251. return true;
  252. unsigned Offset = support::endian::read32le(&BufPtr[BWH_OffsetField]);
  253. unsigned Size = support::endian::read32le(&BufPtr[BWH_SizeField]);
  254. uint64_t BitcodeOffsetEnd = (uint64_t)Offset + (uint64_t)Size;
  255. // Verify that Offset+Size fits in the file.
  256. if (VerifyBufferSize && BitcodeOffsetEnd > uint64_t(BufEnd-BufPtr))
  257. return true;
  258. BufPtr += Offset;
  259. BufEnd = BufPtr+Size;
  260. return false;
  261. }
  262. APInt readWideAPInt(ArrayRef<uint64_t> Vals, unsigned TypeBits);
  /// Accessor for the std::error_category that BitcodeError values are
  /// reported under.
  const std::error_category &BitcodeErrorCategory();

  /// Error codes reported by the bitcode reader.
  enum class BitcodeError { CorruptedBitcode = 1 };

  /// Builds a std::error_code from \p E, using the enumerator's integer value
  /// and the category returned by BitcodeErrorCategory().
  inline std::error_code make_error_code(BitcodeError E) {
    const int Code = static_cast<int>(E);
    return std::error_code(Code, BitcodeErrorCategory());
  }
  268. } // end namespace llvm
  269. namespace std {
  270. template <> struct is_error_code_enum<llvm::BitcodeError> : std::true_type {};
  271. } // end namespace std
  272. #endif // LLVM_BITCODE_BITCODEREADER_H
  273. #ifdef __GNUC__
  274. #pragma GCC diagnostic pop
  275. #endif