GsymReader.h 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322
  1. #pragma once
  2. #ifdef __GNUC__
  3. #pragma GCC diagnostic push
  4. #pragma GCC diagnostic ignored "-Wunused-parameter"
  5. #endif
  6. //===- GsymReader.h ---------------------------------------------*- C++ -*-===//
  7. //
  8. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  9. // See https://llvm.org/LICENSE.txt for license information.
  10. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  11. //
  12. //===----------------------------------------------------------------------===//
  13. #ifndef LLVM_DEBUGINFO_GSYM_GSYMREADER_H
  14. #define LLVM_DEBUGINFO_GSYM_GSYMREADER_H
  15. #include "llvm/ADT/ArrayRef.h"
  16. #include "llvm/DebugInfo/GSYM/FileEntry.h"
  17. #include "llvm/DebugInfo/GSYM/FunctionInfo.h"
  18. #include "llvm/DebugInfo/GSYM/Header.h"
  19. #include "llvm/DebugInfo/GSYM/LineEntry.h"
  20. #include "llvm/DebugInfo/GSYM/StringTable.h"
  21. #include "llvm/Support/DataExtractor.h"
  22. #include "llvm/Support/Endian.h"
  23. #include "llvm/Support/ErrorOr.h"
  24. #include <inttypes.h>
  25. #include <memory>
  26. #include <stdint.h>
  27. #include <vector>
  28. namespace llvm {
  29. class MemoryBuffer;
  30. class raw_ostream;
  31. namespace gsym {
  32. /// GsymReader is used to read GSYM data from a file or buffer.
  33. ///
  34. /// This class is optimized for very quick lookups when the endianness matches
  35. /// the host system. The Header, address table, address info offsets, and file
  36. /// table is designed to be mmap'ed as read only into memory and used without
  37. /// any parsing needed. If the endianness doesn't match, we swap these objects
  38. /// and tables into GsymReader::SwappedData and then point our header and
  39. /// ArrayRefs to this swapped internal data.
  40. ///
  41. /// GsymReader objects must use one of the static functions to create an
  42. /// instance: GsymReader::openFile(...) and GsymReader::copyBuffer(...).
  43. class GsymReader {
  44. GsymReader(std::unique_ptr<MemoryBuffer> Buffer);
  45. llvm::Error parse();
  46. std::unique_ptr<MemoryBuffer> MemBuffer;
  47. StringRef GsymBytes;
  48. llvm::support::endianness Endian;
  49. const Header *Hdr = nullptr;
  50. ArrayRef<uint8_t> AddrOffsets;
  51. ArrayRef<uint32_t> AddrInfoOffsets;
  52. ArrayRef<FileEntry> Files;
  53. StringTable StrTab;
  54. /// When the GSYM file's endianness doesn't match the host system then
  55. /// we must decode all data structures that need to be swapped into
  56. /// local storage and set point the ArrayRef objects above to these swapped
  57. /// copies.
  58. struct SwappedData {
  59. Header Hdr;
  60. std::vector<uint8_t> AddrOffsets;
  61. std::vector<uint32_t> AddrInfoOffsets;
  62. std::vector<FileEntry> Files;
  63. };
  64. std::unique_ptr<SwappedData> Swap;
  65. public:
  66. GsymReader(GsymReader &&RHS);
  67. ~GsymReader();
  68. /// Construct a GsymReader from a file on disk.
  69. ///
  70. /// \param Path The file path the GSYM file to read.
  71. /// \returns An expected GsymReader that contains the object or an error
  72. /// object that indicates reason for failing to read the GSYM.
  73. static llvm::Expected<GsymReader> openFile(StringRef Path);
  74. /// Construct a GsymReader from a buffer.
  75. ///
  76. /// \param Bytes A set of bytes that will be copied and owned by the
  77. /// returned object on success.
  78. /// \returns An expected GsymReader that contains the object or an error
  79. /// object that indicates reason for failing to read the GSYM.
  80. static llvm::Expected<GsymReader> copyBuffer(StringRef Bytes);
  81. /// Access the GSYM header.
  82. /// \returns A native endian version of the GSYM header.
  83. const Header &getHeader() const;
  84. /// Get the full function info for an address.
  85. ///
  86. /// This should be called when a client will store a copy of the complete
  87. /// FunctionInfo for a given address. For one off lookups, use the lookup()
  88. /// function below.
  89. ///
  90. /// Symbolication server processes might want to parse the entire function
  91. /// info for a given address and cache it if the process stays around to
  92. /// service many symbolication addresses, like for parsing profiling
  93. /// information.
  94. ///
  95. /// \param Addr A virtual address from the orignal object file to lookup.
  96. ///
  97. /// \returns An expected FunctionInfo that contains the function info object
  98. /// or an error object that indicates reason for failing to lookup the
  99. /// address.
  100. llvm::Expected<FunctionInfo> getFunctionInfo(uint64_t Addr) const;
  101. /// Lookup an address in the a GSYM.
  102. ///
  103. /// Lookup just the information needed for a specific address \a Addr. This
  104. /// function is faster that calling getFunctionInfo() as it will only return
  105. /// information that pertains to \a Addr and allows the parsing to skip any
  106. /// extra information encoded for other addresses. For example the line table
  107. /// parsing can stop when a matching LineEntry has been fouhnd, and the
  108. /// InlineInfo can stop parsing early once a match has been found and also
  109. /// skip information that doesn't match. This avoids memory allocations and
  110. /// is much faster for lookups.
  111. ///
  112. /// \param Addr A virtual address from the orignal object file to lookup.
  113. /// \returns An expected LookupResult that contains only the information
  114. /// needed for the current address, or an error object that indicates reason
  115. /// for failing to lookup the address.
  116. llvm::Expected<LookupResult> lookup(uint64_t Addr) const;
  117. /// Get a string from the string table.
  118. ///
  119. /// \param Offset The string table offset for the string to retrieve.
  120. /// \returns The string from the strin table.
  121. StringRef getString(uint32_t Offset) const { return StrTab[Offset]; }
  122. /// Get the a file entry for the suppplied file index.
  123. ///
  124. /// Used to convert any file indexes in the FunctionInfo data back into
  125. /// files. This function can be used for iteration, but is more commonly used
  126. /// for random access when doing lookups.
  127. ///
  128. /// \param Index An index into the file table.
  129. /// \returns An optional FileInfo that will be valid if the file index is
  130. /// valid, or std::nullopt if the file index is out of bounds,
  131. std::optional<FileEntry> getFile(uint32_t Index) const {
  132. if (Index < Files.size())
  133. return Files[Index];
  134. return std::nullopt;
  135. }
  136. /// Dump the entire Gsym data contained in this object.
  137. ///
  138. /// \param OS The output stream to dump to.
  139. void dump(raw_ostream &OS);
  140. /// Dump a FunctionInfo object.
  141. ///
  142. /// This function will convert any string table indexes and file indexes
  143. /// into human readable format.
  144. ///
  145. /// \param OS The output stream to dump to.
  146. ///
  147. /// \param FI The object to dump.
  148. void dump(raw_ostream &OS, const FunctionInfo &FI);
  149. /// Dump a LineTable object.
  150. ///
  151. /// This function will convert any string table indexes and file indexes
  152. /// into human readable format.
  153. ///
  154. ///
  155. /// \param OS The output stream to dump to.
  156. ///
  157. /// \param LT The object to dump.
  158. void dump(raw_ostream &OS, const LineTable &LT);
  159. /// Dump a InlineInfo object.
  160. ///
  161. /// This function will convert any string table indexes and file indexes
  162. /// into human readable format.
  163. ///
  164. /// \param OS The output stream to dump to.
  165. ///
  166. /// \param II The object to dump.
  167. ///
  168. /// \param Indent The indentation as number of spaces. Used for recurive
  169. /// dumping.
  170. void dump(raw_ostream &OS, const InlineInfo &II, uint32_t Indent = 0);
  171. /// Dump a FileEntry object.
  172. ///
  173. /// This function will convert any string table indexes into human readable
  174. /// format.
  175. ///
  176. /// \param OS The output stream to dump to.
  177. ///
  178. /// \param FE The object to dump.
  179. void dump(raw_ostream &OS, std::optional<FileEntry> FE);
  180. /// Get the number of addresses in this Gsym file.
  181. uint32_t getNumAddresses() const {
  182. return Hdr->NumAddresses;
  183. }
  184. /// Gets an address from the address table.
  185. ///
  186. /// Addresses are stored as offsets frrom the gsym::Header::BaseAddress.
  187. ///
  188. /// \param Index A index into the address table.
  189. /// \returns A resolved virtual address for adddress in the address table
  190. /// or std::nullopt if Index is out of bounds.
  191. std::optional<uint64_t> getAddress(size_t Index) const;
  192. protected:
  193. /// Get an appropriate address info offsets array.
  194. ///
  195. /// The address table in the GSYM file is stored as array of 1, 2, 4 or 8
  196. /// byte offsets from the The gsym::Header::BaseAddress. The table is stored
  197. /// internally as a array of bytes that are in the correct endianness. When
  198. /// we access this table we must get an array that matches those sizes. This
  199. /// templatized helper function is used when accessing address offsets in the
  200. /// AddrOffsets member variable.
  201. ///
  202. /// \returns An ArrayRef of an appropriate address offset size.
  203. template <class T> ArrayRef<T>
  204. getAddrOffsets() const {
  205. return ArrayRef<T>(reinterpret_cast<const T *>(AddrOffsets.data()),
  206. AddrOffsets.size()/sizeof(T));
  207. }
  208. /// Get an appropriate address from the address table.
  209. ///
  210. /// The address table in the GSYM file is stored as array of 1, 2, 4 or 8
  211. /// byte address offsets from the The gsym::Header::BaseAddress. The table is
  212. /// stored internally as a array of bytes that are in the correct endianness.
  213. /// In order to extract an address from the address table we must access the
  214. /// address offset using the correct size and then add it to the BaseAddress
  215. /// in the header.
  216. ///
  217. /// \param Index An index into the AddrOffsets array.
  218. /// \returns An virtual address that matches the original object file for the
  219. /// address as the specified index, or std::nullopt if Index is out of bounds.
  220. template <class T>
  221. std::optional<uint64_t> addressForIndex(size_t Index) const {
  222. ArrayRef<T> AIO = getAddrOffsets<T>();
  223. if (Index < AIO.size())
  224. return AIO[Index] + Hdr->BaseAddress;
  225. return std::nullopt;
  226. }
  227. /// Lookup an address offset in the AddrOffsets table.
  228. ///
  229. /// Given an address offset, look it up using a binary search of the
  230. /// AddrOffsets table.
  231. ///
  232. /// \param AddrOffset An address offset, that has already been computed by
  233. /// subtracting the gsym::Header::BaseAddress.
  234. /// \returns The matching address offset index. This index will be used to
  235. /// extract the FunctionInfo data's offset from the AddrInfoOffsets array.
  236. template <class T>
  237. std::optional<uint64_t>
  238. getAddressOffsetIndex(const uint64_t AddrOffset) const {
  239. ArrayRef<T> AIO = getAddrOffsets<T>();
  240. const auto Begin = AIO.begin();
  241. const auto End = AIO.end();
  242. auto Iter = std::lower_bound(Begin, End, AddrOffset);
  243. // Watch for addresses that fall between the gsym::Header::BaseAddress and
  244. // the first address offset.
  245. if (Iter == Begin && AddrOffset < *Begin)
  246. return std::nullopt;
  247. if (Iter == End || AddrOffset < *Iter)
  248. --Iter;
  249. return std::distance(Begin, Iter);
  250. }
  251. /// Create a GSYM from a memory buffer.
  252. ///
  253. /// Called by both openFile() and copyBuffer(), this function does all of the
  254. /// work of parsing the GSYM file and returning an error.
  255. ///
  256. /// \param MemBuffer A memory buffer that will transfer ownership into the
  257. /// GsymReader.
  258. /// \returns An expected GsymReader that contains the object or an error
  259. /// object that indicates reason for failing to read the GSYM.
  260. static llvm::Expected<llvm::gsym::GsymReader>
  261. create(std::unique_ptr<MemoryBuffer> &MemBuffer);
  262. /// Given an address, find the address index.
  263. ///
  264. /// Binary search the address table and find the matching address index.
  265. ///
  266. /// \param Addr A virtual address that matches the original object file
  267. /// to lookup.
  268. /// \returns An index into the address table. This index can be used to
  269. /// extract the FunctionInfo data's offset from the AddrInfoOffsets array.
  270. /// Returns an error if the address isn't in the GSYM with details of why.
  271. Expected<uint64_t> getAddressIndex(const uint64_t Addr) const;
  272. /// Given an address index, get the offset for the FunctionInfo.
  273. ///
  274. /// Looking up an address is done by finding the corresponding address
  275. /// index for the address. This index is then used to get the offset of the
  276. /// FunctionInfo data that we will decode using this function.
  277. ///
  278. /// \param Index An index into the address table.
  279. /// \returns An optional GSYM data offset for the offset of the FunctionInfo
  280. /// that needs to be decoded.
  281. std::optional<uint64_t> getAddressInfoOffset(size_t Index) const;
  282. };
  283. } // namespace gsym
  284. } // namespace llvm
  285. #endif // LLVM_DEBUGINFO_GSYM_GSYMREADER_H
  286. #ifdef __GNUC__
  287. #pragma GCC diagnostic pop
  288. #endif