GsymReader.h 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324
  1. #pragma once
  2. #ifdef __GNUC__
  3. #pragma GCC diagnostic push
  4. #pragma GCC diagnostic ignored "-Wunused-parameter"
  5. #endif
  6. //===- GsymReader.h ---------------------------------------------*- C++ -*-===//
  7. //
  8. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  9. // See https://llvm.org/LICENSE.txt for license information.
  10. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  11. //
  12. //===----------------------------------------------------------------------===//
  13. #ifndef LLVM_DEBUGINFO_GSYM_GSYMREADER_H
  14. #define LLVM_DEBUGINFO_GSYM_GSYMREADER_H
  15. #include "llvm/ADT/ArrayRef.h"
  16. #include "llvm/DebugInfo/GSYM/FileEntry.h"
  17. #include "llvm/DebugInfo/GSYM/FunctionInfo.h"
  18. #include "llvm/DebugInfo/GSYM/Header.h"
  19. #include "llvm/DebugInfo/GSYM/LineEntry.h"
  20. #include "llvm/DebugInfo/GSYM/StringTable.h"
  21. #include "llvm/Support/DataExtractor.h"
  22. #include "llvm/Support/Endian.h"
  23. #include "llvm/Support/ErrorOr.h"
  24. #include <inttypes.h>
  25. #include <memory>
  26. #include <stdint.h>
  27. #include <string>
  28. #include <vector>
  29. namespace llvm {
  30. class MemoryBuffer;
  31. class raw_ostream;
  32. namespace gsym {
  33. /// GsymReader is used to read GSYM data from a file or buffer.
  34. ///
  35. /// This class is optimized for very quick lookups when the endianness matches
  36. /// the host system. The Header, address table, address info offsets, and file
  37. /// table are designed to be mmap'ed as read only into memory and used without
  38. /// any parsing needed. If the endianness doesn't match, we swap these objects
  39. /// and tables into GsymReader::SwappedData and then point our header and
  40. /// ArrayRefs to this swapped internal data.
  41. ///
  42. /// GsymReader objects must use one of the static functions to create an
  43. /// instance: GsymReader::openFile(...) and GsymReader::copyBuffer(...).
  44. class GsymReader {
  45. GsymReader(std::unique_ptr<MemoryBuffer> Buffer);
  46. llvm::Error parse();
  47. std::unique_ptr<MemoryBuffer> MemBuffer;
  48. StringRef GsymBytes;
  49. llvm::support::endianness Endian;
  50. const Header *Hdr = nullptr;
  51. ArrayRef<uint8_t> AddrOffsets;
  52. ArrayRef<uint32_t> AddrInfoOffsets;
  53. ArrayRef<FileEntry> Files;
  54. StringTable StrTab;
  55. /// When the GSYM file's endianness doesn't match the host system then
  56. /// we must decode all data structures that need to be swapped into
  57. /// local storage and set point the ArrayRef objects above to these swapped
  58. /// copies.
  59. struct SwappedData {
  60. Header Hdr;
  61. std::vector<uint8_t> AddrOffsets;
  62. std::vector<uint32_t> AddrInfoOffsets;
  63. std::vector<FileEntry> Files;
  64. };
  65. std::unique_ptr<SwappedData> Swap;
  66. public:
  67. GsymReader(GsymReader &&RHS);
  68. ~GsymReader();
  69. /// Construct a GsymReader from a file on disk.
  70. ///
  71. /// \param Path The file path the GSYM file to read.
  72. /// \returns An expected GsymReader that contains the object or an error
  73. /// object that indicates reason for failing to read the GSYM.
  74. static llvm::Expected<GsymReader> openFile(StringRef Path);
  75. /// Construct a GsymReader from a buffer.
  76. ///
  77. /// \param Bytes A set of bytes that will be copied and owned by the
  78. /// returned object on success.
  79. /// \returns An expected GsymReader that contains the object or an error
  80. /// object that indicates reason for failing to read the GSYM.
  81. static llvm::Expected<GsymReader> copyBuffer(StringRef Bytes);
  82. /// Access the GSYM header.
  83. /// \returns A native endian version of the GSYM header.
  84. const Header &getHeader() const;
  85. /// Get the full function info for an address.
  86. ///
  87. /// This should be called when a client will store a copy of the complete
  88. /// FunctionInfo for a given address. For one off lookups, use the lookup()
  89. /// function below.
  90. ///
  91. /// Symbolication server processes might want to parse the entire function
  92. /// info for a given address and cache it if the process stays around to
  93. /// service many symbolication addresses, like for parsing profiling
  94. /// information.
  95. ///
  96. /// \param Addr A virtual address from the orignal object file to lookup.
  97. ///
  98. /// \returns An expected FunctionInfo that contains the function info object
  99. /// or an error object that indicates reason for failing to lookup the
  100. /// address.
  101. llvm::Expected<FunctionInfo> getFunctionInfo(uint64_t Addr) const;
  102. /// Lookup an address in the a GSYM.
  103. ///
  104. /// Lookup just the information needed for a specific address \a Addr. This
  105. /// function is faster that calling getFunctionInfo() as it will only return
  106. /// information that pertains to \a Addr and allows the parsing to skip any
  107. /// extra information encoded for other addresses. For example the line table
  108. /// parsing can stop when a matching LineEntry has been fouhnd, and the
  109. /// InlineInfo can stop parsing early once a match has been found and also
  110. /// skip information that doesn't match. This avoids memory allocations and
  111. /// is much faster for lookups.
  112. ///
  113. /// \param Addr A virtual address from the orignal object file to lookup.
  114. /// \returns An expected LookupResult that contains only the information
  115. /// needed for the current address, or an error object that indicates reason
  116. /// for failing to lookup the address.
  117. llvm::Expected<LookupResult> lookup(uint64_t Addr) const;
  118. /// Get a string from the string table.
  119. ///
  120. /// \param Offset The string table offset for the string to retrieve.
  121. /// \returns The string from the strin table.
  122. StringRef getString(uint32_t Offset) const { return StrTab[Offset]; }
  123. /// Get the a file entry for the suppplied file index.
  124. ///
  125. /// Used to convert any file indexes in the FunctionInfo data back into
  126. /// files. This function can be used for iteration, but is more commonly used
  127. /// for random access when doing lookups.
  128. ///
  129. /// \param Index An index into the file table.
  130. /// \returns An optional FileInfo that will be valid if the file index is
  131. /// valid, or llvm::None if the file index is out of bounds,
  132. Optional<FileEntry> getFile(uint32_t Index) const {
  133. if (Index < Files.size())
  134. return Files[Index];
  135. return llvm::None;
  136. }
  137. /// Dump the entire Gsym data contained in this object.
  138. ///
  139. /// \param OS The output stream to dump to.
  140. void dump(raw_ostream &OS);
  141. /// Dump a FunctionInfo object.
  142. ///
  143. /// This function will convert any string table indexes and file indexes
  144. /// into human readable format.
  145. ///
  146. /// \param OS The output stream to dump to.
  147. ///
  148. /// \param FI The object to dump.
  149. void dump(raw_ostream &OS, const FunctionInfo &FI);
  150. /// Dump a LineTable object.
  151. ///
  152. /// This function will convert any string table indexes and file indexes
  153. /// into human readable format.
  154. ///
  155. ///
  156. /// \param OS The output stream to dump to.
  157. ///
  158. /// \param LT The object to dump.
  159. void dump(raw_ostream &OS, const LineTable &LT);
  160. /// Dump a InlineInfo object.
  161. ///
  162. /// This function will convert any string table indexes and file indexes
  163. /// into human readable format.
  164. ///
  165. /// \param OS The output stream to dump to.
  166. ///
  167. /// \param II The object to dump.
  168. ///
  169. /// \param Indent The indentation as number of spaces. Used for recurive
  170. /// dumping.
  171. void dump(raw_ostream &OS, const InlineInfo &II, uint32_t Indent = 0);
  172. /// Dump a FileEntry object.
  173. ///
  174. /// This function will convert any string table indexes into human readable
  175. /// format.
  176. ///
  177. /// \param OS The output stream to dump to.
  178. ///
  179. /// \param FE The object to dump.
  180. void dump(raw_ostream &OS, Optional<FileEntry> FE);
  181. /// Get the number of addresses in this Gsym file.
  182. uint32_t getNumAddresses() const {
  183. return Hdr->NumAddresses;
  184. }
  185. /// Gets an address from the address table.
  186. ///
  187. /// Addresses are stored as offsets frrom the gsym::Header::BaseAddress.
  188. ///
  189. /// \param Index A index into the address table.
  190. /// \returns A resolved virtual address for adddress in the address table
  191. /// or llvm::None if Index is out of bounds.
  192. Optional<uint64_t> getAddress(size_t Index) const;
  193. protected:
  194. /// Get an appropriate address info offsets array.
  195. ///
  196. /// The address table in the GSYM file is stored as array of 1, 2, 4 or 8
  197. /// byte offsets from the The gsym::Header::BaseAddress. The table is stored
  198. /// internally as a array of bytes that are in the correct endianness. When
  199. /// we access this table we must get an array that matches those sizes. This
  200. /// templatized helper function is used when accessing address offsets in the
  201. /// AddrOffsets member variable.
  202. ///
  203. /// \returns An ArrayRef of an appropriate address offset size.
  204. template <class T> ArrayRef<T>
  205. getAddrOffsets() const {
  206. return ArrayRef<T>(reinterpret_cast<const T *>(AddrOffsets.data()),
  207. AddrOffsets.size()/sizeof(T));
  208. }
  209. /// Get an appropriate address from the address table.
  210. ///
  211. /// The address table in the GSYM file is stored as array of 1, 2, 4 or 8
  212. /// byte address offsets from the The gsym::Header::BaseAddress. The table is
  213. /// stored internally as a array of bytes that are in the correct endianness.
  214. /// In order to extract an address from the address table we must access the
  215. /// address offset using the correct size and then add it to the BaseAddress
  216. /// in the header.
  217. ///
  218. /// \param Index An index into the AddrOffsets array.
  219. /// \returns An virtual address that matches the original object file for the
  220. /// address as the specified index, or llvm::None if Index is out of bounds.
  221. template <class T> Optional<uint64_t>
  222. addressForIndex(size_t Index) const {
  223. ArrayRef<T> AIO = getAddrOffsets<T>();
  224. if (Index < AIO.size())
  225. return AIO[Index] + Hdr->BaseAddress;
  226. return llvm::None;
  227. }
  228. /// Lookup an address offset in the AddrOffsets table.
  229. ///
  230. /// Given an address offset, look it up using a binary search of the
  231. /// AddrOffsets table.
  232. ///
  233. /// \param AddrOffset An address offset, that has already been computed by
  234. /// subtracting the gsym::Header::BaseAddress.
  235. /// \returns The matching address offset index. This index will be used to
  236. /// extract the FunctionInfo data's offset from the AddrInfoOffsets array.
  237. template <class T>
  238. llvm::Optional<uint64_t> getAddressOffsetIndex(const uint64_t AddrOffset) const {
  239. ArrayRef<T> AIO = getAddrOffsets<T>();
  240. const auto Begin = AIO.begin();
  241. const auto End = AIO.end();
  242. auto Iter = std::lower_bound(Begin, End, AddrOffset);
  243. // Watch for addresses that fall between the gsym::Header::BaseAddress and
  244. // the first address offset.
  245. if (Iter == Begin && AddrOffset < *Begin)
  246. return llvm::None;
  247. if (Iter == End || AddrOffset < *Iter)
  248. --Iter;
  249. return std::distance(Begin, Iter);
  250. }
  251. /// Create a GSYM from a memory buffer.
  252. ///
  253. /// Called by both openFile() and copyBuffer(), this function does all of the
  254. /// work of parsing the GSYM file and returning an error.
  255. ///
  256. /// \param MemBuffer A memory buffer that will transfer ownership into the
  257. /// GsymReader.
  258. /// \returns An expected GsymReader that contains the object or an error
  259. /// object that indicates reason for failing to read the GSYM.
  260. static llvm::Expected<llvm::gsym::GsymReader>
  261. create(std::unique_ptr<MemoryBuffer> &MemBuffer);
  262. /// Given an address, find the address index.
  263. ///
  264. /// Binary search the address table and find the matching address index.
  265. ///
  266. /// \param Addr A virtual address that matches the original object file
  267. /// to lookup.
  268. /// \returns An index into the address table. This index can be used to
  269. /// extract the FunctionInfo data's offset from the AddrInfoOffsets array.
  270. /// Returns an error if the address isn't in the GSYM with details of why.
  271. Expected<uint64_t> getAddressIndex(const uint64_t Addr) const;
  272. /// Given an address index, get the offset for the FunctionInfo.
  273. ///
  274. /// Looking up an address is done by finding the corresponding address
  275. /// index for the address. This index is then used to get the offset of the
  276. /// FunctionInfo data that we will decode using this function.
  277. ///
  278. /// \param Index An index into the address table.
  279. /// \returns An optional GSYM data offset for the offset of the FunctionInfo
  280. /// that needs to be decoded.
  281. Optional<uint64_t> getAddressInfoOffset(size_t Index) const;
  282. };
  283. } // namespace gsym
  284. } // namespace llvm
  285. #endif // #ifndef LLVM_DEBUGINFO_GSYM_GSYMREADER_H
  286. #ifdef __GNUC__
  287. #pragma GCC diagnostic pop
  288. #endif