GsymCreator.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362
  1. //===- GsymCreator.cpp ----------------------------------------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //===----------------------------------------------------------------------===//
  7. #include "llvm/DebugInfo/GSYM/GsymCreator.h"
  8. #include "llvm/DebugInfo/GSYM/FileWriter.h"
  9. #include "llvm/DebugInfo/GSYM/Header.h"
  10. #include "llvm/DebugInfo/GSYM/LineTable.h"
  11. #include "llvm/MC/StringTableBuilder.h"
  12. #include "llvm/Support/raw_ostream.h"
  13. #include <algorithm>
  14. #include <cassert>
  15. #include <functional>
  16. #include <vector>
  17. using namespace llvm;
  18. using namespace gsym;
  19. GsymCreator::GsymCreator(bool Quiet)
  20. : StrTab(StringTableBuilder::ELF), Quiet(Quiet) {
  21. insertFile(StringRef());
  22. }
  23. uint32_t GsymCreator::insertFile(StringRef Path, llvm::sys::path::Style Style) {
  24. llvm::StringRef directory = llvm::sys::path::parent_path(Path, Style);
  25. llvm::StringRef filename = llvm::sys::path::filename(Path, Style);
  26. // We must insert the strings first, then call the FileEntry constructor.
  27. // If we inline the insertString() function call into the constructor, the
  28. // call order is undefined due to parameter lists not having any ordering
  29. // requirements.
  30. const uint32_t Dir = insertString(directory);
  31. const uint32_t Base = insertString(filename);
  32. FileEntry FE(Dir, Base);
  33. std::lock_guard<std::mutex> Guard(Mutex);
  34. const auto NextIndex = Files.size();
  35. // Find FE in hash map and insert if not present.
  36. auto R = FileEntryToIndex.insert(std::make_pair(FE, NextIndex));
  37. if (R.second)
  38. Files.emplace_back(FE);
  39. return R.first->second;
  40. }
  41. llvm::Error GsymCreator::save(StringRef Path,
  42. llvm::support::endianness ByteOrder) const {
  43. std::error_code EC;
  44. raw_fd_ostream OutStrm(Path, EC);
  45. if (EC)
  46. return llvm::errorCodeToError(EC);
  47. FileWriter O(OutStrm, ByteOrder);
  48. return encode(O);
  49. }
  50. llvm::Error GsymCreator::encode(FileWriter &O) const {
  51. std::lock_guard<std::mutex> Guard(Mutex);
  52. if (Funcs.empty())
  53. return createStringError(std::errc::invalid_argument,
  54. "no functions to encode");
  55. if (!Finalized)
  56. return createStringError(std::errc::invalid_argument,
  57. "GsymCreator wasn't finalized prior to encoding");
  58. if (Funcs.size() > UINT32_MAX)
  59. return createStringError(std::errc::invalid_argument,
  60. "too many FunctionInfos");
  61. const uint64_t MinAddr =
  62. BaseAddress ? *BaseAddress : Funcs.front().startAddress();
  63. const uint64_t MaxAddr = Funcs.back().startAddress();
  64. const uint64_t AddrDelta = MaxAddr - MinAddr;
  65. Header Hdr;
  66. Hdr.Magic = GSYM_MAGIC;
  67. Hdr.Version = GSYM_VERSION;
  68. Hdr.AddrOffSize = 0;
  69. Hdr.UUIDSize = static_cast<uint8_t>(UUID.size());
  70. Hdr.BaseAddress = MinAddr;
  71. Hdr.NumAddresses = static_cast<uint32_t>(Funcs.size());
  72. Hdr.StrtabOffset = 0; // We will fix this up later.
  73. Hdr.StrtabSize = 0; // We will fix this up later.
  74. memset(Hdr.UUID, 0, sizeof(Hdr.UUID));
  75. if (UUID.size() > sizeof(Hdr.UUID))
  76. return createStringError(std::errc::invalid_argument,
  77. "invalid UUID size %u", (uint32_t)UUID.size());
  78. // Set the address offset size correctly in the GSYM header.
  79. if (AddrDelta <= UINT8_MAX)
  80. Hdr.AddrOffSize = 1;
  81. else if (AddrDelta <= UINT16_MAX)
  82. Hdr.AddrOffSize = 2;
  83. else if (AddrDelta <= UINT32_MAX)
  84. Hdr.AddrOffSize = 4;
  85. else
  86. Hdr.AddrOffSize = 8;
  87. // Copy the UUID value if we have one.
  88. if (UUID.size() > 0)
  89. memcpy(Hdr.UUID, UUID.data(), UUID.size());
  90. // Write out the header.
  91. llvm::Error Err = Hdr.encode(O);
  92. if (Err)
  93. return Err;
  94. // Write out the address offsets.
  95. O.alignTo(Hdr.AddrOffSize);
  96. for (const auto &FuncInfo : Funcs) {
  97. uint64_t AddrOffset = FuncInfo.startAddress() - Hdr.BaseAddress;
  98. switch (Hdr.AddrOffSize) {
  99. case 1:
  100. O.writeU8(static_cast<uint8_t>(AddrOffset));
  101. break;
  102. case 2:
  103. O.writeU16(static_cast<uint16_t>(AddrOffset));
  104. break;
  105. case 4:
  106. O.writeU32(static_cast<uint32_t>(AddrOffset));
  107. break;
  108. case 8:
  109. O.writeU64(AddrOffset);
  110. break;
  111. }
  112. }
  113. // Write out all zeros for the AddrInfoOffsets.
  114. O.alignTo(4);
  115. const off_t AddrInfoOffsetsOffset = O.tell();
  116. for (size_t i = 0, n = Funcs.size(); i < n; ++i)
  117. O.writeU32(0);
  118. // Write out the file table
  119. O.alignTo(4);
  120. assert(!Files.empty());
  121. assert(Files[0].Dir == 0);
  122. assert(Files[0].Base == 0);
  123. size_t NumFiles = Files.size();
  124. if (NumFiles > UINT32_MAX)
  125. return createStringError(std::errc::invalid_argument, "too many files");
  126. O.writeU32(static_cast<uint32_t>(NumFiles));
  127. for (auto File : Files) {
  128. O.writeU32(File.Dir);
  129. O.writeU32(File.Base);
  130. }
  131. // Write out the sting table.
  132. const off_t StrtabOffset = O.tell();
  133. StrTab.write(O.get_stream());
  134. const off_t StrtabSize = O.tell() - StrtabOffset;
  135. std::vector<uint32_t> AddrInfoOffsets;
  136. // Write out the address infos for each function info.
  137. for (const auto &FuncInfo : Funcs) {
  138. if (Expected<uint64_t> OffsetOrErr = FuncInfo.encode(O))
  139. AddrInfoOffsets.push_back(OffsetOrErr.get());
  140. else
  141. return OffsetOrErr.takeError();
  142. }
  143. // Fixup the string table offset and size in the header
  144. O.fixup32((uint32_t)StrtabOffset, offsetof(Header, StrtabOffset));
  145. O.fixup32((uint32_t)StrtabSize, offsetof(Header, StrtabSize));
  146. // Fixup all address info offsets
  147. uint64_t Offset = 0;
  148. for (auto AddrInfoOffset : AddrInfoOffsets) {
  149. O.fixup32(AddrInfoOffset, AddrInfoOffsetsOffset + Offset);
  150. Offset += 4;
  151. }
  152. return ErrorSuccess();
  153. }
  154. // Similar to std::remove_if, but the predicate is binary and it is passed both
  155. // the previous and the current element.
  156. template <class ForwardIt, class BinaryPredicate>
  157. static ForwardIt removeIfBinary(ForwardIt FirstIt, ForwardIt LastIt,
  158. BinaryPredicate Pred) {
  159. if (FirstIt != LastIt) {
  160. auto PrevIt = FirstIt++;
  161. FirstIt = std::find_if(FirstIt, LastIt, [&](const auto &Curr) {
  162. return Pred(*PrevIt++, Curr);
  163. });
  164. if (FirstIt != LastIt)
  165. for (ForwardIt CurrIt = FirstIt; ++CurrIt != LastIt;)
  166. if (!Pred(*PrevIt, *CurrIt)) {
  167. PrevIt = FirstIt;
  168. *FirstIt++ = std::move(*CurrIt);
  169. }
  170. }
  171. return FirstIt;
  172. }
  173. llvm::Error GsymCreator::finalize(llvm::raw_ostream &OS) {
  174. std::lock_guard<std::mutex> Guard(Mutex);
  175. if (Finalized)
  176. return createStringError(std::errc::invalid_argument, "already finalized");
  177. Finalized = true;
  178. // Sort function infos so we can emit sorted functions.
  179. llvm::sort(Funcs);
  180. // Don't let the string table indexes change by finalizing in order.
  181. StrTab.finalizeInOrder();
  182. // Remove duplicates function infos that have both entries from debug info
  183. // (DWARF or Breakpad) and entries from the SymbolTable.
  184. //
  185. // Also handle overlapping function. Usually there shouldn't be any, but they
  186. // can and do happen in some rare cases.
  187. //
  188. // (a) (b) (c)
  189. // ^ ^ ^ ^
  190. // |X |Y |X ^ |X
  191. // | | | |Y | ^
  192. // | | | v v |Y
  193. // v v v v
  194. //
  195. // In (a) and (b), Y is ignored and X will be reported for the full range.
  196. // In (c), both functions will be included in the result and lookups for an
  197. // address in the intersection will return Y because of binary search.
  198. //
  199. // Note that in case of (b), we cannot include Y in the result because then
  200. // we wouldn't find any function for range (end of Y, end of X)
  201. // with binary search
  202. auto NumBefore = Funcs.size();
  203. Funcs.erase(
  204. removeIfBinary(Funcs.begin(), Funcs.end(),
  205. [&](const auto &Prev, const auto &Curr) {
  206. // Empty ranges won't intersect, but we still need to
  207. // catch the case where we have multiple symbols at the
  208. // same address and coalesce them.
  209. const bool ranges_equal = Prev.Range == Curr.Range;
  210. if (ranges_equal || Prev.Range.intersects(Curr.Range)) {
  211. // Overlapping ranges or empty identical ranges.
  212. if (ranges_equal) {
  213. // Same address range. Check if one is from debug
  214. // info and the other is from a symbol table. If
  215. // so, then keep the one with debug info. Our
  216. // sorting guarantees that entries with matching
  217. // address ranges that have debug info are last in
  218. // the sort.
  219. if (Prev == Curr) {
  220. // FunctionInfo entries match exactly (range,
  221. // lines, inlines)
  222. // We used to output a warning here, but this was
  223. // so frequent on some binaries, in particular
  224. // when those were built with GCC, that it slowed
  225. // down processing extremely.
  226. return true;
  227. } else {
  228. if (!Prev.hasRichInfo() && Curr.hasRichInfo()) {
  229. // Same address range, one with no debug info
  230. // (symbol) and the next with debug info. Keep
  231. // the latter.
  232. return true;
  233. } else {
  234. if (!Quiet) {
  235. OS << "warning: same address range contains "
  236. "different debug "
  237. << "info. Removing:\n"
  238. << Prev << "\nIn favor of this one:\n"
  239. << Curr << "\n";
  240. }
  241. return true;
  242. }
  243. }
  244. } else {
  245. if (!Quiet) { // print warnings about overlaps
  246. OS << "warning: function ranges overlap:\n"
  247. << Prev << "\n"
  248. << Curr << "\n";
  249. }
  250. }
  251. } else if (Prev.Range.size() == 0 &&
  252. Curr.Range.contains(Prev.Range.Start)) {
  253. if (!Quiet) {
  254. OS << "warning: removing symbol:\n"
  255. << Prev << "\nKeeping:\n"
  256. << Curr << "\n";
  257. }
  258. return true;
  259. }
  260. return false;
  261. }),
  262. Funcs.end());
  263. // If our last function info entry doesn't have a size and if we have valid
  264. // text ranges, we should set the size of the last entry since any search for
  265. // a high address might match our last entry. By fixing up this size, we can
  266. // help ensure we don't cause lookups to always return the last symbol that
  267. // has no size when doing lookups.
  268. if (!Funcs.empty() && Funcs.back().Range.size() == 0 && ValidTextRanges) {
  269. if (auto Range =
  270. ValidTextRanges->getRangeThatContains(Funcs.back().Range.Start)) {
  271. Funcs.back().Range.End = Range->End;
  272. }
  273. }
  274. OS << "Pruned " << NumBefore - Funcs.size() << " functions, ended with "
  275. << Funcs.size() << " total\n";
  276. return Error::success();
  277. }
  278. uint32_t GsymCreator::insertString(StringRef S, bool Copy) {
  279. if (S.empty())
  280. return 0;
  281. // The hash can be calculated outside the lock.
  282. CachedHashStringRef CHStr(S);
  283. std::lock_guard<std::mutex> Guard(Mutex);
  284. if (Copy) {
  285. // We need to provide backing storage for the string if requested
  286. // since StringTableBuilder stores references to strings. Any string
  287. // that comes from a section in an object file doesn't need to be
  288. // copied, but any string created by code will need to be copied.
  289. // This allows GsymCreator to be really fast when parsing DWARF and
  290. // other object files as most strings don't need to be copied.
  291. if (!StrTab.contains(CHStr))
  292. CHStr = CachedHashStringRef{StringStorage.insert(S).first->getKey(),
  293. CHStr.hash()};
  294. }
  295. return StrTab.add(CHStr);
  296. }
  297. void GsymCreator::addFunctionInfo(FunctionInfo &&FI) {
  298. std::lock_guard<std::mutex> Guard(Mutex);
  299. Ranges.insert(FI.Range);
  300. Funcs.emplace_back(std::move(FI));
  301. }
  302. void GsymCreator::forEachFunctionInfo(
  303. std::function<bool(FunctionInfo &)> const &Callback) {
  304. std::lock_guard<std::mutex> Guard(Mutex);
  305. for (auto &FI : Funcs) {
  306. if (!Callback(FI))
  307. break;
  308. }
  309. }
  310. void GsymCreator::forEachFunctionInfo(
  311. std::function<bool(const FunctionInfo &)> const &Callback) const {
  312. std::lock_guard<std::mutex> Guard(Mutex);
  313. for (const auto &FI : Funcs) {
  314. if (!Callback(FI))
  315. break;
  316. }
  317. }
  318. size_t GsymCreator::getNumFunctionInfos() const {
  319. std::lock_guard<std::mutex> Guard(Mutex);
  320. return Funcs.size();
  321. }
  322. bool GsymCreator::IsValidTextAddress(uint64_t Addr) const {
  323. if (ValidTextRanges)
  324. return ValidTextRanges->contains(Addr);
  325. return true; // No valid text ranges has been set, so accept all ranges.
  326. }
  327. bool GsymCreator::hasFunctionInfoForAddress(uint64_t Addr) const {
  328. std::lock_guard<std::mutex> Guard(Mutex);
  329. return Ranges.contains(Addr);
  330. }