123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362 |
- //===- GsymCreator.cpp ----------------------------------------------------===//
- //
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- // See https://llvm.org/LICENSE.txt for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- //===----------------------------------------------------------------------===//
- #include "llvm/DebugInfo/GSYM/GsymCreator.h"
- #include "llvm/DebugInfo/GSYM/FileWriter.h"
- #include "llvm/DebugInfo/GSYM/Header.h"
- #include "llvm/DebugInfo/GSYM/LineTable.h"
- #include "llvm/MC/StringTableBuilder.h"
- #include "llvm/Support/raw_ostream.h"
- #include <algorithm>
- #include <cassert>
- #include <functional>
- #include <vector>
- using namespace llvm;
- using namespace gsym;
- GsymCreator::GsymCreator(bool Quiet)
- : StrTab(StringTableBuilder::ELF), Quiet(Quiet) {
- insertFile(StringRef());
- }
- uint32_t GsymCreator::insertFile(StringRef Path, llvm::sys::path::Style Style) {
- llvm::StringRef directory = llvm::sys::path::parent_path(Path, Style);
- llvm::StringRef filename = llvm::sys::path::filename(Path, Style);
- // We must insert the strings first, then call the FileEntry constructor.
- // If we inline the insertString() function call into the constructor, the
- // call order is undefined due to parameter lists not having any ordering
- // requirements.
- const uint32_t Dir = insertString(directory);
- const uint32_t Base = insertString(filename);
- FileEntry FE(Dir, Base);
- std::lock_guard<std::mutex> Guard(Mutex);
- const auto NextIndex = Files.size();
- // Find FE in hash map and insert if not present.
- auto R = FileEntryToIndex.insert(std::make_pair(FE, NextIndex));
- if (R.second)
- Files.emplace_back(FE);
- return R.first->second;
- }
- llvm::Error GsymCreator::save(StringRef Path,
- llvm::support::endianness ByteOrder) const {
- std::error_code EC;
- raw_fd_ostream OutStrm(Path, EC);
- if (EC)
- return llvm::errorCodeToError(EC);
- FileWriter O(OutStrm, ByteOrder);
- return encode(O);
- }
- llvm::Error GsymCreator::encode(FileWriter &O) const {
- std::lock_guard<std::mutex> Guard(Mutex);
- if (Funcs.empty())
- return createStringError(std::errc::invalid_argument,
- "no functions to encode");
- if (!Finalized)
- return createStringError(std::errc::invalid_argument,
- "GsymCreator wasn't finalized prior to encoding");
- if (Funcs.size() > UINT32_MAX)
- return createStringError(std::errc::invalid_argument,
- "too many FunctionInfos");
- const uint64_t MinAddr =
- BaseAddress ? *BaseAddress : Funcs.front().startAddress();
- const uint64_t MaxAddr = Funcs.back().startAddress();
- const uint64_t AddrDelta = MaxAddr - MinAddr;
- Header Hdr;
- Hdr.Magic = GSYM_MAGIC;
- Hdr.Version = GSYM_VERSION;
- Hdr.AddrOffSize = 0;
- Hdr.UUIDSize = static_cast<uint8_t>(UUID.size());
- Hdr.BaseAddress = MinAddr;
- Hdr.NumAddresses = static_cast<uint32_t>(Funcs.size());
- Hdr.StrtabOffset = 0; // We will fix this up later.
- Hdr.StrtabSize = 0; // We will fix this up later.
- memset(Hdr.UUID, 0, sizeof(Hdr.UUID));
- if (UUID.size() > sizeof(Hdr.UUID))
- return createStringError(std::errc::invalid_argument,
- "invalid UUID size %u", (uint32_t)UUID.size());
- // Set the address offset size correctly in the GSYM header.
- if (AddrDelta <= UINT8_MAX)
- Hdr.AddrOffSize = 1;
- else if (AddrDelta <= UINT16_MAX)
- Hdr.AddrOffSize = 2;
- else if (AddrDelta <= UINT32_MAX)
- Hdr.AddrOffSize = 4;
- else
- Hdr.AddrOffSize = 8;
- // Copy the UUID value if we have one.
- if (UUID.size() > 0)
- memcpy(Hdr.UUID, UUID.data(), UUID.size());
- // Write out the header.
- llvm::Error Err = Hdr.encode(O);
- if (Err)
- return Err;
- // Write out the address offsets.
- O.alignTo(Hdr.AddrOffSize);
- for (const auto &FuncInfo : Funcs) {
- uint64_t AddrOffset = FuncInfo.startAddress() - Hdr.BaseAddress;
- switch (Hdr.AddrOffSize) {
- case 1:
- O.writeU8(static_cast<uint8_t>(AddrOffset));
- break;
- case 2:
- O.writeU16(static_cast<uint16_t>(AddrOffset));
- break;
- case 4:
- O.writeU32(static_cast<uint32_t>(AddrOffset));
- break;
- case 8:
- O.writeU64(AddrOffset);
- break;
- }
- }
- // Write out all zeros for the AddrInfoOffsets.
- O.alignTo(4);
- const off_t AddrInfoOffsetsOffset = O.tell();
- for (size_t i = 0, n = Funcs.size(); i < n; ++i)
- O.writeU32(0);
- // Write out the file table
- O.alignTo(4);
- assert(!Files.empty());
- assert(Files[0].Dir == 0);
- assert(Files[0].Base == 0);
- size_t NumFiles = Files.size();
- if (NumFiles > UINT32_MAX)
- return createStringError(std::errc::invalid_argument, "too many files");
- O.writeU32(static_cast<uint32_t>(NumFiles));
- for (auto File : Files) {
- O.writeU32(File.Dir);
- O.writeU32(File.Base);
- }
- // Write out the sting table.
- const off_t StrtabOffset = O.tell();
- StrTab.write(O.get_stream());
- const off_t StrtabSize = O.tell() - StrtabOffset;
- std::vector<uint32_t> AddrInfoOffsets;
- // Write out the address infos for each function info.
- for (const auto &FuncInfo : Funcs) {
- if (Expected<uint64_t> OffsetOrErr = FuncInfo.encode(O))
- AddrInfoOffsets.push_back(OffsetOrErr.get());
- else
- return OffsetOrErr.takeError();
- }
- // Fixup the string table offset and size in the header
- O.fixup32((uint32_t)StrtabOffset, offsetof(Header, StrtabOffset));
- O.fixup32((uint32_t)StrtabSize, offsetof(Header, StrtabSize));
- // Fixup all address info offsets
- uint64_t Offset = 0;
- for (auto AddrInfoOffset : AddrInfoOffsets) {
- O.fixup32(AddrInfoOffset, AddrInfoOffsetsOffset + Offset);
- Offset += 4;
- }
- return ErrorSuccess();
- }
// Coalesces adjacent elements of [FirstIt, LastIt) using a binary predicate.
//
// Pred(Prev, Curr) is called for every element Curr after the first one, where
// Prev is the element that immediately precedes Curr in the input order. When
// Pred returns true, Prev is dropped and Curr takes its place; when it returns
// false, both elements are kept. Dropping the *earlier* element is what the
// caller in finalize() needs: its predicate returns true to say "remove Prev
// in favor of Curr".
//
// Like std::remove_if, surviving elements are compacted to the front of the
// range in their original relative order and the new logical end is returned;
// the caller is expected to erase [result, LastIt).
template <class ForwardIt, class BinaryPredicate>
static ForwardIt removeIfBinary(ForwardIt FirstIt, ForwardIt LastIt,
                                BinaryPredicate Pred) {
  if (FirstIt == LastIt)
    return LastIt;

  // KeptIt is the slot of the most recently retained element. After each
  // iteration it holds the value that was at CurrIt, so the next call to Pred
  // always sees the immediate predecessor as Prev.
  ForwardIt KeptIt = FirstIt;
  ForwardIt CurrIt = FirstIt;
  for (++CurrIt; CurrIt != LastIt; ++CurrIt) {
    // Keep Prev by advancing to a fresh slot; otherwise Curr overwrites it.
    if (!Pred(*KeptIt, *CurrIt))
      ++KeptIt;
    // Skip the self-move while nothing has been dropped yet.
    if (KeptIt != CurrIt)
      *KeptIt = std::move(*CurrIt);
  }
  return ++KeptIt;
}
- llvm::Error GsymCreator::finalize(llvm::raw_ostream &OS) {
- std::lock_guard<std::mutex> Guard(Mutex);
- if (Finalized)
- return createStringError(std::errc::invalid_argument, "already finalized");
- Finalized = true;
- // Sort function infos so we can emit sorted functions.
- llvm::sort(Funcs);
- // Don't let the string table indexes change by finalizing in order.
- StrTab.finalizeInOrder();
- // Remove duplicates function infos that have both entries from debug info
- // (DWARF or Breakpad) and entries from the SymbolTable.
- //
- // Also handle overlapping function. Usually there shouldn't be any, but they
- // can and do happen in some rare cases.
- //
- // (a) (b) (c)
- // ^ ^ ^ ^
- // |X |Y |X ^ |X
- // | | | |Y | ^
- // | | | v v |Y
- // v v v v
- //
- // In (a) and (b), Y is ignored and X will be reported for the full range.
- // In (c), both functions will be included in the result and lookups for an
- // address in the intersection will return Y because of binary search.
- //
- // Note that in case of (b), we cannot include Y in the result because then
- // we wouldn't find any function for range (end of Y, end of X)
- // with binary search
- auto NumBefore = Funcs.size();
- Funcs.erase(
- removeIfBinary(Funcs.begin(), Funcs.end(),
- [&](const auto &Prev, const auto &Curr) {
- // Empty ranges won't intersect, but we still need to
- // catch the case where we have multiple symbols at the
- // same address and coalesce them.
- const bool ranges_equal = Prev.Range == Curr.Range;
- if (ranges_equal || Prev.Range.intersects(Curr.Range)) {
- // Overlapping ranges or empty identical ranges.
- if (ranges_equal) {
- // Same address range. Check if one is from debug
- // info and the other is from a symbol table. If
- // so, then keep the one with debug info. Our
- // sorting guarantees that entries with matching
- // address ranges that have debug info are last in
- // the sort.
- if (Prev == Curr) {
- // FunctionInfo entries match exactly (range,
- // lines, inlines)
- // We used to output a warning here, but this was
- // so frequent on some binaries, in particular
- // when those were built with GCC, that it slowed
- // down processing extremely.
- return true;
- } else {
- if (!Prev.hasRichInfo() && Curr.hasRichInfo()) {
- // Same address range, one with no debug info
- // (symbol) and the next with debug info. Keep
- // the latter.
- return true;
- } else {
- if (!Quiet) {
- OS << "warning: same address range contains "
- "different debug "
- << "info. Removing:\n"
- << Prev << "\nIn favor of this one:\n"
- << Curr << "\n";
- }
- return true;
- }
- }
- } else {
- if (!Quiet) { // print warnings about overlaps
- OS << "warning: function ranges overlap:\n"
- << Prev << "\n"
- << Curr << "\n";
- }
- }
- } else if (Prev.Range.size() == 0 &&
- Curr.Range.contains(Prev.Range.start())) {
- if (!Quiet) {
- OS << "warning: removing symbol:\n"
- << Prev << "\nKeeping:\n"
- << Curr << "\n";
- }
- return true;
- }
- return false;
- }),
- Funcs.end());
- // If our last function info entry doesn't have a size and if we have valid
- // text ranges, we should set the size of the last entry since any search for
- // a high address might match our last entry. By fixing up this size, we can
- // help ensure we don't cause lookups to always return the last symbol that
- // has no size when doing lookups.
- if (!Funcs.empty() && Funcs.back().Range.size() == 0 && ValidTextRanges) {
- if (auto Range =
- ValidTextRanges->getRangeThatContains(Funcs.back().Range.start())) {
- Funcs.back().Range = {Funcs.back().Range.start(), Range->end()};
- }
- }
- OS << "Pruned " << NumBefore - Funcs.size() << " functions, ended with "
- << Funcs.size() << " total\n";
- return Error::success();
- }
- uint32_t GsymCreator::insertString(StringRef S, bool Copy) {
- if (S.empty())
- return 0;
- // The hash can be calculated outside the lock.
- CachedHashStringRef CHStr(S);
- std::lock_guard<std::mutex> Guard(Mutex);
- if (Copy) {
- // We need to provide backing storage for the string if requested
- // since StringTableBuilder stores references to strings. Any string
- // that comes from a section in an object file doesn't need to be
- // copied, but any string created by code will need to be copied.
- // This allows GsymCreator to be really fast when parsing DWARF and
- // other object files as most strings don't need to be copied.
- if (!StrTab.contains(CHStr))
- CHStr = CachedHashStringRef{StringStorage.insert(S).first->getKey(),
- CHStr.hash()};
- }
- return StrTab.add(CHStr);
- }
- void GsymCreator::addFunctionInfo(FunctionInfo &&FI) {
- std::lock_guard<std::mutex> Guard(Mutex);
- Ranges.insert(FI.Range);
- Funcs.emplace_back(std::move(FI));
- }
- void GsymCreator::forEachFunctionInfo(
- std::function<bool(FunctionInfo &)> const &Callback) {
- std::lock_guard<std::mutex> Guard(Mutex);
- for (auto &FI : Funcs) {
- if (!Callback(FI))
- break;
- }
- }
- void GsymCreator::forEachFunctionInfo(
- std::function<bool(const FunctionInfo &)> const &Callback) const {
- std::lock_guard<std::mutex> Guard(Mutex);
- for (const auto &FI : Funcs) {
- if (!Callback(FI))
- break;
- }
- }
- size_t GsymCreator::getNumFunctionInfos() const {
- std::lock_guard<std::mutex> Guard(Mutex);
- return Funcs.size();
- }
- bool GsymCreator::IsValidTextAddress(uint64_t Addr) const {
- if (ValidTextRanges)
- return ValidTextRanges->contains(Addr);
- return true; // No valid text ranges has been set, so accept all ranges.
- }
- bool GsymCreator::hasFunctionInfoForAddress(uint64_t Addr) const {
- std::lock_guard<std::mutex> Guard(Mutex);
- return Ranges.contains(Addr);
- }
|