//===-- sancov.cpp --------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // This file is a command-line tool for reading and analyzing sanitizer // coverage. //===----------------------------------------------------------------------===// #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" #include "llvm/DebugInfo/Symbolize/Symbolize.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrAnalysis.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCTargetOptions.h" #include "llvm/MC/TargetRegistry.h" #include "llvm/Object/Archive.h" #include "llvm/Object/Binary.h" #include "llvm/Object/COFF.h" #include "llvm/Object/MachO.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Errc.h" #include "llvm/Support/ErrorOr.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/InitLLVM.h" #include "llvm/Support/JSON.h" #include "llvm/Support/MD5.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" #include "llvm/Support/Regex.h" #include "llvm/Support/SHA1.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/SpecialCaseList.h" #include "llvm/Support/TargetSelect.h" #include "llvm/Support/VirtualFileSystem.h" #include "llvm/Support/YAMLParser.h" #include "llvm/Support/raw_ostream.h" #include #include using namespace llvm; namespace { // --------- COMMAND LINE FLAGS --------- enum ActionType { CoveredFunctionsAction, HtmlReportAction, MergeAction, NotCoveredFunctionsAction, PrintAction, PrintCovPointsAction, StatsAction, SymbolizeAction }; cl::opt Action( cl::desc("Action (required)"), cl::Required, cl::values( clEnumValN(PrintAction, "print", "Print coverage addresses"), clEnumValN(PrintCovPointsAction, "print-coverage-pcs", "Print coverage instrumentation points addresses."), clEnumValN(CoveredFunctionsAction, "covered-functions", "Print all covered funcions."), clEnumValN(NotCoveredFunctionsAction, "not-covered-functions", "Print all not covered funcions."), clEnumValN(StatsAction, "print-coverage-stats", "Print coverage statistics."), clEnumValN(HtmlReportAction, "html-report", "REMOVED. Use -symbolize & coverage-report-server.py."), clEnumValN(SymbolizeAction, "symbolize", "Produces a symbolized JSON report from binary report."), clEnumValN(MergeAction, "merge", "Merges reports."))); static cl::list ClInputFiles(cl::Positional, cl::OneOrMore, cl::desc(" <.sancov files...> " "<.symcov files...>")); static cl::opt ClDemangle("demangle", cl::init(true), cl::desc("Print demangled function name.")); static cl::opt ClSkipDeadFiles("skip-dead-files", cl::init(true), cl::desc("Do not list dead source files in reports.")); static cl::opt ClStripPathPrefix( "strip_path_prefix", cl::init(""), cl::desc("Strip this prefix from file paths in reports.")); static cl::opt ClBlacklist("blacklist", cl::init(""), cl::desc("Blacklist file (sanitizer blacklist format).")); static cl::opt ClUseDefaultBlacklist( "use_default_blacklist", cl::init(true), cl::Hidden, cl::desc("Controls if default blacklist should be used.")); static const char *const DefaultBlacklistStr = "fun:__sanitizer_.*\n" "src:/usr/include/.*\n" "src:.*/libc\\+\\+/.*\n"; // --------- FORMAT SPECIFICATION --------- struct FileHeader { uint32_t Bitness; uint32_t Magic; }; static const uint32_t BinCoverageMagic = 0xC0BFFFFF; static const uint32_t Bitness32 = 0xFFFFFF32; static const uint32_t Bitness64 = 0xFFFFFF64; static const Regex SancovFileRegex("(.*)\\.[0-9]+\\.sancov"); static const Regex SymcovFileRegex(".*\\.symcov"); // --------- MAIN DATASTRUCTURES ---------- // Contents of .sancov file: list of coverage point addresses that were // executed. struct RawCoverage { explicit RawCoverage(std::unique_ptr> Addrs) : Addrs(std::move(Addrs)) {} // Read binary .sancov file. static ErrorOr> read(const std::string &FileName); std::unique_ptr> Addrs; }; // Coverage point has an opaque Id and corresponds to multiple source locations. struct CoveragePoint { explicit CoveragePoint(const std::string &Id) : Id(Id) {} std::string Id; SmallVector Locs; }; // Symcov file content: set of covered Ids plus information about all available // coverage points. struct SymbolizedCoverage { // Read json .symcov file. static std::unique_ptr read(const std::string &InputFile); std::set CoveredIds; std::string BinaryHash; std::vector Points; }; struct CoverageStats { size_t AllPoints; size_t CovPoints; size_t AllFns; size_t CovFns; }; // --------- ERROR HANDLING --------- static void fail(const llvm::Twine &E) { errs() << "ERROR: " << E << "\n"; exit(1); } static void failIf(bool B, const llvm::Twine &E) { if (B) fail(E); } static void failIfError(std::error_code Error) { if (!Error) return; errs() << "ERROR: " << Error.message() << "(" << Error.value() << ")\n"; exit(1); } template static void failIfError(const ErrorOr &E) { failIfError(E.getError()); } static void failIfError(Error Err) { if (Err) { logAllUnhandledErrors(std::move(Err), errs(), "ERROR: "); exit(1); } } template static void failIfError(Expected &E) { failIfError(E.takeError()); } static void failIfNotEmpty(const llvm::Twine &E) { if (E.str().empty()) return; fail(E); } template static void failIfEmpty(const std::unique_ptr &Ptr, const std::string &Message) { if (Ptr.get()) return; fail(Message); } // ----------- Coverage I/O ---------- template static void readInts(const char *Start, const char *End, std::set *Ints) { const T *S = reinterpret_cast(Start); const T *E = reinterpret_cast(End); std::copy(S, E, std::inserter(*Ints, Ints->end())); } ErrorOr> RawCoverage::read(const std::string &FileName) { ErrorOr> BufOrErr = MemoryBuffer::getFile(FileName); if (!BufOrErr) return BufOrErr.getError(); std::unique_ptr Buf = std::move(BufOrErr.get()); if (Buf->getBufferSize() < 8) { errs() << "File too small (<8): " << Buf->getBufferSize() << '\n'; return make_error_code(errc::illegal_byte_sequence); } const FileHeader *Header = reinterpret_cast(Buf->getBufferStart()); if (Header->Magic != BinCoverageMagic) { errs() << "Wrong magic: " << Header->Magic << '\n'; return make_error_code(errc::illegal_byte_sequence); } auto Addrs = std::make_unique>(); switch (Header->Bitness) { case Bitness64: readInts(Buf->getBufferStart() + 8, Buf->getBufferEnd(), Addrs.get()); break; case Bitness32: readInts(Buf->getBufferStart() + 8, Buf->getBufferEnd(), Addrs.get()); break; default: errs() << "Unsupported bitness: " << Header->Bitness << '\n'; return make_error_code(errc::illegal_byte_sequence); } // Ignore slots that are zero, so a runtime implementation is not required // to compactify the data. Addrs->erase(0); return std::unique_ptr(new RawCoverage(std::move(Addrs))); } // Print coverage addresses. raw_ostream &operator<<(raw_ostream &OS, const RawCoverage &CoverageData) { for (auto Addr : *CoverageData.Addrs) { OS << "0x"; OS.write_hex(Addr); OS << "\n"; } return OS; } static raw_ostream &operator<<(raw_ostream &OS, const CoverageStats &Stats) { OS << "all-edges: " << Stats.AllPoints << "\n"; OS << "cov-edges: " << Stats.CovPoints << "\n"; OS << "all-functions: " << Stats.AllFns << "\n"; OS << "cov-functions: " << Stats.CovFns << "\n"; return OS; } // Output symbolized information for coverage points in JSON. // Format: // { // '' : { // '' : { // ' : ':' &Points) { // Group points by file. std::map> PointsByFile; for (const auto &Point : Points) { for (const DILineInfo &Loc : Point.Locs) { PointsByFile[Loc.FileName].push_back(&Point); } } for (const auto &P : PointsByFile) { std::string FileName = P.first; std::map> PointsByFn; for (auto PointPtr : P.second) { for (const DILineInfo &Loc : PointPtr->Locs) { PointsByFn[Loc.FunctionName].push_back(PointPtr); } } W.attributeObject(P.first, [&] { // Group points by function. for (const auto &P : PointsByFn) { std::string FunctionName = P.first; std::set WrittenIds; W.attributeObject(FunctionName, [&] { for (const CoveragePoint *Point : P.second) { for (const auto &Loc : Point->Locs) { if (Loc.FileName != FileName || Loc.FunctionName != FunctionName) continue; if (WrittenIds.find(Point->Id) != WrittenIds.end()) continue; // Output : ":". WrittenIds.insert(Point->Id); W.attribute(Point->Id, (utostr(Loc.Line) + ":" + utostr(Loc.Column))); } } }); } }); } } static void operator<<(json::OStream &W, const SymbolizedCoverage &C) { W.object([&] { W.attributeArray("covered-points", [&] { for (const std::string &P : C.CoveredIds) { W.value(P); } }); W.attribute("binary-hash", C.BinaryHash); W.attributeObject("point-symbol-info", [&] { W << C.Points; }); }); } static std::string parseScalarString(yaml::Node *N) { SmallString<64> StringStorage; yaml::ScalarNode *S = dyn_cast(N); failIf(!S, "expected string"); return std::string(S->getValue(StringStorage)); } std::unique_ptr SymbolizedCoverage::read(const std::string &InputFile) { auto Coverage(std::make_unique()); std::map Points; ErrorOr> BufOrErr = MemoryBuffer::getFile(InputFile); failIfError(BufOrErr); SourceMgr SM; yaml::Stream S(**BufOrErr, SM); yaml::document_iterator DI = S.begin(); failIf(DI == S.end(), "empty document: " + InputFile); yaml::Node *Root = DI->getRoot(); failIf(!Root, "expecting root node: " + InputFile); yaml::MappingNode *Top = dyn_cast(Root); failIf(!Top, "expecting mapping node: " + InputFile); for (auto &KVNode : *Top) { auto Key = parseScalarString(KVNode.getKey()); if (Key == "covered-points") { yaml::SequenceNode *Points = dyn_cast(KVNode.getValue()); failIf(!Points, "expected array: " + InputFile); for (auto I = Points->begin(), E = Points->end(); I != E; ++I) { Coverage->CoveredIds.insert(parseScalarString(&*I)); } } else if (Key == "binary-hash") { Coverage->BinaryHash = parseScalarString(KVNode.getValue()); } else if (Key == "point-symbol-info") { yaml::MappingNode *PointSymbolInfo = dyn_cast(KVNode.getValue()); failIf(!PointSymbolInfo, "expected mapping node: " + InputFile); for (auto &FileKVNode : *PointSymbolInfo) { auto Filename = parseScalarString(FileKVNode.getKey()); yaml::MappingNode *FileInfo = dyn_cast(FileKVNode.getValue()); failIf(!FileInfo, "expected mapping node: " + InputFile); for (auto &FunctionKVNode : *FileInfo) { auto FunctionName = parseScalarString(FunctionKVNode.getKey()); yaml::MappingNode *FunctionInfo = dyn_cast(FunctionKVNode.getValue()); failIf(!FunctionInfo, "expected mapping node: " + InputFile); for (auto &PointKVNode : *FunctionInfo) { auto PointId = parseScalarString(PointKVNode.getKey()); auto Loc = parseScalarString(PointKVNode.getValue()); size_t ColonPos = Loc.find(':'); failIf(ColonPos == std::string::npos, "expected ':': " + InputFile); auto LineStr = Loc.substr(0, ColonPos); auto ColStr = Loc.substr(ColonPos + 1, Loc.size()); if (Points.find(PointId) == Points.end()) Points.insert(std::make_pair(PointId, CoveragePoint(PointId))); DILineInfo LineInfo; LineInfo.FileName = Filename; LineInfo.FunctionName = FunctionName; char *End; LineInfo.Line = std::strtoul(LineStr.c_str(), &End, 10); LineInfo.Column = std::strtoul(ColStr.c_str(), &End, 10); CoveragePoint *CoveragePoint = &Points.find(PointId)->second; CoveragePoint->Locs.push_back(LineInfo); } } } } else { errs() << "Ignoring unknown key: " << Key << "\n"; } } for (auto &KV : Points) { Coverage->Points.push_back(KV.second); } return Coverage; } // ---------- MAIN FUNCTIONALITY ---------- std::string stripPathPrefix(std::string Path) { if (ClStripPathPrefix.empty()) return Path; size_t Pos = Path.find(ClStripPathPrefix); if (Pos == std::string::npos) return Path; return Path.substr(Pos + ClStripPathPrefix.size()); } static std::unique_ptr createSymbolizer() { symbolize::LLVMSymbolizer::Options SymbolizerOptions; SymbolizerOptions.Demangle = ClDemangle; SymbolizerOptions.UseSymbolTable = true; return std::unique_ptr( new symbolize::LLVMSymbolizer(SymbolizerOptions)); } static std::string normalizeFilename(const std::string &FileName) { SmallString<256> S(FileName); sys::path::remove_dots(S, /* remove_dot_dot */ true); return stripPathPrefix(sys::path::convert_to_slash(std::string(S))); } class Blacklists { public: Blacklists() : DefaultBlacklist(createDefaultBlacklist()), UserBlacklist(createUserBlacklist()) {} bool isBlacklisted(const DILineInfo &I) { if (DefaultBlacklist && DefaultBlacklist->inSection("sancov", "fun", I.FunctionName)) return true; if (DefaultBlacklist && DefaultBlacklist->inSection("sancov", "src", I.FileName)) return true; if (UserBlacklist && UserBlacklist->inSection("sancov", "fun", I.FunctionName)) return true; if (UserBlacklist && UserBlacklist->inSection("sancov", "src", I.FileName)) return true; return false; } private: static std::unique_ptr createDefaultBlacklist() { if (!ClUseDefaultBlacklist) return std::unique_ptr(); std::unique_ptr MB = MemoryBuffer::getMemBuffer(DefaultBlacklistStr); std::string Error; auto Blacklist = SpecialCaseList::create(MB.get(), Error); failIfNotEmpty(Error); return Blacklist; } static std::unique_ptr createUserBlacklist() { if (ClBlacklist.empty()) return std::unique_ptr(); return SpecialCaseList::createOrDie({{ClBlacklist}}, *vfs::getRealFileSystem()); } std::unique_ptr DefaultBlacklist; std::unique_ptr UserBlacklist; }; static std::vector getCoveragePoints(const std::string &ObjectFile, const std::set &Addrs, const std::set &CoveredAddrs) { std::vector Result; auto Symbolizer(createSymbolizer()); Blacklists B; std::set CoveredFiles; if (ClSkipDeadFiles) { for (auto Addr : CoveredAddrs) { // TODO: it would be neccessary to set proper section index here. // object::SectionedAddress::UndefSection works for only absolute // addresses. object::SectionedAddress ModuleAddress = { Addr, object::SectionedAddress::UndefSection}; auto LineInfo = Symbolizer->symbolizeCode(ObjectFile, ModuleAddress); failIfError(LineInfo); CoveredFiles.insert(LineInfo->FileName); auto InliningInfo = Symbolizer->symbolizeInlinedCode(ObjectFile, ModuleAddress); failIfError(InliningInfo); for (uint32_t I = 0; I < InliningInfo->getNumberOfFrames(); ++I) { auto FrameInfo = InliningInfo->getFrame(I); CoveredFiles.insert(FrameInfo.FileName); } } } for (auto Addr : Addrs) { std::set Infos; // deduplicate debug info. // TODO: it would be neccessary to set proper section index here. // object::SectionedAddress::UndefSection works for only absolute addresses. object::SectionedAddress ModuleAddress = { Addr, object::SectionedAddress::UndefSection}; auto LineInfo = Symbolizer->symbolizeCode(ObjectFile, ModuleAddress); failIfError(LineInfo); if (ClSkipDeadFiles && CoveredFiles.find(LineInfo->FileName) == CoveredFiles.end()) continue; LineInfo->FileName = normalizeFilename(LineInfo->FileName); if (B.isBlacklisted(*LineInfo)) continue; auto Id = utohexstr(Addr, true); auto Point = CoveragePoint(Id); Infos.insert(*LineInfo); Point.Locs.push_back(*LineInfo); auto InliningInfo = Symbolizer->symbolizeInlinedCode(ObjectFile, ModuleAddress); failIfError(InliningInfo); for (uint32_t I = 0; I < InliningInfo->getNumberOfFrames(); ++I) { auto FrameInfo = InliningInfo->getFrame(I); if (ClSkipDeadFiles && CoveredFiles.find(FrameInfo.FileName) == CoveredFiles.end()) continue; FrameInfo.FileName = normalizeFilename(FrameInfo.FileName); if (B.isBlacklisted(FrameInfo)) continue; if (Infos.find(FrameInfo) == Infos.end()) { Infos.insert(FrameInfo); Point.Locs.push_back(FrameInfo); } } Result.push_back(Point); } return Result; } static bool isCoveragePointSymbol(StringRef Name) { return Name == "__sanitizer_cov" || Name == "__sanitizer_cov_with_check" || Name == "__sanitizer_cov_trace_func_enter" || Name == "__sanitizer_cov_trace_pc_guard" || // Mac has '___' prefix Name == "___sanitizer_cov" || Name == "___sanitizer_cov_with_check" || Name == "___sanitizer_cov_trace_func_enter" || Name == "___sanitizer_cov_trace_pc_guard"; } // Locate __sanitizer_cov* function addresses inside the stubs table on MachO. static void findMachOIndirectCovFunctions(const object::MachOObjectFile &O, std::set *Result) { MachO::dysymtab_command Dysymtab = O.getDysymtabLoadCommand(); MachO::symtab_command Symtab = O.getSymtabLoadCommand(); for (const auto &Load : O.load_commands()) { if (Load.C.cmd == MachO::LC_SEGMENT_64) { MachO::segment_command_64 Seg = O.getSegment64LoadCommand(Load); for (unsigned J = 0; J < Seg.nsects; ++J) { MachO::section_64 Sec = O.getSection64(Load, J); uint32_t SectionType = Sec.flags & MachO::SECTION_TYPE; if (SectionType == MachO::S_SYMBOL_STUBS) { uint32_t Stride = Sec.reserved2; uint32_t Cnt = Sec.size / Stride; uint32_t N = Sec.reserved1; for (uint32_t J = 0; J < Cnt && N + J < Dysymtab.nindirectsyms; J++) { uint32_t IndirectSymbol = O.getIndirectSymbolTableEntry(Dysymtab, N + J); uint64_t Addr = Sec.addr + J * Stride; if (IndirectSymbol < Symtab.nsyms) { object::SymbolRef Symbol = *(O.getSymbolByIndex(IndirectSymbol)); Expected Name = Symbol.getName(); failIfError(Name); if (isCoveragePointSymbol(Name.get())) { Result->insert(Addr); } } } } } } if (Load.C.cmd == MachO::LC_SEGMENT) { errs() << "ERROR: 32 bit MachO binaries not supported\n"; } } } // Locate __sanitizer_cov* function addresses that are used for coverage // reporting. static std::set findSanitizerCovFunctions(const object::ObjectFile &O) { std::set Result; for (const object::SymbolRef &Symbol : O.symbols()) { Expected AddressOrErr = Symbol.getAddress(); failIfError(AddressOrErr); uint64_t Address = AddressOrErr.get(); Expected NameOrErr = Symbol.getName(); failIfError(NameOrErr); StringRef Name = NameOrErr.get(); Expected FlagsOrErr = Symbol.getFlags(); // TODO: Test this error. failIfError(FlagsOrErr); uint32_t Flags = FlagsOrErr.get(); if (!(Flags & object::BasicSymbolRef::SF_Undefined) && isCoveragePointSymbol(Name)) { Result.insert(Address); } } if (const auto *CO = dyn_cast(&O)) { for (const object::ExportDirectoryEntryRef &Export : CO->export_directories()) { uint32_t RVA; failIfError(Export.getExportRVA(RVA)); StringRef Name; failIfError(Export.getSymbolName(Name)); if (isCoveragePointSymbol(Name)) Result.insert(CO->getImageBase() + RVA); } } if (const auto *MO = dyn_cast(&O)) { findMachOIndirectCovFunctions(*MO, &Result); } return Result; } static uint64_t getPreviousInstructionPc(uint64_t PC, Triple TheTriple) { if (TheTriple.isARM()) { return (PC - 3) & (~1); } else if (TheTriple.isAArch64()) { return PC - 4; } else if (TheTriple.isMIPS()) { return PC - 8; } else { return PC - 1; } } // Locate addresses of all coverage points in a file. Coverage point // is defined as the 'address of instruction following __sanitizer_cov // call - 1'. static void getObjectCoveragePoints(const object::ObjectFile &O, std::set *Addrs) { Triple TheTriple("unknown-unknown-unknown"); TheTriple.setArch(Triple::ArchType(O.getArch())); auto TripleName = TheTriple.getTriple(); std::string Error; const Target *TheTarget = TargetRegistry::lookupTarget(TripleName, Error); failIfNotEmpty(Error); std::unique_ptr STI( TheTarget->createMCSubtargetInfo(TripleName, "", "")); failIfEmpty(STI, "no subtarget info for target " + TripleName); std::unique_ptr MRI( TheTarget->createMCRegInfo(TripleName)); failIfEmpty(MRI, "no register info for target " + TripleName); MCTargetOptions MCOptions; std::unique_ptr AsmInfo( TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions)); failIfEmpty(AsmInfo, "no asm info for target " + TripleName); MCContext Ctx(TheTriple, AsmInfo.get(), MRI.get(), STI.get()); std::unique_ptr DisAsm( TheTarget->createMCDisassembler(*STI, Ctx)); failIfEmpty(DisAsm, "no disassembler info for target " + TripleName); std::unique_ptr MII(TheTarget->createMCInstrInfo()); failIfEmpty(MII, "no instruction info for target " + TripleName); std::unique_ptr MIA( TheTarget->createMCInstrAnalysis(MII.get())); failIfEmpty(MIA, "no instruction analysis info for target " + TripleName); auto SanCovAddrs = findSanitizerCovFunctions(O); if (SanCovAddrs.empty()) fail("__sanitizer_cov* functions not found"); for (object::SectionRef Section : O.sections()) { if (Section.isVirtual() || !Section.isText()) // llvm-objdump does the same. continue; uint64_t SectionAddr = Section.getAddress(); uint64_t SectSize = Section.getSize(); if (!SectSize) continue; Expected BytesStr = Section.getContents(); failIfError(BytesStr); ArrayRef Bytes = arrayRefFromStringRef(*BytesStr); for (uint64_t Index = 0, Size = 0; Index < Section.getSize(); Index += Size) { MCInst Inst; if (!DisAsm->getInstruction(Inst, Size, Bytes.slice(Index), SectionAddr + Index, nulls())) { if (Size == 0) Size = 1; continue; } uint64_t Addr = Index + SectionAddr; // Sanitizer coverage uses the address of the next instruction - 1. uint64_t CovPoint = getPreviousInstructionPc(Addr + Size, TheTriple); uint64_t Target; if (MIA->isCall(Inst) && MIA->evaluateBranch(Inst, SectionAddr + Index, Size, Target) && SanCovAddrs.find(Target) != SanCovAddrs.end()) Addrs->insert(CovPoint); } } } static void visitObjectFiles(const object::Archive &A, function_ref Fn) { Error Err = Error::success(); for (auto &C : A.children(Err)) { Expected> ChildOrErr = C.getAsBinary(); failIfError(ChildOrErr); if (auto *O = dyn_cast(&*ChildOrErr.get())) Fn(*O); else failIfError(object::object_error::invalid_file_type); } failIfError(std::move(Err)); } static void visitObjectFiles(const std::string &FileName, function_ref Fn) { Expected> BinaryOrErr = object::createBinary(FileName); if (!BinaryOrErr) failIfError(BinaryOrErr); object::Binary &Binary = *BinaryOrErr.get().getBinary(); if (object::Archive *A = dyn_cast(&Binary)) visitObjectFiles(*A, Fn); else if (object::ObjectFile *O = dyn_cast(&Binary)) Fn(*O); else failIfError(object::object_error::invalid_file_type); } static std::set findSanitizerCovFunctions(const std::string &FileName) { std::set Result; visitObjectFiles(FileName, [&](const object::ObjectFile &O) { auto Addrs = findSanitizerCovFunctions(O); Result.insert(Addrs.begin(), Addrs.end()); }); return Result; } // Locate addresses of all coverage points in a file. Coverage point // is defined as the 'address of instruction following __sanitizer_cov // call - 1'. static std::set findCoveragePointAddrs(const std::string &FileName) { std::set Result; visitObjectFiles(FileName, [&](const object::ObjectFile &O) { getObjectCoveragePoints(O, &Result); }); return Result; } static void printCovPoints(const std::string &ObjFile, raw_ostream &OS) { for (uint64_t Addr : findCoveragePointAddrs(ObjFile)) { OS << "0x"; OS.write_hex(Addr); OS << "\n"; } } static ErrorOr isCoverageFile(const std::string &FileName) { auto ShortFileName = llvm::sys::path::filename(FileName); if (!SancovFileRegex.match(ShortFileName)) return false; ErrorOr> BufOrErr = MemoryBuffer::getFile(FileName); if (!BufOrErr) { errs() << "Warning: " << BufOrErr.getError().message() << "(" << BufOrErr.getError().value() << "), filename: " << llvm::sys::path::filename(FileName) << "\n"; return BufOrErr.getError(); } std::unique_ptr Buf = std::move(BufOrErr.get()); if (Buf->getBufferSize() < 8) { return false; } const FileHeader *Header = reinterpret_cast(Buf->getBufferStart()); return Header->Magic == BinCoverageMagic; } static bool isSymbolizedCoverageFile(const std::string &FileName) { auto ShortFileName = llvm::sys::path::filename(FileName); return SymcovFileRegex.match(ShortFileName); } static std::unique_ptr symbolize(const RawCoverage &Data, const std::string ObjectFile) { auto Coverage = std::make_unique(); ErrorOr> BufOrErr = MemoryBuffer::getFile(ObjectFile); failIfError(BufOrErr); SHA1 Hasher; Hasher.update((*BufOrErr)->getBuffer()); Coverage->BinaryHash = toHex(Hasher.final()); Blacklists B; auto Symbolizer(createSymbolizer()); for (uint64_t Addr : *Data.Addrs) { // TODO: it would be neccessary to set proper section index here. // object::SectionedAddress::UndefSection works for only absolute addresses. auto LineInfo = Symbolizer->symbolizeCode( ObjectFile, {Addr, object::SectionedAddress::UndefSection}); failIfError(LineInfo); if (B.isBlacklisted(*LineInfo)) continue; Coverage->CoveredIds.insert(utohexstr(Addr, true)); } std::set AllAddrs = findCoveragePointAddrs(ObjectFile); if (!std::includes(AllAddrs.begin(), AllAddrs.end(), Data.Addrs->begin(), Data.Addrs->end())) { fail("Coverage points in binary and .sancov file do not match."); } Coverage->Points = getCoveragePoints(ObjectFile, AllAddrs, *Data.Addrs); return Coverage; } struct FileFn { bool operator<(const FileFn &RHS) const { return std::tie(FileName, FunctionName) < std::tie(RHS.FileName, RHS.FunctionName); } std::string FileName; std::string FunctionName; }; static std::set computeFunctions(const std::vector &Points) { std::set Fns; for (const auto &Point : Points) { for (const auto &Loc : Point.Locs) { Fns.insert(FileFn{Loc.FileName, Loc.FunctionName}); } } return Fns; } static std::set computeNotCoveredFunctions(const SymbolizedCoverage &Coverage) { auto Fns = computeFunctions(Coverage.Points); for (const auto &Point : Coverage.Points) { if (Coverage.CoveredIds.find(Point.Id) == Coverage.CoveredIds.end()) continue; for (const auto &Loc : Point.Locs) { Fns.erase(FileFn{Loc.FileName, Loc.FunctionName}); } } return Fns; } static std::set computeCoveredFunctions(const SymbolizedCoverage &Coverage) { auto AllFns = computeFunctions(Coverage.Points); std::set Result; for (const auto &Point : Coverage.Points) { if (Coverage.CoveredIds.find(Point.Id) == Coverage.CoveredIds.end()) continue; for (const auto &Loc : Point.Locs) { Result.insert(FileFn{Loc.FileName, Loc.FunctionName}); } } return Result; } typedef std::map> FunctionLocs; // finds first location in a file for each function. static FunctionLocs resolveFunctions(const SymbolizedCoverage &Coverage, const std::set &Fns) { FunctionLocs Result; for (const auto &Point : Coverage.Points) { for (const auto &Loc : Point.Locs) { FileFn Fn = FileFn{Loc.FileName, Loc.FunctionName}; if (Fns.find(Fn) == Fns.end()) continue; auto P = std::make_pair(Loc.Line, Loc.Column); auto I = Result.find(Fn); if (I == Result.end() || I->second > P) { Result[Fn] = P; } } } return Result; } static void printFunctionLocs(const FunctionLocs &FnLocs, raw_ostream &OS) { for (const auto &P : FnLocs) { OS << stripPathPrefix(P.first.FileName) << ":" << P.second.first << " " << P.first.FunctionName << "\n"; } } CoverageStats computeStats(const SymbolizedCoverage &Coverage) { CoverageStats Stats = {Coverage.Points.size(), Coverage.CoveredIds.size(), computeFunctions(Coverage.Points).size(), computeCoveredFunctions(Coverage).size()}; return Stats; } // Print list of covered functions. // Line format: : static void printCoveredFunctions(const SymbolizedCoverage &CovData, raw_ostream &OS) { auto CoveredFns = computeCoveredFunctions(CovData); printFunctionLocs(resolveFunctions(CovData, CoveredFns), OS); } // Print list of not covered functions. // Line format: : static void printNotCoveredFunctions(const SymbolizedCoverage &CovData, raw_ostream &OS) { auto NotCoveredFns = computeNotCoveredFunctions(CovData); printFunctionLocs(resolveFunctions(CovData, NotCoveredFns), OS); } // Read list of files and merges their coverage info. static void readAndPrintRawCoverage(const std::vector &FileNames, raw_ostream &OS) { std::vector> Covs; for (const auto &FileName : FileNames) { auto Cov = RawCoverage::read(FileName); if (!Cov) continue; OS << *Cov.get(); } } static std::unique_ptr merge(const std::vector> &Coverages) { if (Coverages.empty()) return nullptr; auto Result = std::make_unique(); for (size_t I = 0; I < Coverages.size(); ++I) { const SymbolizedCoverage &Coverage = *Coverages[I]; std::string Prefix; if (Coverages.size() > 1) { // prefix is not needed when there's only one file. Prefix = utostr(I); } for (const auto &Id : Coverage.CoveredIds) { Result->CoveredIds.insert(Prefix + Id); } for (const auto &CovPoint : Coverage.Points) { CoveragePoint NewPoint(CovPoint); NewPoint.Id = Prefix + CovPoint.Id; Result->Points.push_back(NewPoint); } } if (Coverages.size() == 1) { Result->BinaryHash = Coverages[0]->BinaryHash; } return Result; } static std::unique_ptr readSymbolizeAndMergeCmdArguments(std::vector FileNames) { std::vector> Coverages; { // Short name => file name. std::map ObjFiles; std::string FirstObjFile; std::set CovFiles; // Partition input values into coverage/object files. for (const auto &FileName : FileNames) { if (isSymbolizedCoverageFile(FileName)) { Coverages.push_back(SymbolizedCoverage::read(FileName)); } auto ErrorOrIsCoverage = isCoverageFile(FileName); if (!ErrorOrIsCoverage) continue; if (ErrorOrIsCoverage.get()) { CovFiles.insert(FileName); } else { auto ShortFileName = llvm::sys::path::filename(FileName); if (ObjFiles.find(std::string(ShortFileName)) != ObjFiles.end()) { fail("Duplicate binary file with a short name: " + ShortFileName); } ObjFiles[std::string(ShortFileName)] = FileName; if (FirstObjFile.empty()) FirstObjFile = FileName; } } SmallVector Components; // Object file => list of corresponding coverage file names. std::map> CoverageByObjFile; for (const auto &FileName : CovFiles) { auto ShortFileName = llvm::sys::path::filename(FileName); auto Ok = SancovFileRegex.match(ShortFileName, &Components); if (!Ok) { fail("Can't match coverage file name against " "..sancov pattern: " + FileName); } auto Iter = ObjFiles.find(std::string(Components[1])); if (Iter == ObjFiles.end()) { fail("Object file for coverage not found: " + FileName); } CoverageByObjFile[Iter->second].push_back(FileName); }; for (const auto &Pair : ObjFiles) { auto FileName = Pair.second; if (CoverageByObjFile.find(FileName) == CoverageByObjFile.end()) errs() << "WARNING: No coverage file for " << FileName << "\n"; } // Read raw coverage and symbolize it. for (const auto &Pair : CoverageByObjFile) { if (findSanitizerCovFunctions(Pair.first).empty()) { errs() << "WARNING: Ignoring " << Pair.first << " and its coverage because __sanitizer_cov* functions were not " "found.\n"; continue; } for (const std::string &CoverageFile : Pair.second) { auto DataOrError = RawCoverage::read(CoverageFile); failIfError(DataOrError); Coverages.push_back(symbolize(*DataOrError.get(), Pair.first)); } } } return merge(Coverages); } } // namespace int main(int Argc, char **Argv) { llvm::InitLLVM X(Argc, Argv); llvm::InitializeAllTargetInfos(); llvm::InitializeAllTargetMCs(); llvm::InitializeAllDisassemblers(); cl::ParseCommandLineOptions(Argc, Argv, "Sanitizer Coverage Processing Tool (sancov)\n\n" " This tool can extract various coverage-related information from: \n" " coverage-instrumented binary files, raw .sancov files and their " "symbolized .symcov version.\n" " Depending on chosen action the tool expects different input files:\n" " -print-coverage-pcs - coverage-instrumented binary files\n" " -print-coverage - .sancov files\n" " - .sancov files & corresponding binary " "files, .symcov files\n" ); // -print doesn't need object files. if (Action == PrintAction) { readAndPrintRawCoverage(ClInputFiles, outs()); return 0; } else if (Action == PrintCovPointsAction) { // -print-coverage-points doesn't need coverage files. for (const std::string &ObjFile : ClInputFiles) { printCovPoints(ObjFile, outs()); } return 0; } auto Coverage = readSymbolizeAndMergeCmdArguments(ClInputFiles); failIf(!Coverage, "No valid coverage files given."); switch (Action) { case CoveredFunctionsAction: { printCoveredFunctions(*Coverage, outs()); return 0; } case NotCoveredFunctionsAction: { printNotCoveredFunctions(*Coverage, outs()); return 0; } case StatsAction: { outs() << computeStats(*Coverage); return 0; } case MergeAction: case SymbolizeAction: { // merge & symbolize are synonims. json::OStream W(outs(), 2); W << *Coverage; return 0; } case HtmlReportAction: errs() << "-html-report option is removed: " "use -symbolize & coverage-report-server.py instead\n"; return 1; case PrintAction: case PrintCovPointsAction: llvm_unreachable("unsupported action"); } }