//===-- Analysis.cpp --------------------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "Analysis.h" #include "BenchmarkResult.h" #include "llvm/ADT/STLExtras.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCTargetOptions.h" #include "llvm/Support/FormatVariadic.h" #include #include #include namespace llvm { namespace exegesis { static const char kCsvSep = ','; namespace { enum EscapeTag { kEscapeCsv, kEscapeHtml, kEscapeHtmlString }; template void writeEscaped(raw_ostream &OS, const StringRef S); template <> void writeEscaped(raw_ostream &OS, const StringRef S) { if (!llvm::is_contained(S, kCsvSep)) { OS << S; } else { // Needs escaping. OS << '"'; for (const char C : S) { if (C == '"') OS << "\"\""; else OS << C; } OS << '"'; } } template <> void writeEscaped(raw_ostream &OS, const StringRef S) { for (const char C : S) { if (C == '<') OS << "<"; else if (C == '>') OS << ">"; else if (C == '&') OS << "&"; else OS << C; } } template <> void writeEscaped(raw_ostream &OS, const StringRef S) { for (const char C : S) { if (C == '"') OS << "\\\""; else OS << C; } } } // namespace template static void writeClusterId(raw_ostream &OS, const InstructionBenchmarkClustering::ClusterId &CID) { if (CID.isNoise()) writeEscaped(OS, "[noise]"); else if (CID.isError()) writeEscaped(OS, "[error]"); else OS << CID.getId(); } template static void writeMeasurementValue(raw_ostream &OS, const double Value) { // Given Value, if we wanted to serialize it to a string, // how many base-10 digits will we need to store, max? static constexpr auto MaxDigitCount = std::numeric_limits::max_digits10; // Also, we will need a decimal separator. static constexpr auto DecimalSeparatorLen = 1; // '.' e.g. // So how long of a string will the serialization produce, max? static constexpr auto SerializationLen = MaxDigitCount + DecimalSeparatorLen; // WARNING: when changing the format, also adjust the small-size estimate ^. static constexpr StringLiteral SimpleFloatFormat = StringLiteral("{0:F}"); writeEscaped( OS, formatv(SimpleFloatFormat.data(), Value).sstr()); } template void Analysis::writeSnippet(raw_ostream &OS, ArrayRef Bytes, const char *Separator) const { SmallVector Lines; const auto &SI = State_.getSubtargetInfo(); // Parse the asm snippet and print it. while (!Bytes.empty()) { MCInst MI; uint64_t MISize = 0; if (!Disasm_->getInstruction(MI, MISize, Bytes, 0, nulls())) { writeEscaped(OS, join(Lines, Separator)); writeEscaped(OS, Separator); writeEscaped(OS, "[error decoding asm snippet]"); return; } SmallString<128> InstPrinterStr; // FIXME: magic number. raw_svector_ostream OSS(InstPrinterStr); InstPrinter_->printInst(&MI, 0, "", SI, OSS); Bytes = Bytes.drop_front(MISize); Lines.emplace_back(InstPrinterStr.str().trim()); } writeEscaped(OS, join(Lines, Separator)); } // Prints a row representing an instruction, along with scheduling info and // point coordinates (measurements). void Analysis::printInstructionRowCsv(const size_t PointId, raw_ostream &OS) const { const InstructionBenchmark &Point = Clustering_.getPoints()[PointId]; writeClusterId(OS, Clustering_.getClusterIdForPoint(PointId)); OS << kCsvSep; writeSnippet(OS, Point.AssembledSnippet, "; "); OS << kCsvSep; writeEscaped(OS, Point.Key.Config); OS << kCsvSep; assert(!Point.Key.Instructions.empty()); const MCInst &MCI = Point.keyInstruction(); unsigned SchedClassId; std::tie(SchedClassId, std::ignore) = ResolvedSchedClass::resolveSchedClassId( State_.getSubtargetInfo(), State_.getInstrInfo(), MCI); #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) const MCSchedClassDesc *const SCDesc = State_.getSubtargetInfo().getSchedModel().getSchedClassDesc(SchedClassId); writeEscaped(OS, SCDesc->Name); #else OS << SchedClassId; #endif for (const auto &Measurement : Point.Measurements) { OS << kCsvSep; writeMeasurementValue(OS, Measurement.PerInstructionValue); } OS << "\n"; } Analysis::Analysis(const LLVMState &State, const InstructionBenchmarkClustering &Clustering, double AnalysisInconsistencyEpsilon, bool AnalysisDisplayUnstableOpcodes) : Clustering_(Clustering), State_(State), AnalysisInconsistencyEpsilonSquared_(AnalysisInconsistencyEpsilon * AnalysisInconsistencyEpsilon), AnalysisDisplayUnstableOpcodes_(AnalysisDisplayUnstableOpcodes) { if (Clustering.getPoints().empty()) return; MCTargetOptions MCOptions; const auto &TM = State.getTargetMachine(); const auto &Triple = TM.getTargetTriple(); AsmInfo_.reset(TM.getTarget().createMCAsmInfo(State_.getRegInfo(), Triple.str(), MCOptions)); InstPrinter_.reset(TM.getTarget().createMCInstPrinter( Triple, 0 /*default variant*/, *AsmInfo_, State_.getInstrInfo(), State_.getRegInfo())); Context_ = std::make_unique( Triple, AsmInfo_.get(), &State_.getRegInfo(), &State_.getSubtargetInfo()); Disasm_.reset(TM.getTarget().createMCDisassembler(State_.getSubtargetInfo(), *Context_)); assert(Disasm_ && "cannot create MCDisassembler. missing call to " "InitializeXXXTargetDisassembler ?"); } template <> Error Analysis::run(raw_ostream &OS) const { if (Clustering_.getPoints().empty()) return Error::success(); // Write the header. OS << "cluster_id" << kCsvSep << "opcode_name" << kCsvSep << "config" << kCsvSep << "sched_class"; for (const auto &Measurement : Clustering_.getPoints().front().Measurements) { OS << kCsvSep; writeEscaped(OS, Measurement.Key); } OS << "\n"; // Write the points. for (const auto &ClusterIt : Clustering_.getValidClusters()) { for (const size_t PointId : ClusterIt.PointIndices) { printInstructionRowCsv(PointId, OS); } OS << "\n\n"; } return Error::success(); } Analysis::ResolvedSchedClassAndPoints::ResolvedSchedClassAndPoints( ResolvedSchedClass &&RSC) : RSC(std::move(RSC)) {} std::vector Analysis::makePointsPerSchedClass() const { std::vector Entries; // Maps SchedClassIds to index in result. std::unordered_map SchedClassIdToIndex; const auto &Points = Clustering_.getPoints(); for (size_t PointId = 0, E = Points.size(); PointId < E; ++PointId) { const InstructionBenchmark &Point = Points[PointId]; if (!Point.Error.empty()) continue; assert(!Point.Key.Instructions.empty()); // FIXME: we should be using the tuple of classes for instructions in the // snippet as key. const MCInst &MCI = Point.keyInstruction(); unsigned SchedClassId; bool WasVariant; std::tie(SchedClassId, WasVariant) = ResolvedSchedClass::resolveSchedClassId(State_.getSubtargetInfo(), State_.getInstrInfo(), MCI); const auto IndexIt = SchedClassIdToIndex.find(SchedClassId); if (IndexIt == SchedClassIdToIndex.end()) { // Create a new entry. SchedClassIdToIndex.emplace(SchedClassId, Entries.size()); ResolvedSchedClassAndPoints Entry(ResolvedSchedClass( State_.getSubtargetInfo(), SchedClassId, WasVariant)); Entry.PointIds.push_back(PointId); Entries.push_back(std::move(Entry)); } else { // Append to the existing entry. Entries[IndexIt->second].PointIds.push_back(PointId); } } return Entries; } // Parallel benchmarks repeat the same opcode multiple times. Just show this // opcode and show the whole snippet only on hover. static void writeParallelSnippetHtml(raw_ostream &OS, const std::vector &Instructions, const MCInstrInfo &InstrInfo) { if (Instructions.empty()) return; writeEscaped(OS, InstrInfo.getName(Instructions[0].getOpcode())); if (Instructions.size() > 1) OS << " (x" << Instructions.size() << ")"; } // Latency tries to find a serial path. Just show the opcode path and show the // whole snippet only on hover. static void writeLatencySnippetHtml(raw_ostream &OS, const std::vector &Instructions, const MCInstrInfo &InstrInfo) { bool First = true; for (const MCInst &Instr : Instructions) { if (First) First = false; else OS << " → "; writeEscaped(OS, InstrInfo.getName(Instr.getOpcode())); } } void Analysis::printPointHtml(const InstructionBenchmark &Point, llvm::raw_ostream &OS) const { OS << "
  • (OS, Point.AssembledSnippet, "\n"); OS << "\">"; switch (Point.Mode) { case InstructionBenchmark::Latency: writeLatencySnippetHtml(OS, Point.Key.Instructions, State_.getInstrInfo()); break; case InstructionBenchmark::Uops: case InstructionBenchmark::InverseThroughput: writeParallelSnippetHtml(OS, Point.Key.Instructions, State_.getInstrInfo()); break; default: llvm_unreachable("invalid mode"); } OS << " "; writeEscaped(OS, Point.Key.Config); OS << "
  • "; } void Analysis::printSchedClassClustersHtml( const std::vector &Clusters, const ResolvedSchedClass &RSC, raw_ostream &OS) const { const auto &Points = Clustering_.getPoints(); OS << ""; OS << ""; assert(!Clusters.empty()); for (const auto &Measurement : Points[Clusters[0].getPointIds()[0]].Measurements) { OS << ""; } OS << ""; for (const SchedClassCluster &Cluster : Clusters) { OS << ""; for (const auto &Stats : Cluster.getCentroid().getStats()) { OS << ""; } OS << ""; } OS << "
    ClusterIdOpcode/Config"; writeEscaped(OS, Measurement.Key); OS << "
    "; writeClusterId(OS, Cluster.id()); OS << "
      "; for (const size_t PointId : Cluster.getPointIds()) { printPointHtml(Points[PointId], OS); } OS << "
    "; writeMeasurementValue(OS, Stats.avg()); OS << "
    ["; writeMeasurementValue(OS, Stats.min()); OS << ";"; writeMeasurementValue(OS, Stats.max()); OS << "]
    "; } void Analysis::SchedClassCluster::addPoint( size_t PointId, const InstructionBenchmarkClustering &Clustering) { PointIds.push_back(PointId); const auto &Point = Clustering.getPoints()[PointId]; if (ClusterId.isUndef()) ClusterId = Clustering.getClusterIdForPoint(PointId); assert(ClusterId == Clustering.getClusterIdForPoint(PointId)); Centroid.addPoint(Point.Measurements); } bool Analysis::SchedClassCluster::measurementsMatch( const MCSubtargetInfo &STI, const ResolvedSchedClass &RSC, const InstructionBenchmarkClustering &Clustering, const double AnalysisInconsistencyEpsilonSquared_) const { assert(!Clustering.getPoints().empty()); const InstructionBenchmark::ModeE Mode = Clustering.getPoints()[0].Mode; if (!Centroid.validate(Mode)) return false; const std::vector ClusterCenterPoint = Centroid.getAsPoint(); const std::vector SchedClassPoint = RSC.getAsPoint(Mode, STI, Centroid.getStats()); if (SchedClassPoint.empty()) return false; // In Uops mode validate() may not be enough. assert(ClusterCenterPoint.size() == SchedClassPoint.size() && "Expected measured/sched data dimensions to match."); return Clustering.isNeighbour(ClusterCenterPoint, SchedClassPoint, AnalysisInconsistencyEpsilonSquared_); } void Analysis::printSchedClassDescHtml(const ResolvedSchedClass &RSC, raw_ostream &OS) const { OS << ""; OS << ""; if (RSC.SCDesc->isValid()) { const auto &SI = State_.getSubtargetInfo(); const auto &SM = SI.getSchedModel(); OS << ""; OS << ""; OS << ""; // Latencies. OS << ""; // inverse throughput. OS << ""; // WriteProcRes. OS << ""; // Idealized port pressure. OS << ""; OS << ""; } else { OS << ""; } OS << "
    ValidVariantNumMicroOpsLatencyRThroughputWriteProcResIdealized Resource Pressure
    " << (RSC.WasVariant ? "✔" : "✕") << "" << RSC.SCDesc->NumMicroOps << "
      "; for (int I = 0, E = RSC.SCDesc->NumWriteLatencyEntries; I < E; ++I) { const auto *const Entry = SI.getWriteLatencyEntry(RSC.SCDesc, I); OS << "
    • " << Entry->Cycles; if (RSC.SCDesc->NumWriteLatencyEntries > 1) { // Dismabiguate if more than 1 latency. OS << " (WriteResourceID " << Entry->WriteResourceID << ")"; } OS << "
    • "; } OS << "
    "; writeMeasurementValue( OS, MCSchedModel::getReciprocalThroughput(SI, *RSC.SCDesc)); OS << "
      "; for (const auto &WPR : RSC.NonRedundantWriteProcRes) { OS << "
    • "; writeEscaped(OS, SM.getProcResource(WPR.ProcResourceIdx)->Name); OS << ": " << WPR.Cycles << "
    • "; } OS << "
      "; for (const auto &Pressure : RSC.IdealizedProcResPressure) { OS << "
    • "; writeEscaped( OS, SI.getSchedModel().getProcResource(Pressure.first)->Name); OS << ": "; writeMeasurementValue(OS, Pressure.second); OS << "
    • "; } OS << "
    "; } void Analysis::printClusterRawHtml( const InstructionBenchmarkClustering::ClusterId &Id, StringRef display_name, llvm::raw_ostream &OS) const { const auto &Points = Clustering_.getPoints(); const auto &Cluster = Clustering_.getCluster(Id); if (Cluster.PointIndices.empty()) return; OS << "

    " << display_name << " Cluster (" << Cluster.PointIndices.size() << " points)

    "; OS << ""; // Table Header. OS << ""; for (const auto &Measurement : Points[Cluster.PointIndices[0]].Measurements) { OS << ""; } OS << ""; // Point data. for (const auto &PointId : Cluster.PointIndices) { OS << ""; for (const auto &Measurement : Points[PointId].Measurements) { OS << ""; } OS << "
    ClusterIdOpcode/Config"; writeEscaped(OS, Measurement.Key); OS << "
    " << display_name << "
      "; printPointHtml(Points[PointId], OS); OS << "
    "; writeMeasurementValue(OS, Measurement.PerInstructionValue); } OS << "
    "; OS << "
    "; } // namespace exegesis static constexpr const char kHtmlHead[] = R"( llvm-exegesis Analysis Results )"; template <> Error Analysis::run( raw_ostream &OS) const { const auto &FirstPoint = Clustering_.getPoints()[0]; // Print the header. OS << "" << kHtmlHead << ""; OS << "

    llvm-exegesis Analysis Results

    "; OS << "

    Triple: "; writeEscaped(OS, FirstPoint.LLVMTriple); OS << "

    Cpu: "; writeEscaped(OS, FirstPoint.CpuName); OS << "

    "; const auto &SI = State_.getSubtargetInfo(); for (const auto &RSCAndPoints : makePointsPerSchedClass()) { if (!RSCAndPoints.RSC.SCDesc) continue; // Bucket sched class points into sched class clusters. std::vector SchedClassClusters; for (const size_t PointId : RSCAndPoints.PointIds) { const auto &ClusterId = Clustering_.getClusterIdForPoint(PointId); if (!ClusterId.isValid()) continue; // Ignore noise and errors. FIXME: take noise into account ? if (ClusterId.isUnstable() ^ AnalysisDisplayUnstableOpcodes_) continue; // Either display stable or unstable clusters only. auto SchedClassClusterIt = llvm::find_if( SchedClassClusters, [ClusterId](const SchedClassCluster &C) { return C.id() == ClusterId; }); if (SchedClassClusterIt == SchedClassClusters.end()) { SchedClassClusters.emplace_back(); SchedClassClusterIt = std::prev(SchedClassClusters.end()); } SchedClassClusterIt->addPoint(PointId, Clustering_); } // Print any scheduling class that has at least one cluster that does not // match the checked-in data. if (all_of(SchedClassClusters, [this, &RSCAndPoints, &SI](const SchedClassCluster &C) { return C.measurementsMatch(SI, RSCAndPoints.RSC, Clustering_, AnalysisInconsistencyEpsilonSquared_); })) continue; // Nothing weird. OS << "

    Sched Class "; #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) writeEscaped(OS, RSCAndPoints.RSC.SCDesc->Name); #else OS << RSCAndPoints.RSC.SchedClassId; #endif OS << " contains instructions whose performance characteristics do" " not match that of LLVM:

    "; printSchedClassClustersHtml(SchedClassClusters, RSCAndPoints.RSC, OS); OS << "

    llvm SchedModel data:

    "; printSchedClassDescHtml(RSCAndPoints.RSC, OS); OS << "
    "; } printClusterRawHtml(InstructionBenchmarkClustering::ClusterId::noise(), "[noise]", OS); OS << ""; return Error::success(); } } // namespace exegesis } // namespace llvm