#include "bench.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include using re2::RE2; using namespace NBench; using namespace NColorizer; using namespace NLastGetopt; namespace { struct TOptions { double TimeBudget; }; struct TResult { TStringBuf TestName; ui64 Samples; ui64 Iterations; TMaybe CyclesPerIteration; TMaybe SecondsPerIteration; double RunTime; size_t TestId; // Sequential test id (zero-based) }; struct ITestRunner: public TIntrusiveListItem { virtual ~ITestRunner() = default; void Register(); virtual TStringBuf Name() const noexcept = 0; virtual TResult Run(const TOptions& opts) = 0; size_t SequentialId = 0; }; struct TCpuBenchmark: public ITestRunner { inline TCpuBenchmark(const char* name, NCpu::TUserFunc func) : F(func) , N(name) { Register(); } TResult Run(const TOptions& opts) override; TStringBuf Name() const noexcept override { return N; } std::function F; const TStringBuf N; }; inline TString DoFmtTime(double t) { if (t > 0.1) { return ToString(t) + " seconds"; } t *= 1000.0; if (t > 0.1) { return ToString(t) + " milliseconds"; } t *= 1000.0; if (t > 0.1) { return ToString(t) + " microseconds"; } t *= 1000.0; if (t < 0.05) { t = 0.0; } return ToString(t) + " nanoseconds"; } struct THiPerfTimer: public THPTimer { static inline TString FmtTime(double t) { return DoFmtTime(t); } }; struct TSimpleTimer { inline double Passed() const noexcept { return (TInstant::Now() - N).MicroSeconds() / 1000000.0; } static inline TString FmtTime(double t) { return DoFmtTime(t); } const TInstant N = TInstant::Now(); }; struct TCycleTimer { inline ui64 Passed() const noexcept { return GetCycleCount() - N; } static inline TString FmtTime(double t) { if (t < 0.5) { t = 0.0; } TString hr; if (t > 10 * 1000) { hr = " (" + ToString(HumanReadableSize(t, ESizeFormat::SF_QUANTITY)) + ")"; } return ToString(t) + hr + " cycles"; } const ui64 N = GetCycleCount(); }; template inline double Measure(T&& t, size_t n) { TMyTimer timer; t(n); return timer.Passed(); } struct TSampleIterator { inline size_t Next() noexcept { return M++; N *= 1.02; M += 1; return Max(N, M); } double N = 1.0; size_t M = 1; }; using TSample = std::pair; using TSamples = TVector; struct TLinFunc { double A; double B; inline double operator()(double x) const noexcept { return A * x + B; } }; TLinFunc CalcModel(const TSamples& s) { TKahanSLRSolver solver; for (const auto& p : s) { solver.Add(p.first, p.second); } double c = 0; double i = 0; solver.Solve(c, i); return TLinFunc{c, i}; } inline TSamples RemoveOutliers(const TSamples& s, double fraction) { if (s.size() < 20) { return s; } const auto predictor = CalcModel(s); const auto errfunc = [&predictor](const TSample& p) -> double { //return (1.0 + fabs(predictor(p.first) - p.second)) / (1.0 + fabs(p.second)); //return fabs((predictor(p.first) - p.second)) / (1.0 + fabs(p.second)); //return fabs((predictor(p.first) - p.second)) / (1.0 + p.first); return fabs((predictor(p.first) - p.second)); }; using TSampleWithError = std::pair; TVector v; v.reserve(s.size()); for (const auto& p : s) { v.emplace_back(&p, errfunc(p)); } Sort(v.begin(), v.end(), [](const TSampleWithError& l, const TSampleWithError& r) -> bool { return (l.second < r.second) || ((l.second == r.second) && (l.first < r.first)); }); if (0) { for (const auto& x : v) { Cout << x.first->first << ", " << x.first->second << " -> " << x.second << Endl; } } 
        TSamples ret;

        ret.reserve(v.size());

        for (const auto i : xrange<size_t>(0, fraction * v.size())) {
            ret.push_back(*v[i].first);
        }

        return ret;
    }

    // Sample func with growing iteration counts until the time budget is
    // exhausted, then estimate the per-iteration cost from a linear fit.
    template <class T>
    static inline TResult RunTest(T&& func, double budget, ITestRunner& test) {
        THPTimer start;

        start.Passed();

        TSampleIterator sample;

        TSamples samples;
        ui64 iters = 0;

        //warm up
        func(1);

        while (start.Passed() < budget) {
            if (start.Passed() < ((budget * samples.size()) / 2000000.0)) {
                ThreadYield();
            } else {
                const size_t n = sample.Next();

                iters += (ui64)n;
                samples.emplace_back(n, Measure<TCycleTimer>(func, n));
            }
        }

        auto filtered = RemoveOutliers(samples, 0.9);

        return {test.Name(), filtered.size(), iters, CalcModel(filtered).A, Nothing(), start.Passed(), test.SequentialId};
    }

    using TTests = TIntrusiveListWithAutoDelete<ITestRunner, TDelete>;

    inline TTests& Tests() {
        return *Singleton<TTests>();
    }

    void ITestRunner::Register() {
        Tests().PushBack(this);
    }

    TResult TCpuBenchmark::Run(const TOptions& opts) {
        return RunTest([this](size_t n) {
            NCpu::TParams params{n};

            F(params);
        }, opts.TimeBudget, *this);
    }

    enum EOutFormat {
        F_CONSOLE = 0 /* "console" */,
        F_CSV /* "csv" */,
        F_JSON /* "json" */
    };

    TAdaptiveLock STDOUT_LOCK;

    struct IReporter {
        virtual void Report(TResult&& result) = 0;

        virtual void Finish() {
        }

        virtual ~IReporter() {
        }
    };

    class TConsoleReporter: public IReporter {
    public:
        TConsoleReporter(IOutputStream& outputStream)
            : OutputStream(outputStream)
        {
        }

        ~TConsoleReporter() override {
        }

        void Report(TResult&& r) override {
            with_lock (STDOUT_LOCK) {
                OutputStream << r;
            }
        }

    private:
        IOutputStream& OutputStream;
    };

    class TCSVReporter: public IReporter {
    public:
        TCSVReporter(IOutputStream& outputStream)
            : OutputStream(outputStream)
        {
            outputStream << "Name\tSamples\tIterations\tRun_time\tPer_iteration_sec\tPer_iteration_cycles" << Endl;
        }

        ~TCSVReporter() override {
        }

        void Report(TResult&& r) override {
            with_lock (STDOUT_LOCK) {
                OutputStream << r.TestName
                             << '\t' << r.Samples
                             << '\t' << r.Iterations
                             << '\t' << r.RunTime;

                OutputStream << '\t';
                if (r.CyclesPerIteration) {
                    OutputStream << TCycleTimer::FmtTime(*r.CyclesPerIteration);
                } else {
                    OutputStream << '-';
                }

                OutputStream << '\t';
                if (r.SecondsPerIteration) {
                    OutputStream << DoFmtTime(*r.SecondsPerIteration);
                } else {
                    OutputStream << '-';
                }

                OutputStream << Endl;
            }
        }

    private:
        IOutputStream& OutputStream;
    };

    class TJSONReporter: public IReporter {
    public:
        TJSONReporter(IOutputStream& outputStream)
            : OutputStream(outputStream)
        {
        }

        ~TJSONReporter() override {
        }

        void Report(TResult&& r) override {
            with_lock (ResultsLock_) {
                Results_.emplace_back(std::move(r));
            }
        }

        void Finish() override {
            NJson::TJsonValue report;
            auto& bench = report["benchmark"];
            bench.SetType(NJson::JSON_ARRAY);

            NJson::TJsonValue benchReport;

            for (const auto& result : Results_) {
                NJson::TJsonValue{}.Swap(benchReport);
                benchReport["name"] = result.TestName;
                benchReport["samples"] = result.Samples;
                benchReport["run_time"] = result.RunTime;
                benchReport["iterations"] = result.Iterations;

                if (result.CyclesPerIteration) {
                    benchReport["per_iteration_cycles"] = *result.CyclesPerIteration;
                }

                if (result.SecondsPerIteration) {
                    benchReport["per_iteration_seconds"] = *result.SecondsPerIteration;
                }

                bench.AppendValue(benchReport);
            }

            OutputStream << report << Endl;
        }

    private:
        IOutputStream& OutputStream;
        TAdaptiveLock ResultsLock_;
        TVector<TResult> Results_;
    };

    // Buffers results and forwards them to the slave reporter in the order of
    // their sequential test ids, so parallel runs produce stable output.
    class TOrderedReporter: public IReporter {
    public:
        TOrderedReporter(THolder<IReporter> slave)
            : Slave_(std::move(slave))
        {
        }

        void Report(TResult&& result) override {
            with_lock (ResultsLock_) {
                OrderedResultQueue_.emplace(result.TestId, std::move(result));
                // Flush any results that are now in order.
                while (!OrderedResultQueue_.empty() && OrderedResultQueue_.begin()->first <= ExpectedTestId_) {
                    Slave_->Report(std::move(OrderedResultQueue_.begin()->second));
                    OrderedResultQueue_.erase(OrderedResultQueue_.begin());
                    ++ExpectedTestId_;
                }
            }
        }

        void Finish() override {
            // Flush whatever is left, then let the slave reporter finalize.
            for (auto& it : OrderedResultQueue_) {
                Slave_->Report(std::move(it.second));
            }
            OrderedResultQueue_.clear();

            Slave_->Finish();
        }

    private:
        THolder<IReporter> Slave_;
        size_t ExpectedTestId_ = 0;
        TMap<size_t, TResult> OrderedResultQueue_;
        TAdaptiveLock ResultsLock_;
    };

    THolder<IReporter> MakeReporter(const EOutFormat type, IOutputStream& outputStream) {
        switch (type) {
            case F_CONSOLE:
                return MakeHolder<TConsoleReporter>(outputStream);

            case F_CSV:
                return MakeHolder<TCSVReporter>(outputStream);

            case F_JSON:
                return MakeHolder<TJSONReporter>(outputStream);

            default:
                break;
        }

        return MakeHolder<TConsoleReporter>(outputStream); // make compiler happy
    }

    THolder<IReporter> MakeOrderedReporter(const EOutFormat type, IOutputStream& outputStream) {
        return MakeHolder<TOrderedReporter>(MakeReporter(type, outputStream));
    }

    void EnumerateTests(TVector<ITestRunner*>& tests) {
        for (size_t id : xrange(tests.size())) {
            tests[id]->SequentialId = id;
        }
    }
}

template <>
EOutFormat FromStringImpl<EOutFormat>(const char* data, size_t len) {
    const auto s = TStringBuf{data, len};

    if (TStringBuf("console") == s) {
        return F_CONSOLE;
    } else if (TStringBuf("csv") == s) {
        return F_CSV;
    } else if (TStringBuf("json") == s) {
        return F_JSON;
    }

    ythrow TFromStringException{} << "failed to convert '" << s << '\'';
}

template <>
void Out<TResult>(IOutputStream& out, const TResult& r) {
    out << "----------- " << LightRed() << r.TestName << Old() << " ---------------" << Endl
        << " samples:       " << White() << r.Samples << Old() << Endl
        << " iterations:    " << White() << r.Iterations << Old() << Endl
        << " iterations hr: " << White() << HumanReadableSize(r.Iterations, SF_QUANTITY) << Old() << Endl
        << " run time:      " << White() << r.RunTime << Old() << Endl;

    if (r.CyclesPerIteration) {
        out << " per iteration: " << White() << TCycleTimer::FmtTime(*r.CyclesPerIteration) << Old() << Endl;
    }

    if (r.SecondsPerIteration) {
        out << " per iteration: " << White() << DoFmtTime(*r.SecondsPerIteration) << Old() << Endl;
    }
}

NCpu::TRegistar::TRegistar(const char* name, TUserFunc func) {
    static_assert(sizeof(TCpuBenchmark) + alignof(TCpuBenchmark) < sizeof(Buf), "fix Buf size");

    new (AlignUp(Buf, alignof(TCpuBenchmark))) TCpuBenchmark(name, func);
}

namespace {
    struct TProgOpts {
        TProgOpts(int argc, char** argv) {
            TOpts opts = TOpts::Default();

            opts.AddHelpOption();

            opts.AddLongOption('b', "budget")
                .StoreResult(&TimeBudget)
                .RequiredArgument("SEC")
                .Optional()
                .Help("overall time budget");

            opts.AddLongOption('l', "list")
                .NoArgument()
                .StoreValue(&ListTests, true)
                .Help("list all tests");

            opts.AddLongOption('t', "threads")
                .StoreResult(&Threads)
                .OptionalValue(ToString((NSystemInfo::CachedNumberOfCpus() + 1) / 2), "JOBS")
                .DefaultValue("1")
                .Help("run benchmarks in parallel");

            opts.AddLongOption('f', "format")
                .AddLongName("benchmark_format")
                .StoreResult(&OutFormat)
                .RequiredArgument("FORMAT")
                .DefaultValue("console")
                .Help("output format (console|csv|json)");

            opts.AddLongOption('r', "report_path")
                .StoreResult(&ReportPath)
                .Optional()
                .Help("path to save report");

            opts.SetFreeArgDefaultTitle("REGEXP", "RE2 regular expression to filter tests");

            const TOptsParseResult parseResult{&opts, argc, argv};

            for (const auto& regexp : parseResult.GetFreeArgs()) {
                Filters.push_back(MakeHolder<RE2>(regexp.data(), RE2::Quiet));
                Y_ENSURE(Filters.back()->ok(), "incorrect RE2 expression '" << regexp << "'");
            }
        }

        bool MatchFilters(const TStringBuf& name) const {
            if (!Filters) {
                return true;
            }
            for (auto&& re : Filters) {
                if (RE2::FullMatchN({name.data(), name.size()}, *re, nullptr, 0)) {
                    return true;
                }
            }

            return false;
        }

        bool ListTests = false;
        double TimeBudget = -1.0;
        TVector<THolder<RE2>> Filters;
        size_t Threads = 0;
        EOutFormat OutFormat;
        std::string ReportPath;
    };
}

int NBench::Main(int argc, char** argv) {
    const TProgOpts opts(argc, argv);

    TVector<ITestRunner*> tests;

    for (auto&& it : Tests()) {
        if (opts.MatchFilters(it.Name())) {
            tests.push_back(&it);
        }
    }
    EnumerateTests(tests);

    if (opts.ListTests) {
        for (const auto* const it : tests) {
            Cout << it->Name() << Endl;
        }

        return 0;
    }

    if (!tests) {
        return 0;
    }

    double timeBudget = opts.TimeBudget;

    if (timeBudget < 0) {
        timeBudget = 5.0 * tests.size();
    }

    THolder<IOutputStream> outputHolder;
    IOutputStream* outputStream = &Cout;

    if (opts.ReportPath != "") {
        TString filePath(opts.ReportPath);
        outputHolder.Reset(outputStream = new TFileOutput(filePath));
    }

    // Split the overall budget evenly across the selected tests.
    const TOptions testOpts = {timeBudget / tests.size()};
    const auto reporter = MakeOrderedReporter(opts.OutFormat, *outputStream);

    std::function<void(ITestRunner**)> func = [&](ITestRunner** it) {
        auto&& res = (*it)->Run(testOpts);

        reporter->Report(std::move(res));
    };

    if (opts.Threads > 1) {
        NYmp::SetThreadCount(opts.Threads);
        NYmp::ParallelForStaticChunk(tests.data(), tests.data() + tests.size(), 1, func);
    } else {
        for (auto it : tests) {
            func(&it);
        }
    }

    reporter->Finish();

    return 0;
}
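
// Usage sketch (not part of this translation unit): benchmarks are normally
// declared in user code via the Y_CPU_BENCHMARK macro from bench.h, which
// registers a TCpuBenchmark through NCpu::TRegistar above. The benchmark name
// (ExampleStringToInt), the parameter accessor iface.Iterations(), and the
// Y_DO_NOT_OPTIMIZE_AWAY helper are assumptions based on bench.h and are shown
// here for illustration only:
//
//     Y_CPU_BENCHMARK(ExampleStringToInt, iface) {
//         for (size_t i = 0; i < iface.Iterations(); ++i) {
//             Y_DO_NOT_OPTIMIZE_AWAY(FromString<int>("123456"));
//         }
//     }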