bench.cpp 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633
  1. #include "bench.h"
  2. #include <contrib/libs/re2/re2/re2.h>
  3. #include <library/cpp/colorizer/output.h>
  4. #include <library/cpp/getopt/small/last_getopt.h>
  5. #include <library/cpp/json/json_value.h>
  6. #include <library/cpp/linear_regression/linear_regression.h>
  7. #include <library/cpp/threading/poor_man_openmp/thread_helper.h>
  8. #include <util/generic/ptr.h>
  9. #include <util/system/hp_timer.h>
  10. #include <util/system/info.h>
  11. #include <util/stream/output.h>
  12. #include <util/datetime/base.h>
  13. #include <util/random/random.h>
  14. #include <util/string/cast.h>
  15. #include <util/generic/xrange.h>
  16. #include <util/generic/algorithm.h>
  17. #include <util/generic/singleton.h>
  18. #include <util/system/spinlock.h>
  19. #include <util/generic/function.h>
  20. #include <util/generic/maybe.h>
  21. #include <util/generic/strbuf.h>
  22. #include <util/generic/intrlist.h>
  23. #include <util/stream/format.h>
  24. #include <util/stream/file.h>
  25. #include <util/system/yield.h>
  26. using re2::RE2;
  27. using namespace NBench;
  28. using namespace NColorizer;
  29. using namespace NLastGetopt;
  30. namespace {
  31. struct TOptions {
  32. double TimeBudget;
  33. };
  34. struct TResult {
  35. TStringBuf TestName;
  36. ui64 Samples;
  37. ui64 Iterations;
  38. TMaybe<double> CyclesPerIteration;
  39. TMaybe<double> SecondsPerIteration;
  40. double RunTime;
  41. size_t TestId; // Sequential test id (zero-based)
  42. };
  43. struct ITestRunner: public TIntrusiveListItem<ITestRunner> {
  44. virtual ~ITestRunner() = default;
  45. void Register();
  46. virtual TStringBuf Name() const noexcept = 0;
  47. virtual TResult Run(const TOptions& opts) = 0;
  48. size_t SequentialId = 0;
  49. };
  50. struct TCpuBenchmark: public ITestRunner {
  51. inline TCpuBenchmark(const char* name, NCpu::TUserFunc func)
  52. : F(func)
  53. , N(name)
  54. {
  55. Register();
  56. }
  57. TResult Run(const TOptions& opts) override;
  58. TStringBuf Name() const noexcept override {
  59. return N;
  60. }
  61. std::function<NCpu::TUserFunc> F;
  62. const TStringBuf N;
  63. };
  64. inline TString DoFmtTime(double t) {
  65. if (t > 0.1) {
  66. return ToString(t) + " seconds";
  67. }
  68. t *= 1000.0;
  69. if (t > 0.1) {
  70. return ToString(t) + " milliseconds";
  71. }
  72. t *= 1000.0;
  73. if (t > 0.1) {
  74. return ToString(t) + " microseconds";
  75. }
  76. t *= 1000.0;
  77. if (t < 0.05) {
  78. t = 0.0;
  79. }
  80. return ToString(t) + " nanoseconds";
  81. }
  82. struct THiPerfTimer: public THPTimer {
  83. static inline TString FmtTime(double t) {
  84. return DoFmtTime(t);
  85. }
  86. };
  87. struct TSimpleTimer {
  88. inline double Passed() const noexcept {
  89. return (TInstant::Now() - N).MicroSeconds() / 1000000.0;
  90. }
  91. static inline TString FmtTime(double t) {
  92. return DoFmtTime(t);
  93. }
  94. const TInstant N = TInstant::Now();
  95. };
  96. struct TCycleTimer {
  97. inline ui64 Passed() const noexcept {
  98. return GetCycleCount() - N;
  99. }
  100. static inline TString FmtTime(double t) {
  101. if (t < 0.5) {
  102. t = 0.0;
  103. }
  104. TString hr;
  105. if (t > 10 * 1000) {
  106. hr = " (" + ToString(HumanReadableSize(t, ESizeFormat::SF_QUANTITY)) + ")";
  107. }
  108. return ToString(t) + hr + " cycles";
  109. }
  110. const ui64 N = GetCycleCount();
  111. };
  112. template <class TMyTimer, class T>
  113. inline double Measure(T&& t, size_t n) {
  114. TMyTimer timer;
  115. t(n);
  116. return timer.Passed();
  117. }
  118. struct TSampleIterator {
  119. inline size_t Next() noexcept {
  120. return M++;
  121. N *= 1.02;
  122. M += 1;
  123. return Max<double>(N, M);
  124. }
  125. double N = 1.0;
  126. size_t M = 1;
  127. };
  128. using TSample = std::pair<size_t, double>;
  129. using TSamples = TVector<TSample>;
  130. struct TLinFunc {
  131. double A;
  132. double B;
  133. inline double operator()(double x) const noexcept {
  134. return A * x + B;
  135. }
  136. };
  137. TLinFunc CalcModel(const TSamples& s) {
  138. TKahanSLRSolver solver;
  139. for (const auto& p : s) {
  140. solver.Add(p.first, p.second);
  141. }
  142. double c = 0;
  143. double i = 0;
  144. solver.Solve(c, i);
  145. return TLinFunc{c, i};
  146. }
  147. inline TSamples RemoveOutliers(const TSamples& s, double fraction) {
  148. if (s.size() < 20) {
  149. return s;
  150. }
  151. const auto predictor = CalcModel(s);
  152. const auto errfunc = [&predictor](const TSample& p) -> double {
  153. //return (1.0 + fabs(predictor(p.first) - p.second)) / (1.0 + fabs(p.second));
  154. //return fabs((predictor(p.first) - p.second)) / (1.0 + fabs(p.second));
  155. //return fabs((predictor(p.first) - p.second)) / (1.0 + p.first);
  156. return fabs((predictor(p.first) - p.second));
  157. };
  158. using TSampleWithError = std::pair<const TSample*, double>;
  159. TVector<TSampleWithError> v;
  160. v.reserve(s.size());
  161. for (const auto& p : s) {
  162. v.emplace_back(&p, errfunc(p));
  163. }
  164. Sort(v.begin(), v.end(), [](const TSampleWithError& l, const TSampleWithError& r) -> bool {
  165. return (l.second < r.second) || ((l.second == r.second) && (l.first < r.first));
  166. });
  167. if (0) {
  168. for (const auto& x : v) {
  169. Cout << x.first->first << ", " << x.first->second << " -> " << x.second << Endl;
  170. }
  171. }
  172. TSamples ret;
  173. ret.reserve(v.size());
  174. for (const auto i : xrange<size_t>(0, fraction * v.size())) {
  175. ret.push_back(*v[i].first);
  176. }
  177. return ret;
  178. }
  179. template <class TMyTimer, class T>
  180. static inline TResult RunTest(T&& func, double budget, ITestRunner& test) {
  181. THPTimer start;
  182. start.Passed();
  183. TSampleIterator sample;
  184. TSamples samples;
  185. ui64 iters = 0;
  186. //warm up
  187. func(1);
  188. while (start.Passed() < budget) {
  189. if (start.Passed() < ((budget * samples.size()) / 2000000.0)) {
  190. ThreadYield();
  191. } else {
  192. const size_t n = sample.Next();
  193. iters += (ui64)n;
  194. samples.emplace_back(n, Measure<TMyTimer>(func, n));
  195. }
  196. }
  197. auto filtered = RemoveOutliers(samples, 0.9);
  198. return {test.Name(), filtered.size(), iters, CalcModel(filtered).A, Nothing(), start.Passed(), test.SequentialId};
  199. }
  200. using TTests = TIntrusiveListWithAutoDelete<ITestRunner, TDestructor>;
  201. inline TTests& Tests() {
  202. return *Singleton<TTests>();
  203. }
  204. void ITestRunner::Register() {
  205. Tests().PushBack(this);
  206. }
  207. TResult TCpuBenchmark::Run(const TOptions& opts) {
  208. return RunTest<TCycleTimer>([this](size_t n) {
  209. NCpu::TParams params{n};
  210. F(params);
  211. }, opts.TimeBudget, *this);
  212. }
  213. enum EOutFormat {
  214. F_CONSOLE = 0 /* "console" */,
  215. F_CSV /* "csv" */,
  216. F_JSON /* "json" */
  217. };
  218. TAdaptiveLock STDOUT_LOCK;
  219. struct IReporter {
  220. virtual void Report(TResult&& result) = 0;
  221. virtual void Finish() {
  222. }
  223. virtual ~IReporter() {
  224. }
  225. };
  226. class TConsoleReporter: public IReporter {
  227. public:
  228. TConsoleReporter(IOutputStream& outputStream) : OutputStream(outputStream) {
  229. }
  230. ~TConsoleReporter() override {
  231. }
  232. void Report(TResult&& r) override {
  233. with_lock (STDOUT_LOCK) {
  234. OutputStream << r;
  235. }
  236. }
  237. private:
  238. IOutputStream& OutputStream;
  239. };
  240. class TCSVReporter: public IReporter {
  241. public:
  242. TCSVReporter(IOutputStream& outputStream) : OutputStream(outputStream) {
  243. outputStream << "Name\tSamples\tIterations\tRun_time\tPer_iteration_sec\tPer_iteration_cycles" << Endl;
  244. }
  245. ~TCSVReporter() override {
  246. }
  247. void Report(TResult&& r) override {
  248. with_lock (STDOUT_LOCK) {
  249. OutputStream << r.TestName
  250. << '\t' << r.Samples
  251. << '\t' << r.Iterations
  252. << '\t' << r.RunTime;
  253. OutputStream << '\t';
  254. if (r.CyclesPerIteration) {
  255. OutputStream << TCycleTimer::FmtTime(*r.CyclesPerIteration);
  256. } else {
  257. OutputStream << '-';
  258. }
  259. OutputStream << '\t';
  260. if (r.SecondsPerIteration) {
  261. OutputStream << DoFmtTime(*r.SecondsPerIteration);
  262. } else {
  263. OutputStream << '-';
  264. }
  265. OutputStream << Endl;
  266. }
  267. }
  268. private:
  269. IOutputStream& OutputStream;
  270. };
  271. class TJSONReporter: public IReporter {
  272. public:
  273. TJSONReporter(IOutputStream& outputStream) : OutputStream(outputStream) {
  274. }
  275. ~TJSONReporter() override {
  276. }
  277. void Report(TResult&& r) override {
  278. with_lock (ResultsLock_) {
  279. Results_.emplace_back(std::move(r));
  280. }
  281. }
  282. void Finish() override {
  283. NJson::TJsonValue report;
  284. auto& bench = report["benchmark"];
  285. bench.SetType(NJson::JSON_ARRAY);
  286. NJson::TJsonValue benchReport;
  287. for (const auto& result : Results_) {
  288. NJson::TJsonValue{}.Swap(benchReport);
  289. benchReport["name"] = result.TestName;
  290. benchReport["samples"] = result.Samples;
  291. benchReport["run_time"] = result.RunTime;
  292. benchReport["iterations"] = result.Iterations;
  293. if (result.CyclesPerIteration) {
  294. benchReport["per_iteration_cycles"] = *result.CyclesPerIteration;
  295. }
  296. if (result.SecondsPerIteration) {
  297. benchReport["per_iteration_secons"] = *result.SecondsPerIteration;
  298. }
  299. bench.AppendValue(benchReport);
  300. }
  301. OutputStream << report << Endl;
  302. }
  303. private:
  304. IOutputStream& OutputStream;
  305. TAdaptiveLock ResultsLock_;
  306. TVector<TResult> Results_;
  307. };
  308. class TOrderedReporter: public IReporter {
  309. public:
  310. TOrderedReporter(THolder<IReporter> slave)
  311. : Slave_(std::move(slave))
  312. {
  313. }
  314. void Report(TResult&& result) override {
  315. with_lock (ResultsLock_) {
  316. OrderedResultQueue_.emplace(result.TestId, std::move(result));
  317. while (!OrderedResultQueue_.empty() && OrderedResultQueue_.begin()->first <= ExpectedTestId_) {
  318. Slave_->Report(std::move(OrderedResultQueue_.begin()->second));
  319. OrderedResultQueue_.erase(OrderedResultQueue_.begin());
  320. ++ExpectedTestId_;
  321. }
  322. }
  323. }
  324. void Finish() override {
  325. for (auto& it : OrderedResultQueue_) {
  326. Slave_->Report(std::move(it.second));
  327. }
  328. OrderedResultQueue_.clear();
  329. Slave_->Finish();
  330. }
  331. private:
  332. THolder<IReporter> Slave_;
  333. size_t ExpectedTestId_ = 0;
  334. TMap<size_t, TResult> OrderedResultQueue_;
  335. TAdaptiveLock ResultsLock_;
  336. };
  337. THolder<IReporter> MakeReporter(const EOutFormat type, IOutputStream& outputStream) {
  338. switch (type) {
  339. case F_CONSOLE:
  340. return MakeHolder<TConsoleReporter>(outputStream);
  341. case F_CSV:
  342. return MakeHolder<TCSVReporter>(outputStream);
  343. case F_JSON:
  344. return MakeHolder<TJSONReporter>(outputStream);
  345. default:
  346. break;
  347. }
  348. return MakeHolder<TConsoleReporter>(outputStream); // make compiler happy
  349. }
  350. THolder<IReporter> MakeOrderedReporter(const EOutFormat type, IOutputStream& outputStream) {
  351. return MakeHolder<TOrderedReporter>(MakeReporter(type, outputStream));
  352. }
  353. void EnumerateTests(TVector<ITestRunner*>& tests) {
  354. for (size_t id : xrange(tests.size())) {
  355. tests[id]->SequentialId = id;
  356. }
  357. }
  358. }
  359. template <>
  360. EOutFormat FromStringImpl<EOutFormat>(const char* data, size_t len) {
  361. const auto s = TStringBuf{data, len};
  362. if (TStringBuf("console") == s) {
  363. return F_CONSOLE;
  364. } else if (TStringBuf("csv") == s) {
  365. return F_CSV;
  366. } else if (TStringBuf("json") == s) {
  367. return F_JSON;
  368. }
  369. ythrow TFromStringException{} << "failed to convert '" << s << '\'';
  370. }
  371. template <>
  372. void Out<TResult>(IOutputStream& out, const TResult& r) {
  373. out << "----------- " << LightRed() << r.TestName << Old() << " ---------------" << Endl
  374. << " samples: " << White() << r.Samples << Old() << Endl
  375. << " iterations: " << White() << r.Iterations << Old() << Endl
  376. << " iterations hr: " << White() << HumanReadableSize(r.Iterations, SF_QUANTITY) << Old() << Endl
  377. << " run time: " << White() << r.RunTime << Old() << Endl;
  378. if (r.CyclesPerIteration) {
  379. out << " per iteration: " << White() << TCycleTimer::FmtTime(*r.CyclesPerIteration) << Old() << Endl;
  380. }
  381. if (r.SecondsPerIteration) {
  382. out << " per iteration: " << White() << DoFmtTime(*r.SecondsPerIteration) << Old() << Endl;
  383. }
  384. }
  385. NCpu::TRegistar::TRegistar(const char* name, TUserFunc func) {
  386. static_assert(sizeof(TCpuBenchmark) + alignof(TCpuBenchmark) < sizeof(Buf), "fix Buf size");
  387. new (AlignUp(Buf, alignof(TCpuBenchmark))) TCpuBenchmark(name, func);
  388. }
  389. namespace {
  390. struct TProgOpts {
  391. TProgOpts(int argc, char** argv) {
  392. TOpts opts = TOpts::Default();
  393. opts.AddHelpOption();
  394. opts.AddLongOption('b', "budget")
  395. .StoreResult(&TimeBudget)
  396. .RequiredArgument("SEC")
  397. .Optional()
  398. .Help("overall time budget");
  399. opts.AddLongOption('l', "list")
  400. .NoArgument()
  401. .StoreValue(&ListTests, true)
  402. .Help("list all tests");
  403. opts.AddLongOption('t', "threads")
  404. .StoreResult(&Threads)
  405. .OptionalValue(ToString((NSystemInfo::CachedNumberOfCpus() + 1) / 2), "JOBS")
  406. .DefaultValue("1")
  407. .Help("run benchmarks in parallel");
  408. opts.AddLongOption('f', "format")
  409. .AddLongName("benchmark_format")
  410. .StoreResult(&OutFormat)
  411. .RequiredArgument("FORMAT")
  412. .DefaultValue("console")
  413. .Help("output format (console|csv|json)");
  414. opts.AddLongOption('r', "report_path")
  415. .StoreResult(&ReportPath)
  416. .Optional()
  417. .Help("path to save report");
  418. opts.SetFreeArgDefaultTitle("REGEXP", "RE2 regular expression to filter tests");
  419. const TOptsParseResult parseResult{&opts, argc, argv};
  420. for (const auto& regexp : parseResult.GetFreeArgs()) {
  421. Filters.push_back(MakeHolder<RE2>(regexp.data(), RE2::Quiet));
  422. Y_ENSURE(Filters.back()->ok(), "incorrect RE2 expression '" << regexp << "'");
  423. }
  424. }
  425. bool MatchFilters(const TStringBuf& name) const {
  426. if (!Filters) {
  427. return true;
  428. }
  429. for (auto&& re : Filters) {
  430. if (RE2::FullMatchN({name.data(), name.size()}, *re, nullptr, 0)) {
  431. return true;
  432. }
  433. }
  434. return false;
  435. }
  436. bool ListTests = false;
  437. double TimeBudget = -1.0;
  438. TVector<THolder<RE2>> Filters;
  439. size_t Threads = 0;
  440. EOutFormat OutFormat;
  441. std::string ReportPath;
  442. };
  443. }
  444. int NBench::Main(int argc, char** argv) {
  445. const TProgOpts opts(argc, argv);
  446. TVector<ITestRunner*> tests;
  447. for (auto&& it : Tests()) {
  448. if (opts.MatchFilters(it.Name())) {
  449. tests.push_back(&it);
  450. }
  451. }
  452. EnumerateTests(tests);
  453. if (opts.ListTests) {
  454. for (const auto* const it : tests) {
  455. Cout << it->Name() << Endl;
  456. }
  457. return 0;
  458. }
  459. if (!tests) {
  460. return 0;
  461. }
  462. double timeBudget = opts.TimeBudget;
  463. if (timeBudget < 0) {
  464. timeBudget = 5.0 * tests.size();
  465. }
  466. THolder<IOutputStream> outputHolder;
  467. IOutputStream* outputStream = &Cout;
  468. if (opts.ReportPath != "") {
  469. TString filePath(opts.ReportPath);
  470. outputHolder.Reset(outputStream = new TFileOutput(filePath));
  471. }
  472. const TOptions testOpts = {timeBudget / tests.size()};
  473. const auto reporter = MakeOrderedReporter(opts.OutFormat, *outputStream);
  474. std::function<void(ITestRunner**)> func = [&](ITestRunner** it) {
  475. auto&& res = (*it)->Run(testOpts);
  476. reporter->Report(std::move(res));
  477. };
  478. if (opts.Threads > 1) {
  479. NYmp::SetThreadCount(opts.Threads);
  480. NYmp::ParallelForStaticChunk(tests.data(), tests.data() + tests.size(), 1, func);
  481. } else {
  482. for (auto it : tests) {
  483. func(&it);
  484. }
  485. }
  486. reporter->Finish();
  487. return 0;
  488. }