// mprefetch.cpp
#include <util/datetime/cputimer.h>
#include <library/cpp/getopt/last_getopt.h>

#include <algorithm>
#include <cmath>
#include <deque>
#include <numeric>
#include <random>
#include <vector>
  4. int main(int argc, char** argv) {
  5. NLastGetopt::TOpts opts = NLastGetopt::TOpts::Default();
  6. ui32 nIters = 100000000;
  7. ui32 nRows = 100000000;
  8. ui32 nRepeats = 10;
  9. ui32 nPrefetch = 0;
  10. ui32 nSpin = 0;
  11. bool shuffle = true;
  12. opts.AddLongOption('r', "rows", "# of rows").StoreResult(&nRows);
  13. opts.AddLongOption('i', "iter", "# of iterations").StoreResult(&nIters);
  14. opts.AddLongOption('t', "repeats", "# of repeats").StoreResult(&nRepeats);
  15. opts.AddLongOption('p', "prefetch", "# of prefetch").StoreResult(&nPrefetch);
  16. opts.AddLongOption('h', "shuffle", "randomize").StoreResult(&shuffle);
  17. opts.AddLongOption('s', "spin", "spin count").StoreResult(&nSpin);
  18. opts.SetFreeArgsMax(0);
  19. NLastGetopt::TOptsParseResult res(&opts, argc, argv);
  20. std::vector<ui32> v(nRows);
  21. std::vector<ui32> data(nRows);
  22. std::iota(v.begin(), v.end(), 0);
  23. if (shuffle) {
  24. std::random_shuffle(v.begin(), v.end());
  25. }
  26. std::vector<ui32> prefetchQueue(nPrefetch);
  27. ui32 queueBegin = 0;
  28. ui32 queueEnd = 0;
  29. ui32 queueSize = 0;
  30. volatile ui64 tmp = 0;
  31. std::vector<double> durations;
  32. for (ui32 j = 0; j < nRepeats; ++j) {
  33. TSimpleTimer timer;
  34. ui32 index = 0;
  35. if (nPrefetch == 0) {
  36. for (ui32 i = 0; i < nIters; ++i) {
  37. data[v[index++]]+=1;
  38. if (index == nRows) {
  39. index = 0;
  40. }
  41. for (ui32 j = 0; j < nSpin; ++j) {
  42. ++tmp;
  43. }
  44. }
  45. } else {
  46. auto handle = [&]() {
  47. auto prevJ = prefetchQueue[queueBegin++];
  48. --queueSize;
  49. if (queueBegin == nPrefetch) {
  50. queueBegin = 0;
  51. }
  52. data[prevJ]+=1;
  53. for (ui32 j = 0; j < nSpin; ++j) {
  54. ++tmp;
  55. }
  56. };
  57. for (ui32 i = 0; i < nIters; ++i) {
  58. auto j = v[index++];
  59. if (index == nRows) {
  60. index = 0;
  61. }
  62. __builtin_prefetch(data.data() + j, 1, 3);
  63. prefetchQueue[queueEnd++] = j;
  64. ++queueSize;
  65. if (queueEnd == nPrefetch) {
  66. queueEnd = 0;
  67. }
  68. if (queueSize == nPrefetch) {
  69. handle();
  70. }
  71. }
  72. while (queueSize > 0) {
  73. handle();
  74. }
  75. }
  76. auto duration = timer.Get();
  77. durations.push_back(1e-6*duration.MicroSeconds());
  78. }
  79. // remove 1/3 of worst measurements
  80. Sort(durations.begin(), durations.end());
  81. durations.erase(durations.begin() + nRepeats * 2 / 3, durations.end());
  82. nRepeats = durations.size();
  83. double sumDurations = 0.0, sumDurationsQ = 0.0;
  84. for (auto d : durations) {
  85. sumDurations += d;
  86. sumDurationsQ += d * d;
  87. }
  88. double avgDuration = sumDurations / nRepeats;
  89. double dispDuration = sqrt(sumDurationsQ / nRepeats - avgDuration * avgDuration);
  90. Cerr << "Elapsed: " << avgDuration << ", noise: " << 100*dispDuration/avgDuration << "%\n";
  91. Cerr << "Speed: " << 1e-6 * (ui64(nIters) / avgDuration) << " M iters/sec\n";
  92. return 0;
  93. }