pack_num_bench.cpp 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226
  1. #include "pack.h"
  2. #include <yql/essentials/minikql/pack_num.h>
  3. #include <library/cpp/testing/benchmark/bench.h>
  4. #include <library/cpp/packedtypes/longs.h>
  5. #include <util/generic/xrange.h>
  6. #include <util/generic/singleton.h>
  7. #include <util/random/random.h>
  8. namespace {
  9. template <ui32 UPPER, size_t MAX_BYTE_SIZE = (1 << 15) - (1 << 10)>
  10. struct TSamples32 {
  11. constexpr static size_t COUNT = MAX_BYTE_SIZE / sizeof(ui32);
  12. TSamples32() {
  13. for (size_t i: xrange(COUNT)) {
  14. Data[i] = RandomNumber<ui32>(UPPER);
  15. }
  16. }
  17. ui32 Data[COUNT];
  18. };
  19. template <ui64 UPPER, size_t MAX_BYTE_SIZE = (1 << 15) - (1 << 10)>
  20. struct TSamples64 {
  21. constexpr static size_t COUNT = MAX_BYTE_SIZE / sizeof(ui64);
  22. TSamples64() {
  23. for (size_t i: xrange(COUNT)) {
  24. Data[i] = RandomNumber<ui64>(UPPER);
  25. }
  26. }
  27. ui64 Data[COUNT];
  28. };
  29. template <ui32 UPPER, typename TCoder, size_t MAX_BYTE_SIZE = (1 << 15) - (1 << 10)>
  30. struct TCodedData32 {
  31. constexpr static size_t BYTES_PER_NUM = sizeof(ui32) + 2;
  32. constexpr static size_t COUNT = MAX_BYTE_SIZE / BYTES_PER_NUM;
  33. TCodedData32() {
  34. for (size_t i: xrange(COUNT)) {
  35. Length[i] = TCoder()(RandomNumber<ui32>(UPPER), Data + i * BYTES_PER_NUM);
  36. }
  37. }
  38. char Data[MAX_BYTE_SIZE + BYTES_PER_NUM];
  39. size_t Length[COUNT];
  40. };
  41. template <ui64 UPPER, typename TCoder, size_t MAX_BYTE_SIZE = (1 << 15) - (1 << 10)>
  42. struct TCodedData64 {
  43. constexpr static size_t BYTES_PER_NUM = sizeof(ui64) + 2;
  44. constexpr static size_t COUNT = MAX_BYTE_SIZE / BYTES_PER_NUM;
  45. TCodedData64() {
  46. for (size_t i: xrange(COUNT)) {
  47. Length[i] = TCoder()(RandomNumber<ui64>(UPPER), Data + i * BYTES_PER_NUM);
  48. }
  49. }
  50. char Data[MAX_BYTE_SIZE + BYTES_PER_NUM];
  51. size_t Length[COUNT];
  52. };
  53. struct TKikimrCoder32 {
  54. size_t operator() (ui32 num, char* buf) const {
  55. return NKikimr::Pack32(num, buf);
  56. }
  57. };
  58. struct TKikimrCoder64 {
  59. size_t operator() (ui64 num, char* buf) const {
  60. return NKikimr::Pack64(num, buf);
  61. }
  62. };
  63. struct TPackedTypesCoder32 {
  64. size_t operator() (ui32 num, char* buf) const {
  65. return Pack32(num, buf) - buf;
  66. }
  67. };
  68. struct TPackedTypesCoder64 {
  69. size_t operator() (ui64 num, char* buf) const {
  70. return Pack64(num, buf) - buf;
  71. }
  72. };
  73. struct TDictUtilsCoder32 {
  74. size_t operator() (ui32 num, char* buf) const {
  75. return PackU32(num, buf);
  76. }
  77. };
  78. struct TDictUtilsCoder64 {
  79. size_t operator() (ui64 num, char* buf) const {
  80. return PackU64(num, buf);
  81. }
  82. };
  83. } // unnamed
  84. #define DEF_WRITE_BENCH(base, limit) \
  85. Y_CPU_BENCHMARK(Write##base##_Kikimr_##limit, iface) { \
  86. char buffer[sizeof(ui##base) + 1]; \
  87. auto& data = Default<TSamples##base<limit>>(); \
  88. for (size_t i = 0; i < iface.Iterations(); ++i) { \
  89. Y_DO_NOT_OPTIMIZE_AWAY(NKikimr::Pack##base(data.Data[i % data.COUNT], buffer)); \
  90. NBench::Clobber(); \
  91. } \
  92. } \
  93. Y_CPU_BENCHMARK(Write##base##_PackedTypes_##limit, iface) { \
  94. char buffer[sizeof(ui##base) + 1]; \
  95. auto& data = Default<TSamples##base<limit>>(); \
  96. for (size_t i = 0; i < iface.Iterations(); ++i) { \
  97. Y_DO_NOT_OPTIMIZE_AWAY(Pack##base(data.Data[i % data.COUNT], buffer)); \
  98. NBench::Clobber(); \
  99. } \
  100. } \
  101. Y_CPU_BENCHMARK(Write##base##_DictUtils_##limit, iface) { \
  102. char buffer[sizeof(ui##base) + 1]; \
  103. auto& data = Default<TSamples##base<limit>>(); \
  104. for (size_t i = 0; i < iface.Iterations(); ++i) { \
  105. Y_DO_NOT_OPTIMIZE_AWAY(PackU##base(data.Data[i % data.COUNT], buffer)); \
  106. NBench::Clobber(); \
  107. } \
  108. }
  109. #define DEF_READ_BENCH(base, limit) \
  110. Y_CPU_BENCHMARK(Read##base##_KikimrLong_##limit, iface) { \
  111. ui##base num = 0; \
  112. NBench::Escape(&num); \
  113. const auto& data = *HugeSingleton<TCodedData##base<limit, TKikimrCoder##base>>(); \
  114. for (size_t i = 0; i < iface.Iterations(); ++i) { \
  115. const size_t pos = i % data.COUNT; \
  116. Y_DO_NOT_OPTIMIZE_AWAY(NKikimr::Unpack##base(data.Data + pos * data.BYTES_PER_NUM, data.BYTES_PER_NUM, num)); \
  117. NBench::Clobber(); \
  118. } \
  119. } \
  120. Y_CPU_BENCHMARK(Read##base##_KikimrShort_##limit, iface) { \
  121. ui##base num = 0; \
  122. NBench::Escape(&num); \
  123. const auto& data = *HugeSingleton<TCodedData##base<limit, TKikimrCoder##base>>(); \
  124. for (size_t i = 0; i < iface.Iterations(); ++i) { \
  125. const size_t pos = i % data.COUNT; \
  126. Y_DO_NOT_OPTIMIZE_AWAY(NKikimr::Unpack##base(data.Data + pos * data.BYTES_PER_NUM, data.Length[pos], num)); \
  127. NBench::Clobber(); \
  128. } \
  129. } \
  130. Y_CPU_BENCHMARK(Read##base##_PackedTypes_##limit, iface) { \
  131. ui##base num = 0; \
  132. NBench::Escape(&num); \
  133. const auto& data = *HugeSingleton<TCodedData##base<limit, TPackedTypesCoder##base>>(); \
  134. for (size_t i = 0; i < iface.Iterations(); ++i) { \
  135. const size_t pos = i % data.COUNT; \
  136. Y_DO_NOT_OPTIMIZE_AWAY(Unpack##base(num, data.Data + pos * data.BYTES_PER_NUM)); \
  137. NBench::Clobber(); \
  138. } \
  139. } \
  140. Y_CPU_BENCHMARK(Read##base##_DictUtils_##limit, iface) { \
  141. ui##base num = 0; \
  142. NBench::Escape(&num); \
  143. const auto& data = *HugeSingleton<TCodedData##base<limit, TDictUtilsCoder##base>>(); \
  144. for (size_t i = 0; i < iface.Iterations(); ++i) { \
  145. const size_t pos = i % data.COUNT; \
  146. Y_DO_NOT_OPTIMIZE_AWAY(UnpackU##base(&num, data.Data + pos * data.BYTES_PER_NUM)); \
  147. NBench::Clobber(); \
  148. } \
  149. }
  150. DEF_WRITE_BENCH(32, 10)
  151. DEF_WRITE_BENCH(32, 126)
  152. DEF_WRITE_BENCH(32, 127)
  153. DEF_WRITE_BENCH(32, 128)
  154. DEF_WRITE_BENCH(32, 254)
  155. DEF_WRITE_BENCH(32, 255)
  156. DEF_WRITE_BENCH(32, 256)
  157. DEF_WRITE_BENCH(32, 65534)
  158. DEF_WRITE_BENCH(32, 65535)
  159. DEF_WRITE_BENCH(32, 65536)
  160. DEF_WRITE_BENCH(32, 4294967295)
  161. DEF_WRITE_BENCH(64, 10)
  162. DEF_WRITE_BENCH(64, 126)
  163. DEF_WRITE_BENCH(64, 127)
  164. DEF_WRITE_BENCH(64, 128)
  165. DEF_WRITE_BENCH(64, 254)
  166. DEF_WRITE_BENCH(64, 255)
  167. DEF_WRITE_BENCH(64, 256)
  168. DEF_WRITE_BENCH(64, 65534)
  169. DEF_WRITE_BENCH(64, 65535)
  170. DEF_WRITE_BENCH(64, 65536)
  171. DEF_WRITE_BENCH(64, 4294967294ull)
  172. DEF_WRITE_BENCH(64, 4294967295ull)
  173. DEF_WRITE_BENCH(64, 4294967296ull)
  174. DEF_WRITE_BENCH(64, 18446744073709551615ull)
  175. DEF_READ_BENCH(32, 10)
  176. DEF_READ_BENCH(32, 126)
  177. DEF_READ_BENCH(32, 127)
  178. DEF_READ_BENCH(32, 128)
  179. DEF_READ_BENCH(32, 254)
  180. DEF_READ_BENCH(32, 255)
  181. DEF_READ_BENCH(32, 256)
  182. DEF_READ_BENCH(32, 65534)
  183. DEF_READ_BENCH(32, 65535)
  184. DEF_READ_BENCH(32, 65536)
  185. DEF_READ_BENCH(32, 4294967295)
  186. DEF_READ_BENCH(64, 10)
  187. DEF_READ_BENCH(64, 126)
  188. DEF_READ_BENCH(64, 127)
  189. DEF_READ_BENCH(64, 128)
  190. DEF_READ_BENCH(64, 254)
  191. DEF_READ_BENCH(64, 255)
  192. DEF_READ_BENCH(64, 256)
  193. DEF_READ_BENCH(64, 65534)
  194. DEF_READ_BENCH(64, 65535)
  195. DEF_READ_BENCH(64, 65536)
  196. DEF_READ_BENCH(64, 4294967294ull)
  197. DEF_READ_BENCH(64, 4294967295ull)
  198. DEF_READ_BENCH(64, 4294967296ull)
  199. DEF_READ_BENCH(64, 18446744073709551615ull)