Просмотр исходного кода

Add faster data generation to Storage Load Actor, #1156 (#1157)

* Add faster data generation to Storage Load Actor, #1156

* Add alexvru's faster data generation method
Sergey Belyakov 1 год назад
Родитель
Сommit
98f51c88ad
2 измененных файлов с 80 добавлено и 14 удалено
  1. 1 1
      ydb/core/load_test/group_write.cpp
  2. 79 13
      ydb/core/util/lz4_data_generator.h

+ 1 - 1
ydb/core/load_test/group_write.cpp

@@ -1431,7 +1431,7 @@ public:
 
     template <class ResultContainer = TString>
     static ResultContainer GenerateBuffer(const TLogoBlobID& id) {
-        return GenDataForLZ4<ResultContainer>(id.BlobSize());
+        return FastGenDataForLZ4<ResultContainer>(id.BlobSize());
     }
 
     STRICT_STFUNC(StateFunc,

+ 79 - 13
ydb/core/util/lz4_data_generator.h

@@ -24,27 +24,93 @@ inline ResultContainer GenDataForLZ4(const ui64 size, const ui64 seed = 0) {
     return data;
 }
 
-inline TString FastGenDataForLZ4(size_t size, ui64 seed) {
-    TString data = TString::Uninitialized(size);
-
+template <class ResultContainer = TString>
+inline ResultContainer FastGenDataForLZ4(size_t size, ui64 seed = 0) {
+    ResultContainer data = ResultContainer::Uninitialized(size);
+    char *ptr = [&]() -> char * {
+        if constexpr(std::is_same<ResultContainer, TString>::value) {
+            return data.Detach();
+        } else {
+            return data.mutable_data();
+        }
+    }();
     TReallyFastRng32 rng(seed);
 
     constexpr size_t minRunLen = 32;
     constexpr size_t maxRunLen = 64;
     const size_t runLen = minRunLen + sizeof(ui32) * (rng() % ((maxRunLen - minRunLen) / sizeof(ui32) + 1));
 
-    char run[maxRunLen];
-    ui32 i;
-    for (i = 0; i < runLen; i += sizeof(ui32)) {
-        reinterpret_cast<ui32&>(i[run]) = rng();
-    }
-    Y_DEBUG_ABORT_UNLESS(i == runLen);
+#define UNROLL(LEN) \
+    do {                                                        \
+        ui64 x0, x1, x2, x3, x4 = 0, x5 = 0, x6 = 0, x7 = 0;    \
+        x0 = rng() | (ui64)rng() << 32;                         \
+        x1 = rng() | (ui64)rng() << 32;                         \
+        x2 = rng() | (ui64)rng() << 32;                         \
+        x3 = rng() | (ui64)rng() << 32;                         \
+        if constexpr (LEN >= 36) {                              \
+            x4 = rng() | (ui64)rng() << 32;                     \
+        }                                                       \
+        if constexpr (LEN >= 44) {                              \
+            x5 = rng() | (ui64)rng() << 32;                     \
+        }                                                       \
+        if constexpr (LEN >= 52) {                              \
+            x6 = rng() | (ui64)rng() << 32;                     \
+        }                                                       \
+        if constexpr (LEN >= 60) {                              \
+            x7 = rng() | (ui64)rng() << 32;                     \
+        }                                                       \
+        while (size >= LEN) {                                   \
+            *reinterpret_cast<ui64*>(ptr) = x0;                 \
+            *reinterpret_cast<ui64*>(ptr + 8) = x1;             \
+            *reinterpret_cast<ui64*>(ptr + 16) = x2;            \
+            *reinterpret_cast<ui64*>(ptr + 24) = x3;            \
+            if constexpr (LEN == 36) {                          \
+                *reinterpret_cast<ui32*>(ptr + 32) = x4;        \
+            } else if constexpr (LEN >= 40) {                   \
+                *reinterpret_cast<ui64*>(ptr + 32) = x4;        \
+            }                                                   \
+            if constexpr (LEN == 44) {                          \
+                *reinterpret_cast<ui32*>(ptr + 40) = x5;        \
+            } else if constexpr (LEN >= 48) {                   \
+                *reinterpret_cast<ui64*>(ptr + 40) = x5;        \
+            }                                                   \
+            if constexpr (LEN == 52) {                          \
+                *reinterpret_cast<ui32*>(ptr + 48) = x6;        \
+            } else if constexpr (LEN >= 56) {                   \
+                *reinterpret_cast<ui64*>(ptr + 48) = x6;        \
+            }                                                   \
+            if constexpr (LEN == 60) {                          \
+                *reinterpret_cast<ui32*>(ptr + 56) = x7;        \
+            } else if constexpr (LEN >= 64) {                   \
+                *reinterpret_cast<ui64*>(ptr + 56) = x7;        \
+            }                                                   \
+            ptr += LEN;                                         \
+            size -= LEN;                                        \
+        }                                                       \
+        for (ui64 x : {x0, x1, x2, x3, x4, x5, x6, x7}) {       \
+            if (size >= 8) {                                    \
+                *reinterpret_cast<ui64*>(ptr) = x;              \
+                ptr += 8;                                       \
+                size -= 8;                                      \
+            } else {                                            \
+                memcpy(ptr, &x, size);                          \
+                break;                                          \
+            }                                                   \
+        }                                                       \
+    } while (false);
 
-    char *ptr = data.Detach();
-    for (; size >= runLen; size -= runLen, ptr += runLen) {
-        memcpy(ptr, run, runLen);
+    switch (runLen) {
+        case 32: UNROLL(32); break;
+        case 36: UNROLL(36); break;
+        case 40: UNROLL(40); break;
+        case 44: UNROLL(44); break;
+        case 48: UNROLL(48); break;
+        case 52: UNROLL(52); break;
+        case 56: UNROLL(56); break;
+        case 60: UNROLL(60); break;
+        case 64: UNROLL(64); break;
+        default: Y_ABORT();
     }
-    memcpy(ptr, run, size);
 
     return data;
 }