// packers.h
  1. #pragma once
#include <util/generic/string.h>
#include <util/generic/strbuf.h>
#include <util/generic/set.h>
#include <util/generic/list.h>
#include <util/generic/vector.h>
#include <util/generic/bitops.h>

#include <array>
#include <cstring>
// Data serialization strategy classes.
// The default implementation can pack only a limited range of types, but you can pack any other data using your own strategy class.
  11. template <class T>
  12. class TNullPacker { // Very effective package class - pack any data into zero bytes :)
  13. public:
  14. void UnpackLeaf(const char*, T& t) const {
  15. t = T();
  16. }
  17. void PackLeaf(char*, const T&, size_t) const {
  18. }
  19. size_t MeasureLeaf(const T&) const {
  20. return 0;
  21. }
  22. size_t SkipLeaf(const char*) const {
  23. return 0;
  24. }
  25. };
  26. template <typename T>
  27. class TAsIsPacker { // this packer is not really a packer...
  28. public:
  29. void UnpackLeaf(const char* p, T& t) const {
  30. memcpy(&t, p, sizeof(T));
  31. }
  32. void PackLeaf(char* buffer, const T& data, size_t computedSize) const {
  33. Y_ASSERT(computedSize == sizeof(data));
  34. memcpy(buffer, &data, sizeof(T));
  35. }
  36. size_t MeasureLeaf(const T& data) const {
  37. Y_UNUSED(data);
  38. return sizeof(T);
  39. }
  40. size_t SkipLeaf(const char*) const {
  41. return sizeof(T);
  42. }
  43. };
  44. // Implementation
  45. namespace NPackers {
  46. template <class T>
  47. inline ui64 ConvertIntegral(const T& data);
  48. template <>
  49. inline ui64 ConvertIntegral(const i64& data) {
  50. if (data < 0) {
  51. return (static_cast<ui64>(-1 * data) << 1) | 1;
  52. } else {
  53. return static_cast<ui64>(data) << 1;
  54. }
  55. }
  56. namespace NImpl {
  57. template <class T, bool isSigned>
  58. struct TConvertImpl {
  59. static inline ui64 Convert(const T& data);
  60. };
  61. template <class T>
  62. struct TConvertImpl<T, true> {
  63. static inline ui64 Convert(const T& data) {
  64. return ConvertIntegral<i64>(static_cast<i64>(data));
  65. }
  66. };
  67. template <class T>
  68. struct TConvertImpl<T, false> {
  69. static inline ui64 Convert(const T& data) {
  70. return data;
  71. }
  72. };
  73. }
  74. template <class T>
  75. inline ui64 ConvertIntegral(const T& data) {
  76. static_assert(std::is_integral<T>::value, "T must be integral type");
  77. return NImpl::TConvertImpl<T, std::is_signed<T>::value>::Convert(data);
  78. }
  79. //---------------------------------
  80. // TIntegralPacker --- for integral types.
  81. template <class T>
  82. class TIntegralPacker { // can pack only integral types <= ui64
  83. public:
  84. void UnpackLeaf(const char* p, T& t) const;
  85. void PackLeaf(char* buffer, const T& data, size_t size) const;
  86. size_t MeasureLeaf(const T& data) const;
  87. size_t SkipLeaf(const char* p) const;
  88. };
  89. template <>
  90. inline size_t TIntegralPacker<ui64>::MeasureLeaf(const ui64& val) const {
  91. constexpr size_t MAX_SIZE = sizeof(ui64) + sizeof(ui64) / 8;
  92. ui64 value = val;
  93. size_t len = 1;
  94. value >>= 7;
  95. for (; value && len < MAX_SIZE; value >>= 7)
  96. ++len;
  97. return len;
  98. }
  99. template <>
  100. inline void TIntegralPacker<ui64>::PackLeaf(char* buffer, const ui64& val, size_t len) const {
  101. ui64 value = val;
  102. int lenmask = 0;
  103. for (size_t i = len - 1; i; --i) {
  104. buffer[i] = (char)(value & 0xFF);
  105. value >>= 8;
  106. lenmask = ((lenmask >> 1) | (1 << 7));
  107. }
  108. buffer[0] = (char)(lenmask | value);
  109. }
  110. extern const ui8 SkipTable[];
  111. template <>
  112. inline void TIntegralPacker<ui64>::UnpackLeaf(const char* p, ui64& result) const {
  113. unsigned char ch = *(p++);
  114. size_t taillen = SkipTable[ch] - 1;
  115. result = (ch & (0x7F >> taillen));
  116. while (taillen--)
  117. result = ((result << 8) | (*(p++) & 0xFF));
  118. }
  119. template <>
  120. inline size_t TIntegralPacker<ui64>::SkipLeaf(const char* p) const {
  121. return SkipTable[(ui8)*p];
  122. }
  123. namespace NImpl {
  124. template <class T, bool isSigned>
  125. struct TUnpackLeafImpl {
  126. inline void UnpackLeaf(const char* p, T& t) const;
  127. };
  128. template <class T>
  129. struct TUnpackLeafImpl<T, true> {
  130. inline void UnpackLeaf(const char* p, T& t) const {
  131. ui64 val;
  132. TIntegralPacker<ui64>().UnpackLeaf(p, val);
  133. if (val & 1) {
  134. t = -1 * static_cast<i64>(val >> 1);
  135. } else {
  136. t = static_cast<T>(val >> 1);
  137. }
  138. }
  139. };
  140. template <class T>
  141. struct TUnpackLeafImpl<T, false> {
  142. inline void UnpackLeaf(const char* p, T& t) const {
  143. ui64 tmp;
  144. TIntegralPacker<ui64>().UnpackLeaf(p, tmp);
  145. t = static_cast<T>(tmp);
  146. }
  147. };
  148. }
  149. template <class T>
  150. inline void TIntegralPacker<T>::UnpackLeaf(const char* p, T& t) const {
  151. NImpl::TUnpackLeafImpl<T, std::is_signed<T>::value>().UnpackLeaf(p, t);
  152. }
  153. template <class T>
  154. inline void TIntegralPacker<T>::PackLeaf(char* buffer, const T& data, size_t size) const {
  155. TIntegralPacker<ui64>().PackLeaf(buffer, ConvertIntegral<T>(data), size);
  156. }
  157. template <class T>
  158. inline size_t TIntegralPacker<T>::MeasureLeaf(const T& data) const {
  159. return TIntegralPacker<ui64>().MeasureLeaf(ConvertIntegral<T>(data));
  160. }
  161. template <class T>
  162. inline size_t TIntegralPacker<T>::SkipLeaf(const char* p) const {
  163. return TIntegralPacker<ui64>().SkipLeaf(p);
  164. }
  165. //-------------------------------------------
  166. // TFPPacker --- for float/double
  167. namespace NImpl {
  168. template <class TFloat, class TUInt>
  169. class TFPPackerBase {
  170. protected:
  171. typedef TIntegralPacker<TUInt> TPacker;
  172. union THelper {
  173. TFloat F;
  174. TUInt U;
  175. };
  176. TFloat FromUInt(TUInt u) const {
  177. THelper h;
  178. h.U = ReverseBytes(u);
  179. return h.F;
  180. }
  181. TUInt ToUInt(TFloat f) const {
  182. THelper h;
  183. h.F = f;
  184. return ReverseBytes(h.U);
  185. }
  186. public:
  187. void UnpackLeaf(const char* c, TFloat& t) const {
  188. TUInt u = 0;
  189. TPacker().UnpackLeaf(c, u);
  190. t = FromUInt(u);
  191. }
  192. void PackLeaf(char* c, const TFloat& t, size_t sz) const {
  193. TPacker().PackLeaf(c, ToUInt(t), sz);
  194. }
  195. size_t MeasureLeaf(const TFloat& t) const {
  196. return TPacker().MeasureLeaf(ToUInt(t));
  197. }
  198. size_t SkipLeaf(const char* c) const {
  199. return TPacker().SkipLeaf(c);
  200. }
  201. };
  202. }
  203. class TFloatPacker: public NImpl::TFPPackerBase<float, ui32> {
  204. };
  205. class TDoublePacker: public NImpl::TFPPackerBase<double, ui64> {
  206. };
  207. //-------------------------------------------
  208. // TStringPacker --- for TString/TUtf16String and TStringBuf.
  209. template <class TStringType>
  210. class TStringPacker {
  211. public:
  212. void UnpackLeaf(const char* p, TStringType& t) const;
  213. void PackLeaf(char* buffer, const TStringType& data, size_t size) const;
  214. size_t MeasureLeaf(const TStringType& data) const;
  215. size_t SkipLeaf(const char* p) const;
  216. };
  217. template <class TStringType>
  218. inline void TStringPacker<TStringType>::UnpackLeaf(const char* buf, TStringType& t) const {
  219. size_t len;
  220. TIntegralPacker<size_t>().UnpackLeaf(buf, len);
  221. size_t start = TIntegralPacker<size_t>().SkipLeaf(buf);
  222. t = TStringType((const typename TStringType::char_type*)(buf + start), len);
  223. }
  224. template <class TStringType>
  225. inline void TStringPacker<TStringType>::PackLeaf(char* buf, const TStringType& str, size_t size) const {
  226. size_t len = str.size();
  227. size_t lenChar = len * sizeof(typename TStringType::char_type);
  228. size_t start = size - lenChar;
  229. TIntegralPacker<size_t>().PackLeaf(buf, len, TIntegralPacker<size_t>().MeasureLeaf(len));
  230. memcpy(buf + start, str.data(), lenChar);
  231. }
  232. template <class TStringType>
  233. inline size_t TStringPacker<TStringType>::MeasureLeaf(const TStringType& str) const {
  234. size_t len = str.size();
  235. return TIntegralPacker<size_t>().MeasureLeaf(len) + len * sizeof(typename TStringType::char_type);
  236. }
  237. template <class TStringType>
  238. inline size_t TStringPacker<TStringType>::SkipLeaf(const char* buf) const {
  239. size_t result = TIntegralPacker<size_t>().SkipLeaf(buf);
  240. {
  241. size_t len;
  242. TIntegralPacker<size_t>().UnpackLeaf(buf, len);
  243. result += len * sizeof(typename TStringType::char_type);
  244. }
  245. return result;
  246. }
  247. template <class T>
  248. class TPacker;
// TContainerPacker --- for any container
// Requirements for class C:
// - has a method size() (returns size_t)
// - has a nested type C::value_type
// - has a nested type C::const_iterator
// - has methods begin() and end() (returning C::const_iterator)
// - has a method insert(C::const_iterator, const C::value_type&)
// Examples: TVector, TList, TSet
// Requirements for class EP: has the same methods as any packer (UnpackLeaf, PackLeaf, MeasureLeaf, SkipLeaf),
// applicable to C::value_type
  259. template <typename T>
  260. struct TContainerInfo {
  261. enum {
  262. IsVector = 0
  263. };
  264. };
  265. template <typename T>
  266. struct TContainerInfo<std::vector<T>> {
  267. enum {
  268. IsVector = 1
  269. };
  270. };
  271. template <typename T>
  272. struct TContainerInfo<TVector<T>> {
  273. enum {
  274. IsVector = 1
  275. };
  276. };
  277. template <bool IsVector>
  278. class TContainerPackerHelper {
  279. };
  280. template <>
  281. class TContainerPackerHelper<false> {
  282. public:
  283. template <class Packer, class Container>
  284. static void UnpackLeaf(Packer& p, const char* buffer, Container& c) {
  285. p.UnpackLeafSimple(buffer, c);
  286. }
  287. };
  288. template <>
  289. class TContainerPackerHelper<true> {
  290. public:
  291. template <class Packer, class Container>
  292. static void UnpackLeaf(Packer& p, const char* buffer, Container& c) {
  293. p.UnpackLeafVector(buffer, c);
  294. }
  295. };
  296. template <class C, class EP = TPacker<typename C::value_type>>
  297. class TContainerPacker {
  298. private:
  299. typedef C TContainer;
  300. typedef EP TElementPacker;
  301. typedef typename TContainer::const_iterator TElementIterator;
  302. void UnpackLeafSimple(const char* buffer, TContainer& c) const;
  303. void UnpackLeafVector(const char* buffer, TContainer& c) const;
  304. friend class TContainerPackerHelper<TContainerInfo<C>::IsVector>;
  305. public:
  306. void UnpackLeaf(const char* buffer, TContainer& c) const {
  307. TContainerPackerHelper<TContainerInfo<C>::IsVector>::UnpackLeaf(*this, buffer, c);
  308. }
  309. void PackLeaf(char* buffer, const TContainer& data, size_t size) const;
  310. size_t MeasureLeaf(const TContainer& data) const;
  311. size_t SkipLeaf(const char* buffer) const;
  312. };
  313. template <class C, class EP>
  314. inline void TContainerPacker<C, EP>::UnpackLeafSimple(const char* buffer, C& result) const {
  315. size_t offset = TIntegralPacker<size_t>().SkipLeaf(buffer); // first value is the total size (not needed here)
  316. size_t len;
  317. TIntegralPacker<size_t>().UnpackLeaf(buffer + offset, len);
  318. offset += TIntegralPacker<size_t>().SkipLeaf(buffer + offset);
  319. result.clear();
  320. typename C::value_type value;
  321. for (size_t i = 0; i < len; i++) {
  322. TElementPacker().UnpackLeaf(buffer + offset, value);
  323. result.insert(result.end(), value);
  324. offset += TElementPacker().SkipLeaf(buffer + offset);
  325. }
  326. }
  327. template <class C, class EP>
  328. inline void TContainerPacker<C, EP>::UnpackLeafVector(const char* buffer, C& result) const {
  329. size_t offset = TIntegralPacker<size_t>().SkipLeaf(buffer); // first value is the total size (not needed here)
  330. size_t len;
  331. TIntegralPacker<size_t>().UnpackLeaf(buffer + offset, len);
  332. offset += TIntegralPacker<size_t>().SkipLeaf(buffer + offset);
  333. result.resize(len);
  334. for (size_t i = 0; i < len; i++) {
  335. TElementPacker().UnpackLeaf(buffer + offset, result[i]);
  336. offset += TElementPacker().SkipLeaf(buffer + offset);
  337. }
  338. }
  339. template <class C, class EP>
  340. inline void TContainerPacker<C, EP>::PackLeaf(char* buffer, const C& data, size_t size) const {
  341. size_t sizeOfSize = TIntegralPacker<size_t>().MeasureLeaf(size);
  342. TIntegralPacker<size_t>().PackLeaf(buffer, size, sizeOfSize);
  343. size_t len = data.size();
  344. size_t curSize = TIntegralPacker<size_t>().MeasureLeaf(len);
  345. TIntegralPacker<size_t>().PackLeaf(buffer + sizeOfSize, len, curSize);
  346. curSize += sizeOfSize;
  347. for (TElementIterator p = data.begin(); p != data.end(); p++) {
  348. size_t sizeChange = TElementPacker().MeasureLeaf(*p);
  349. TElementPacker().PackLeaf(buffer + curSize, *p, sizeChange);
  350. curSize += sizeChange;
  351. }
  352. Y_ASSERT(curSize == size);
  353. }
  354. template <class C, class EP>
  355. inline size_t TContainerPacker<C, EP>::MeasureLeaf(const C& data) const {
  356. size_t curSize = TIntegralPacker<size_t>().MeasureLeaf(data.size());
  357. for (TElementIterator p = data.begin(); p != data.end(); p++)
  358. curSize += TElementPacker().MeasureLeaf(*p);
  359. size_t extraSize = TIntegralPacker<size_t>().MeasureLeaf(curSize);
  360. // Double measurement protects against sudden increases in extraSize,
  361. // e.g. when curSize is 127 and stays in one byte, but curSize + 1 requires two bytes.
  362. extraSize = TIntegralPacker<size_t>().MeasureLeaf(curSize + extraSize);
  363. Y_ASSERT(extraSize == TIntegralPacker<size_t>().MeasureLeaf(curSize + extraSize));
  364. return curSize + extraSize;
  365. }
  366. template <class C, class EP>
  367. inline size_t TContainerPacker<C, EP>::SkipLeaf(const char* buffer) const {
  368. size_t value;
  369. TIntegralPacker<size_t>().UnpackLeaf(buffer, value);
  370. return value;
  371. }
  372. // TPairPacker --- for std::pair<T1, T2> (any two types; can be nested)
  373. // TPacker<T1> and TPacker<T2> should be valid classes
  374. template <class T1, class T2, class TPacker1 = TPacker<T1>, class TPacker2 = TPacker<T2>>
  375. class TPairPacker {
  376. private:
  377. typedef std::pair<T1, T2> TMyPair;
  378. public:
  379. void UnpackLeaf(const char* buffer, TMyPair& pair) const;
  380. void PackLeaf(char* buffer, const TMyPair& data, size_t size) const;
  381. size_t MeasureLeaf(const TMyPair& data) const;
  382. size_t SkipLeaf(const char* buffer) const;
  383. };
  384. template <class T1, class T2, class TPacker1, class TPacker2>
  385. inline void TPairPacker<T1, T2, TPacker1, TPacker2>::UnpackLeaf(const char* buffer, std::pair<T1, T2>& pair) const {
  386. TPacker1().UnpackLeaf(buffer, pair.first);
  387. size_t size = TPacker1().SkipLeaf(buffer);
  388. TPacker2().UnpackLeaf(buffer + size, pair.second);
  389. }
  390. template <class T1, class T2, class TPacker1, class TPacker2>
  391. inline void TPairPacker<T1, T2, TPacker1, TPacker2>::PackLeaf(char* buffer, const std::pair<T1, T2>& data, size_t size) const {
  392. size_t size1 = TPacker1().MeasureLeaf(data.first);
  393. TPacker1().PackLeaf(buffer, data.first, size1);
  394. size_t size2 = TPacker2().MeasureLeaf(data.second);
  395. TPacker2().PackLeaf(buffer + size1, data.second, size2);
  396. Y_ASSERT(size == size1 + size2);
  397. }
  398. template <class T1, class T2, class TPacker1, class TPacker2>
  399. inline size_t TPairPacker<T1, T2, TPacker1, TPacker2>::MeasureLeaf(const std::pair<T1, T2>& data) const {
  400. size_t size1 = TPacker1().MeasureLeaf(data.first);
  401. size_t size2 = TPacker2().MeasureLeaf(data.second);
  402. return size1 + size2;
  403. }
  404. template <class T1, class T2, class TPacker1, class TPacker2>
  405. inline size_t TPairPacker<T1, T2, TPacker1, TPacker2>::SkipLeaf(const char* buffer) const {
  406. size_t size1 = TPacker1().SkipLeaf(buffer);
  407. size_t size2 = TPacker2().SkipLeaf(buffer + size1);
  408. return size1 + size2;
  409. }
  410. //------------------------------------------------------------------------------------------
  411. // Packer for fixed-size arrays, i.e. for std::array.
  412. // Saves memory by not storing anything about their size.
  413. // SkipLeaf skips every value, so can be slow for big arrays.
  414. // Requires std::tuple_size<TValue>, TValue::operator[] and possibly TValue::value_type.
  415. template <class TValue, class TElementPacker = TPacker<typename TValue::value_type>>
  416. class TArrayPacker {
  417. public:
  418. using TElemPacker = TElementPacker;
  419. enum {
  420. Size = std::tuple_size<TValue>::value
  421. };
  422. void UnpackLeaf(const char* p, TValue& t) const {
  423. const char* buf = p;
  424. for (size_t i = 0; i < Size; ++i) {
  425. TElemPacker().UnpackLeaf(buf, t[i]);
  426. buf += TElemPacker().SkipLeaf(buf);
  427. }
  428. }
  429. void PackLeaf(char* buffer, const TValue& data, size_t computedSize) const {
  430. size_t remainingSize = computedSize;
  431. char* pos = buffer;
  432. for (size_t i = 0; i < Size; ++i) {
  433. const size_t elemSize = TElemPacker().MeasureLeaf(data[i]);
  434. TElemPacker().PackLeaf(pos, data[i], Min(elemSize, remainingSize));
  435. pos += elemSize;
  436. remainingSize -= elemSize;
  437. }
  438. }
  439. size_t MeasureLeaf(const TValue& data) const {
  440. size_t result = 0;
  441. for (size_t i = 0; i < Size; ++i) {
  442. result += TElemPacker().MeasureLeaf(data[i]);
  443. }
  444. return result;
  445. }
  446. size_t SkipLeaf(const char* p) const // this function better be fast because it is very frequently used
  447. {
  448. const char* buf = p;
  449. for (size_t i = 0; i < Size; ++i) {
  450. buf += TElemPacker().SkipLeaf(buf);
  451. }
  452. return buf - p;
  453. }
  454. };
  455. //------------------------------------
  456. // TPacker --- the generic packer.
  457. template <class T, bool IsIntegral>
  458. class TPackerImpl;
  459. template <class T>
  460. class TPackerImpl<T, true>: public TIntegralPacker<T> {
  461. };
  462. // No implementation for non-integral types.
  463. template <class T>
  464. class TPacker: public TPackerImpl<T, std::is_integral<T>::value> {
  465. };
  466. template <>
  467. class TPacker<float>: public TAsIsPacker<float> {
  468. };
  469. template <>
  470. class TPacker<double>: public TAsIsPacker<double> {
  471. };
  472. template <>
  473. class TPacker<TString>: public TStringPacker<TString> {
  474. };
  475. template <>
  476. class TPacker<TUtf16String>: public TStringPacker<TUtf16String> {
  477. };
  478. template <>
  479. class TPacker<TStringBuf>: public TStringPacker<TStringBuf> {
  480. };
  481. template <>
  482. class TPacker<TWtringBuf>: public TStringPacker<TWtringBuf> {
  483. };
  484. template <class T>
  485. class TPacker<std::vector<T>>: public TContainerPacker<std::vector<T>> {
  486. };
  487. template <class T>
  488. class TPacker<TVector<T>>: public TContainerPacker<TVector<T>> {
  489. };
  490. template <class T>
  491. class TPacker<std::list<T>>: public TContainerPacker<std::list<T>> {
  492. };
  493. template <class T>
  494. class TPacker<TList<T>>: public TContainerPacker<TList<T>> {
  495. };
  496. template <class T>
  497. class TPacker<std::set<T>>: public TContainerPacker<std::set<T>> {
  498. };
  499. template <class T>
  500. class TPacker<TSet<T>>: public TContainerPacker<TSet<T>> {
  501. };
  502. template <class T1, class T2>
  503. class TPacker<std::pair<T1, T2>>: public TPairPacker<T1, T2> {
  504. };
  505. template <class T, size_t N>
  506. class TPacker<std::array<T, N>>: public TArrayPacker<std::array<T, N>> {
  507. };
  508. }