write_trie_backwards.cpp 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110
  1. #include "write_trie_backwards.h"
  2. #include "comptrie_impl.h"
  3. #include "leaf_skipper.h"
  4. #include <util/generic/buffer.h>
  5. #include <util/generic/vector.h>
  6. namespace NCompactTrie {
  7. size_t WriteTrieBackwards(IOutputStream& os, TReverseNodeEnumerator& enumerator, bool verbose) {
  8. if (verbose) {
  9. Cerr << "Writing down the trie..." << Endl;
  10. }
  11. // Rewrite everything from the back, removing unused pieces.
  12. const size_t chunksize = 0x10000;
  13. TVector<char*> resultData;
  14. resultData.push_back(new char[chunksize]);
  15. char* chunkend = resultData.back() + chunksize;
  16. size_t resultLength = 0;
  17. size_t chunkLength = 0;
  18. size_t counter = 0;
  19. TBuffer bufferHolder;
  20. while (enumerator.Move()) {
  21. if (verbose)
  22. ShowProgress(++counter);
  23. size_t bufferLength = 64 + enumerator.GetLeafLength(); // never know how big leaf data can be
  24. bufferHolder.Clear();
  25. bufferHolder.Resize(bufferLength);
  26. char* buffer = bufferHolder.Data();
  27. size_t nodelength = enumerator.RecreateNode(buffer, resultLength);
  28. Y_ASSERT(nodelength <= bufferLength);
  29. resultLength += nodelength;
  30. if (chunkLength + nodelength <= chunksize) {
  31. chunkLength += nodelength;
  32. memcpy(chunkend - chunkLength, buffer, nodelength);
  33. } else { // allocate a new chunk
  34. memcpy(chunkend - chunksize, buffer + nodelength - (chunksize - chunkLength), chunksize - chunkLength);
  35. chunkLength = chunkLength + nodelength - chunksize;
  36. resultData.push_back(new char[chunksize]);
  37. chunkend = resultData.back() + chunksize;
  38. while (chunkLength > chunksize) { // allocate a new chunks
  39. chunkLength -= chunksize;
  40. memcpy(chunkend - chunksize, buffer + chunkLength, chunksize);
  41. resultData.push_back(new char[chunksize]);
  42. chunkend = resultData.back() + chunksize;
  43. }
  44. memcpy(chunkend - chunkLength, buffer, chunkLength);
  45. }
  46. }
  47. if (verbose)
  48. Cerr << counter << Endl;
  49. // Write the whole thing down
  50. while (!resultData.empty()) {
  51. char* chunk = resultData.back();
  52. os.Write(chunk + chunksize - chunkLength, chunkLength);
  53. chunkLength = chunksize;
  54. delete[] chunk;
  55. resultData.pop_back();
  56. }
  57. return resultLength;
  58. }
  59. size_t WriteTrieBackwardsNoAlloc(IOutputStream& os, TReverseNodeEnumerator& enumerator, TOpaqueTrie& trie, EMinimizeMode mode) {
  60. char* data = const_cast<char*>(trie.Data);
  61. char* end = data + trie.Length;
  62. char* pos = end;
  63. TVector<char> buf(64);
  64. while (enumerator.Move()) {
  65. size_t nodeLength = enumerator.RecreateNode(nullptr, end - pos);
  66. if (nodeLength > buf.size())
  67. buf.resize(nodeLength);
  68. size_t realLength = enumerator.RecreateNode(buf.data(), end - pos);
  69. Y_ASSERT(realLength == nodeLength);
  70. pos -= nodeLength;
  71. memcpy(pos, buf.data(), nodeLength);
  72. }
  73. switch (mode) {
  74. case MM_NOALLOC:
  75. os.Write(pos, end - pos);
  76. break;
  77. case MM_INPLACE:
  78. memmove(data, pos, end - pos);
  79. break;
  80. default:
  81. Y_ABORT_UNLESS(false);
  82. }
  83. return end - pos;
  84. }
  85. }