make_fast_layout.cpp 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467
  1. #include "make_fast_layout.h"
  2. #include "node.h"
  3. #include "writeable_node.h"
  4. #include "write_trie_backwards.h"
  5. #include "comptrie_impl.h"
  6. #include <util/generic/hash.h>
  7. #include <util/generic/utility.h>
  8. // Lay the trie in memory in such a way that there are less cache misses when jumping from root to leaf.
  9. // The trie becomes about 2% larger, but the access became about 25% faster in our experiments.
  10. // Can be called on minimized and non-minimized tries, in the first case in requires half a trie more memory.
  11. // Calling it the second time on the same trie does nothing.
  12. //
  13. // The algorithm is based on van Emde Boas layout as described in the yandex data school lectures on external memory algoritms
  14. // by Maxim Babenko and Ivan Puzyrevsky. The difference is that when we cut the tree into levels
  15. // two nodes connected by a forward link are put into the same level (because they usually lie near each other in the original tree).
  16. // The original paper (describing the layout in Section 2.1) is:
  17. // Michael A. Bender, Erik D. Demaine, Martin Farach-Colton. Cache-Oblivious B-Trees // SIAM Journal on Computing, volume 35, number 2, 2005, pages 341–358.
  18. // Available on the web: http://erikdemaine.org/papers/CacheObliviousBTrees_SICOMP/
  19. // Or: Michael A. Bender, Erik D. Demaine, and Martin Farach-Colton. Cache-Oblivious B-Trees // Proceedings of the 41st Annual Symposium
  20. // on Foundations of Computer Science (FOCS 2000), Redondo Beach, California, November 12–14, 2000, pages 399–409.
  21. // Available on the web: http://erikdemaine.org/papers/FOCS2000b/
  22. // (there is not much difference between these papers, actually).
  23. //
  24. namespace NCompactTrie {
  25. static size_t FindSupportingPowerOf2(size_t n) {
  26. size_t result = 1ull << (8 * sizeof(size_t) - 1);
  27. while (result > n) {
  28. result >>= 1;
  29. }
  30. return result;
  31. }
  32. namespace {
  33. class TTrieNodeSet {
  34. public:
  35. TTrieNodeSet() = default;
  36. explicit TTrieNodeSet(const TOpaqueTrie& trie)
  37. : Body(trie.Length / (8 * MinNodeSize) + 1, 0)
  38. {
  39. }
  40. bool Has(size_t offset) const {
  41. const size_t reducedOffset = ReducedOffset(offset);
  42. return OffsetCell(reducedOffset) & OffsetMask(reducedOffset);
  43. }
  44. void Add(size_t offset) {
  45. const size_t reducedOffset = ReducedOffset(offset);
  46. OffsetCell(reducedOffset) |= OffsetMask(reducedOffset);
  47. }
  48. void Remove(size_t offset) {
  49. const size_t reducedOffset = ReducedOffset(offset);
  50. OffsetCell(reducedOffset) &= ~OffsetMask(reducedOffset);
  51. }
  52. void Swap(TTrieNodeSet& other) {
  53. Body.swap(other.Body);
  54. }
  55. private:
  56. static const size_t MinNodeSize = 2;
  57. TVector<ui8> Body;
  58. static size_t ReducedOffset(size_t offset) {
  59. return offset / MinNodeSize;
  60. }
  61. static ui8 OffsetMask(size_t reducedOffset) {
  62. return 1 << (reducedOffset % 8);
  63. }
  64. ui8& OffsetCell(size_t reducedOffset) {
  65. return Body.at(reducedOffset / 8);
  66. }
  67. const ui8& OffsetCell(size_t reducedOffset) const {
  68. return Body.at(reducedOffset / 8);
  69. }
  70. };
  71. //---------------------------------------------------------------------
  72. class TTrieNodeCounts {
  73. public:
  74. TTrieNodeCounts() = default;
  75. explicit TTrieNodeCounts(const TOpaqueTrie& trie)
  76. : Body(trie.Length / MinNodeSize, 0)
  77. , IsTree(false)
  78. {
  79. }
  80. size_t Get(size_t offset) const {
  81. return IsTree ? 1 : Body.at(offset / MinNodeSize);
  82. }
  83. void Inc(size_t offset) {
  84. if (IsTree) {
  85. return;
  86. }
  87. ui8& count = Body.at(offset / MinNodeSize);
  88. if (count != MaxCount) {
  89. ++count;
  90. }
  91. }
  92. size_t Dec(size_t offset) {
  93. if (IsTree) {
  94. return 0;
  95. }
  96. ui8& count = Body.at(offset / MinNodeSize);
  97. Y_ASSERT(count > 0);
  98. if (count != MaxCount) {
  99. --count;
  100. }
  101. return count;
  102. }
  103. void Swap(TTrieNodeCounts& other) {
  104. Body.swap(other.Body);
  105. ::DoSwap(IsTree, other.IsTree);
  106. }
  107. void SetTreeMode() {
  108. IsTree = true;
  109. Body = TVector<ui8>();
  110. }
  111. private:
  112. static const size_t MinNodeSize = 2;
  113. static const ui8 MaxCount = 255;
  114. TVector<ui8> Body;
  115. bool IsTree = false;
  116. };
  117. //----------------------------------------------------------
  118. class TOffsetIndex {
  119. public:
  120. // In all methods:
  121. // Key --- offset from the beginning of the old trie.
  122. // Value --- offset from the end of the new trie.
  123. explicit TOffsetIndex(TTrieNodeCounts& counts) {
  124. ParentCounts.Swap(counts);
  125. }
  126. void Add(size_t key, size_t value) {
  127. Data[key] = value;
  128. }
  129. size_t Size() const {
  130. return Data.size();
  131. }
  132. size_t Get(size_t key) {
  133. auto pos = Data.find(key);
  134. if (pos == Data.end()) {
  135. ythrow yexception() << "Bad node walking order: trying to get node offset too early or too many times!";
  136. }
  137. size_t result = pos->second;
  138. if (ParentCounts.Dec(key) == 0) {
  139. // We don't need this offset any more.
  140. Data.erase(pos);
  141. }
  142. return result;
  143. }
  144. private:
  145. TTrieNodeCounts ParentCounts;
  146. THashMap<size_t, size_t> Data;
  147. };
  148. //---------------------------------------------------------------------------------------
  149. class TTrieMeasurer {
  150. public:
  151. TTrieMeasurer(const TOpaqueTrie& trie, bool verbose);
  152. void Measure();
  153. size_t GetDepth() const {
  154. return Depth;
  155. }
  156. size_t GetNodeCount() const {
  157. return NodeCount;
  158. }
  159. size_t GetUnminimizedNodeCount() const {
  160. return UnminimizedNodeCount;
  161. }
  162. bool IsTree() const {
  163. return NodeCount == UnminimizedNodeCount;
  164. }
  165. TTrieNodeCounts& GetParentCounts() {
  166. return ParentCounts;
  167. }
  168. const TOpaqueTrie& GetTrie() const {
  169. return Trie;
  170. }
  171. private:
  172. const TOpaqueTrie& Trie;
  173. size_t Depth;
  174. TTrieNodeCounts ParentCounts;
  175. size_t NodeCount;
  176. size_t UnminimizedNodeCount;
  177. const bool Verbose;
  178. // returns depth, increments NodeCount.
  179. size_t MeasureSubtrie(size_t rootOffset, bool isNewPath);
  180. };
  181. TTrieMeasurer::TTrieMeasurer(const TOpaqueTrie& trie, bool verbose)
  182. : Trie(trie)
  183. , Depth(0)
  184. , ParentCounts(trie)
  185. , NodeCount(0)
  186. , UnminimizedNodeCount(0)
  187. , Verbose(verbose)
  188. {
  189. Y_ASSERT(Trie.Data);
  190. }
  191. void TTrieMeasurer::Measure() {
  192. if (Verbose) {
  193. Cerr << "Measuring the trie..." << Endl;
  194. }
  195. NodeCount = 0;
  196. UnminimizedNodeCount = 0;
  197. Depth = MeasureSubtrie(0, true);
  198. if (IsTree()) {
  199. ParentCounts.SetTreeMode();
  200. }
  201. if (Verbose) {
  202. Cerr << "Unminimized node count: " << UnminimizedNodeCount << Endl;
  203. Cerr << "Trie depth: " << Depth << Endl;
  204. Cerr << "Node count: " << NodeCount << Endl;
  205. }
  206. }
  207. // A chain of nodes linked by forward links
  208. // is considered one node with many left and right children
  209. // for depth measuring here and in
  210. // TVanEmdeBoasReverseNodeEnumerator::FindDescendants.
  211. size_t TTrieMeasurer::MeasureSubtrie(size_t rootOffset, bool isNewPath) {
  212. Y_ASSERT(rootOffset < Trie.Length);
  213. TNode node(Trie.Data, rootOffset, Trie.SkipFunction);
  214. size_t depth = 0;
  215. for (;;) {
  216. ++UnminimizedNodeCount;
  217. if (Verbose) {
  218. ShowProgress(UnminimizedNodeCount);
  219. }
  220. if (isNewPath) {
  221. if (ParentCounts.Get(node.GetOffset()) > 0) {
  222. isNewPath = false;
  223. } else {
  224. ++NodeCount;
  225. }
  226. ParentCounts.Inc(node.GetOffset());
  227. }
  228. if (node.GetLeftOffset()) {
  229. depth = Max(depth, 1 + MeasureSubtrie(node.GetLeftOffset(), isNewPath));
  230. }
  231. if (node.GetRightOffset()) {
  232. depth = Max(depth, 1 + MeasureSubtrie(node.GetRightOffset(), isNewPath));
  233. }
  234. if (node.GetForwardOffset()) {
  235. node = TNode(Trie.Data, node.GetForwardOffset(), Trie.SkipFunction);
  236. } else {
  237. break;
  238. }
  239. }
  240. return depth;
  241. }
  242. //--------------------------------------------------------------------------------------
  243. using TLevelNodes = TVector<size_t>;
  244. struct TLevel {
  245. size_t Depth;
  246. TLevelNodes Nodes;
  247. explicit TLevel(size_t depth)
  248. : Depth(depth)
  249. {
  250. }
  251. };
  252. //----------------------------------------------------------------------------------------
  253. class TVanEmdeBoasReverseNodeEnumerator: public TReverseNodeEnumerator {
  254. public:
  255. TVanEmdeBoasReverseNodeEnumerator(TTrieMeasurer& measurer, bool verbose)
  256. : Fresh(true)
  257. , Trie(measurer.GetTrie())
  258. , Depth(measurer.GetDepth())
  259. , MaxIndexSize(0)
  260. , BackIndex(measurer.GetParentCounts())
  261. , ProcessedNodes(measurer.GetTrie())
  262. , Verbose(verbose)
  263. {
  264. }
  265. bool Move() override {
  266. if (!Fresh) {
  267. CentralWord.pop_back();
  268. }
  269. if (CentralWord.empty()) {
  270. return MoveCentralWordStart();
  271. }
  272. return true;
  273. }
  274. const TNode& Get() const {
  275. return CentralWord.back();
  276. }
  277. size_t GetLeafLength() const override {
  278. return Get().GetLeafLength();
  279. }
  280. // Returns recalculated offset from the end of the current node.
  281. size_t PrepareOffset(size_t absoffset, size_t resultLength) {
  282. if (!absoffset)
  283. return NPOS;
  284. return resultLength - BackIndex.Get(absoffset);
  285. }
  286. size_t RecreateNode(char* buffer, size_t resultLength) override {
  287. TWriteableNode newNode(Get(), Trie.Data);
  288. newNode.ForwardOffset = PrepareOffset(Get().GetForwardOffset(), resultLength);
  289. newNode.LeftOffset = PrepareOffset(Get().GetLeftOffset(), resultLength);
  290. newNode.RightOffset = PrepareOffset(Get().GetRightOffset(), resultLength);
  291. const size_t len = newNode.Pack(buffer);
  292. ProcessedNodes.Add(Get().GetOffset());
  293. BackIndex.Add(Get().GetOffset(), resultLength + len);
  294. MaxIndexSize = Max(MaxIndexSize, BackIndex.Size());
  295. return len;
  296. }
  297. private:
  298. bool Fresh;
  299. TOpaqueTrie Trie;
  300. size_t Depth;
  301. size_t MaxIndexSize;
  302. TVector<TLevel> Trace;
  303. TOffsetIndex BackIndex;
  304. TVector<TNode> CentralWord;
  305. TTrieNodeSet ProcessedNodes;
  306. const bool Verbose;
  307. private:
  308. bool IsVisited(size_t offset) const {
  309. return ProcessedNodes.Has(offset);
  310. }
  311. void AddCascade(size_t step) {
  312. Y_ASSERT(!(step & (step - 1))); // Should be a power of 2.
  313. while (step > 0) {
  314. size_t root = Trace.back().Nodes.back();
  315. TLevel level(Trace.back().Depth + step);
  316. Trace.push_back(level);
  317. size_t actualStep = FindSupportingPowerOf2(FindDescendants(root, step, Trace.back().Nodes));
  318. if (actualStep != step) {
  319. // Retry with a smaller step.
  320. Y_ASSERT(actualStep < step);
  321. step = actualStep;
  322. Trace.pop_back();
  323. } else {
  324. step /= 2;
  325. }
  326. }
  327. }
  328. void FillCentralWord() {
  329. CentralWord.clear();
  330. CentralWord.push_back(TNode(Trie.Data, Trace.back().Nodes.back(), Trie.SkipFunction));
  331. // Do not check for epsilon links, as the traversal order now is different.
  332. while (CentralWord.back().GetForwardOffset() && !IsVisited(CentralWord.back().GetForwardOffset())) {
  333. CentralWord.push_back(TNode(Trie.Data, CentralWord.back().GetForwardOffset(), Trie.SkipFunction));
  334. }
  335. }
  336. bool MoveCentralWordStart() {
  337. do {
  338. if (Fresh) {
  339. TLevel root(0);
  340. Trace.push_back(root);
  341. Trace.back().Nodes.push_back(0);
  342. const size_t sectionDepth = FindSupportingPowerOf2(Depth);
  343. AddCascade(sectionDepth);
  344. Fresh = false;
  345. } else {
  346. Trace.back().Nodes.pop_back();
  347. if (Trace.back().Nodes.empty() && Trace.size() == 1) {
  348. if (Verbose) {
  349. Cerr << "Max index size: " << MaxIndexSize << Endl;
  350. Cerr << "Current index size: " << BackIndex.Size() << Endl;
  351. }
  352. // We just popped the root.
  353. return false;
  354. }
  355. size_t lastStep = Trace.back().Depth - Trace[Trace.size() - 2].Depth;
  356. if (Trace.back().Nodes.empty()) {
  357. Trace.pop_back();
  358. }
  359. AddCascade(lastStep / 2);
  360. }
  361. } while (IsVisited(Trace.back().Nodes.back()));
  362. FillCentralWord();
  363. return true;
  364. }
  365. // Returns the maximal depth it has reached while searching.
  366. // This is a method because it needs OffsetIndex to skip processed nodes.
  367. size_t FindDescendants(size_t rootOffset, size_t depth, TLevelNodes& result) const {
  368. if (depth == 0) {
  369. result.push_back(rootOffset);
  370. return 0;
  371. }
  372. size_t actualDepth = 0;
  373. TNode node(Trie.Data, rootOffset, Trie.SkipFunction);
  374. for (;;) {
  375. if (node.GetLeftOffset() && !IsVisited(node.GetLeftOffset())) {
  376. actualDepth = Max(actualDepth,
  377. 1 + FindDescendants(node.GetLeftOffset(), depth - 1, result));
  378. }
  379. if (node.GetRightOffset() && !IsVisited(node.GetRightOffset())) {
  380. actualDepth = Max(actualDepth,
  381. 1 + FindDescendants(node.GetRightOffset(), depth - 1, result));
  382. }
  383. if (node.GetForwardOffset() && !IsVisited(node.GetForwardOffset())) {
  384. node = TNode(Trie.Data, node.GetForwardOffset(), Trie.SkipFunction);
  385. } else {
  386. break;
  387. }
  388. }
  389. return actualDepth;
  390. }
  391. };
  392. } // Anonymous namespace.
  393. //-----------------------------------------------------------------------------------
  394. size_t RawCompactTrieFastLayoutImpl(IOutputStream& os, const TOpaqueTrie& trie, bool verbose) {
  395. if (!trie.Data || !trie.Length) {
  396. return 0;
  397. }
  398. TTrieMeasurer mes(trie, verbose);
  399. mes.Measure();
  400. TVanEmdeBoasReverseNodeEnumerator enumerator(mes, verbose);
  401. return WriteTrieBackwards(os, enumerator, verbose);
  402. }
  403. }