str_map.h 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205
  1. #pragma once
  2. #include <util/memory/segmented_string_pool.h>
  3. #include <util/generic/map.h>
  4. #include <util/generic/hash.h>
  5. #include <util/generic/buffer.h>
  6. #include <util/str_stl.h> // less<> and equal_to<> for const char*
  7. #include <utility>
  8. #include <util/generic/noncopyable.h>
  9. template <class T, class HashFcn = THash<const char*>, class EqualTo = TEqualTo<const char*>, class Alloc = std::allocator<const char*>>
  10. class string_hash;
  11. template <class T, class HashFcn = THash<const char*>, class EqualTo = TEqualTo<const char*>>
  12. class segmented_string_hash;
  13. template <class Map>
  14. inline std::pair<typename Map::iterator, bool>
  15. pool_insert(Map* m, const char* key, const typename Map::mapped_type& data, TBuffer& pool) {
  16. std::pair<typename Map::iterator, bool> ins = m->insert(typename Map::value_type(key, data));
  17. if (ins.second) { // new?
  18. size_t buflen = strlen(key) + 1; // strlen???
  19. const char* old_pool = pool.Begin();
  20. pool.Append(key, buflen);
  21. if (pool.Begin() != old_pool) // repoint?
  22. for (typename Map::iterator it = m->begin(); it != m->end(); ++it)
  23. if ((*it).first != key)
  24. const_cast<const char*&>((*it).first) += (pool.Begin() - old_pool);
  25. const_cast<const char*&>((*ins.first).first) = pool.End() - buflen;
  26. }
  27. return ins;
  28. }
  29. #define HASH_SIZE_DEFAULT 100
  30. #define AVERAGEWORD_BUF 10
  31. template <class T, class HashFcn, class EqualTo, class Alloc>
  32. class string_hash: public THashMap<const char*, T, HashFcn, EqualTo, Alloc> {
  33. protected:
  34. TBuffer pool;
  35. public:
  36. using yh = THashMap<const char*, T, HashFcn, EqualTo, Alloc>;
  37. using iterator = typename yh::iterator;
  38. using const_iterator = typename yh::const_iterator;
  39. using mapped_type = typename yh::mapped_type;
  40. using size_type = typename yh::size_type;
  41. using pool_size_type = typename yh::size_type;
  42. string_hash() {
  43. pool.Reserve(HASH_SIZE_DEFAULT * AVERAGEWORD_BUF); // reserve here
  44. }
  45. string_hash(size_type hash_size, pool_size_type pool_size)
  46. : THashMap<const char*, T, HashFcn, EqualTo, Alloc>(hash_size)
  47. {
  48. pool.Reserve(pool_size); // reserve here
  49. }
  50. std::pair<iterator, bool> insert_copy(const char* key, const mapped_type& data) {
  51. return ::pool_insert(this, key, data, pool);
  52. }
  53. void clear_hash() {
  54. yh::clear();
  55. pool.Clear();
  56. }
  57. pool_size_type pool_size() const {
  58. return pool.Size();
  59. }
  60. string_hash(const string_hash& sh)
  61. : THashMap<const char*, T, HashFcn, EqualTo, Alloc>()
  62. {
  63. for (const_iterator i = sh.begin(); i != sh.end(); ++i)
  64. insert_copy((*i).first, (*i).second);
  65. }
  66. /* May be faster?
  67. string_hash(const string_hash& sh)
  68. : THashMap<const char *, T, HashFcn, EqualTo>(sh)
  69. {
  70. pool = sh.pool;
  71. size_t delta = pool.begin() - sh.pool.begin();
  72. for (iterator i = begin(); i != end(); ++i)
  73. (const char*&)(*i).first += delta;
  74. }
  75. */
  76. string_hash& operator=(const string_hash& sh) {
  77. if (&sh != this) {
  78. clear_hash();
  79. for (const_iterator i = sh.begin(); i != sh.end(); ++i)
  80. insert_copy((*i).first, (*i).second);
  81. }
  82. return *this;
  83. }
  84. mapped_type& operator[](const char* key) {
  85. iterator I = yh::find(key);
  86. if (I == yh::end())
  87. I = insert_copy(key, mapped_type()).first;
  88. return (*I).second;
  89. }
  90. };
  91. template <class C, class T, class HashFcn, class EqualTo>
  92. class THashWithSegmentedPoolForKeys: protected THashMap<const C*, T, HashFcn, EqualTo>, TNonCopyable {
  93. protected:
  94. segmented_pool<C> pool;
  95. public:
  96. using yh = THashMap<const C*, T, HashFcn, EqualTo>;
  97. using iterator = typename yh::iterator;
  98. using const_iterator = typename yh::const_iterator;
  99. using mapped_type = typename yh::mapped_type;
  100. using size_type = typename yh::size_type;
  101. using key_type = typename yh::key_type;
  102. using value_type = typename yh::value_type;
  103. THashWithSegmentedPoolForKeys(size_type hash_size = HASH_SIZE_DEFAULT, size_t segsize = HASH_SIZE_DEFAULT * AVERAGEWORD_BUF, bool afs = false)
  104. : yh(hash_size)
  105. , pool(segsize)
  106. {
  107. if (afs)
  108. pool.alloc_first_seg();
  109. }
  110. std::pair<iterator, bool> insert_copy(const C* key, size_t keylen, const mapped_type& data) {
  111. std::pair<iterator, bool> ins = this->insert(value_type(key, data));
  112. if (ins.second) // new?
  113. (const C*&)(*ins.first).first = pool.append(key, keylen);
  114. return ins;
  115. }
  116. void clear_hash() {
  117. yh::clear();
  118. pool.restart();
  119. }
  120. size_t pool_size() const {
  121. return pool.size();
  122. }
  123. size_t size() const {
  124. return yh::size();
  125. }
  126. bool empty() const {
  127. return yh::empty();
  128. }
  129. iterator begin() {
  130. return yh::begin();
  131. }
  132. iterator end() {
  133. return yh::end();
  134. }
  135. const_iterator begin() const {
  136. return yh::begin();
  137. }
  138. const_iterator end() const {
  139. return yh::end();
  140. }
  141. iterator find(const key_type& key) {
  142. return yh::find(key);
  143. }
  144. const_iterator find(const key_type& key) const {
  145. return yh::find(key);
  146. }
  147. const yh& get_THashMap() const {
  148. return static_cast<const yh&>(*this);
  149. }
  150. };
  151. template <class T, class HashFcn, class EqualTo>
  152. class segmented_string_hash: public THashWithSegmentedPoolForKeys<char, T, HashFcn, EqualTo> {
  153. public:
  154. using Base = THashWithSegmentedPoolForKeys<char, T, HashFcn, EqualTo>;
  155. using iterator = typename Base::iterator;
  156. using const_iterator = typename Base::const_iterator;
  157. using mapped_type = typename Base::mapped_type;
  158. using size_type = typename Base::size_type;
  159. using key_type = typename Base::key_type;
  160. using value_type = typename Base::value_type;
  161. public:
  162. segmented_string_hash(size_type hash_size = HASH_SIZE_DEFAULT, size_t segsize = HASH_SIZE_DEFAULT * AVERAGEWORD_BUF, bool afs = false)
  163. : Base(hash_size, segsize, afs)
  164. {
  165. }
  166. std::pair<iterator, bool> insert_copy(const char* key, const mapped_type& data) {
  167. return Base::insert_copy(key, strlen(key) + 1, data);
  168. }
  169. mapped_type& operator[](const char* key) {
  170. iterator I = Base::find(key);
  171. if (I == Base::end())
  172. I = insert_copy(key, mapped_type()).first;
  173. return (*I).second;
  174. }
  175. };