simple_hashtable.h 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396
  1. // SPDX-License-Identifier: GPL-3.0-or-later
  2. #ifndef NETDATA_SIMPLE_HASHTABLE_H
  3. #define NETDATA_SIMPLE_HASHTABLE_H
  4. #ifndef XXH_INLINE_ALL
  5. #define XXH_INLINE_ALL
  6. #endif
  7. #include "xxhash.h"
  8. typedef uint64_t SIMPLE_HASHTABLE_HASH;
  9. #define SIMPLE_HASHTABLE_HASH_SECOND_HASH_SHIFTS 32
  10. #ifndef SIMPLE_HASHTABLE_NAME
  11. #define SIMPLE_HASHTABLE_NAME
  12. #endif
  13. #ifndef SIMPLE_HASHTABLE_VALUE_TYPE
  14. #define SIMPLE_HASHTABLE_VALUE_TYPE void
  15. #endif
  16. // First layer of macro for token concatenation
  17. #define CONCAT_INTERNAL(a, b) a ## b
  18. // Second layer of macro, which ensures proper expansion
  19. #define CONCAT(a, b) CONCAT_INTERNAL(a, b)
  20. // define names for all structures and structures
  21. #define simple_hashtable_init_named CONCAT(simple_hashtable_init, SIMPLE_HASHTABLE_NAME)
  22. #define simple_hashtable_destroy_named CONCAT(simple_hashtable_destroy, SIMPLE_HASHTABLE_NAME)
  23. #define simple_hashtable_slot_named CONCAT(simple_hashtable_slot, SIMPLE_HASHTABLE_NAME)
  24. #define SIMPLE_HASHTABLE_SLOT_NAMED CONCAT(SIMPLE_HASHTABLE_SLOT, SIMPLE_HASHTABLE_NAME)
  25. #define simple_hashtable_named CONCAT(simple_hashtable, SIMPLE_HASHTABLE_NAME)
  26. #define SIMPLE_HASHTABLE_NAMED CONCAT(SIMPLE_HASHTABLE, SIMPLE_HASHTABLE_NAME)
  27. #define simple_hashtable_resize_named CONCAT(simple_hashtable_resize, SIMPLE_HASHTABLE_NAME)
  28. #define simple_hashtable_get_slot_named CONCAT(simple_hashtable_get_slot, SIMPLE_HASHTABLE_NAME)
  29. #define simple_hashtable_del_slot_named CONCAT(simple_hashtable_del_slot, SIMPLE_HASHTABLE_NAME)
  30. #define simple_hashtable_set_slot_named CONCAT(simple_hashtable_set_slot, SIMPLE_HASHTABLE_NAME)
  31. #define simple_hashtable_first_read_only_named CONCAT(simple_hashtable_first_read_only, SIMPLE_HASHTABLE_NAME)
  32. #define simple_hashtable_next_read_only_named CONCAT(simple_hashtable_next_read_only, SIMPLE_HASHTABLE_NAME)
  33. #define simple_hashtable_sorted_binary_search_named CONCAT(simple_hashtable_sorted_binary_search, SIMPLE_HASHTABLE_NAME)
  34. #define simple_hashtable_add_value_sorted_named CONCAT(simple_hashtable_add_value_sorted, SIMPLE_HASHTABLE_NAME)
  35. #define simple_hashtable_del_value_sorted_named CONCAT(simple_hashtable_del_value_sorted, SIMPLE_HASHTABLE_NAME)
  36. #define simple_hashtable_replace_value_sorted_named CONCAT(simple_hashtable_replace_value_sorted, SIMPLE_HASHTABLE_NAME)
  37. #define simple_hashtable_sorted_array_first_read_only_named CONCAT(simple_hashtable_sorted_array_first_read_only, SIMPLE_HASHTABLE_NAME)
  38. #define simple_hashtable_sorted_array_next_read_only_named CONCAT(simple_hashtable_sorted_array_next_read_only, SIMPLE_HASHTABLE_NAME)
  39. typedef struct simple_hashtable_slot_named {
  40. SIMPLE_HASHTABLE_HASH hash;
  41. SIMPLE_HASHTABLE_VALUE_TYPE *data;
  42. } SIMPLE_HASHTABLE_SLOT_NAMED;
  43. typedef struct simple_hashtable_named {
  44. size_t resizes;
  45. size_t searches;
  46. size_t collisions;
  47. size_t deletions;
  48. size_t deleted;
  49. size_t used;
  50. size_t size;
  51. SIMPLE_HASHTABLE_SLOT_NAMED *hashtable;
  52. #ifdef SIMPLE_HASHTABLE_SORT_FUNCTION
  53. struct {
  54. size_t used;
  55. size_t size;
  56. SIMPLE_HASHTABLE_VALUE_TYPE **array;
  57. } sorted;
  58. #endif
  59. } SIMPLE_HASHTABLE_NAMED;
  60. #ifdef SIMPLE_HASHTABLE_SORT_FUNCTION
  61. static inline size_t simple_hashtable_sorted_binary_search_named(SIMPLE_HASHTABLE_NAMED *ht, SIMPLE_HASHTABLE_VALUE_TYPE *value) {
  62. size_t left = 0, right = ht->sorted.used;
  63. while (left < right) {
  64. size_t mid = left + (right - left) / 2;
  65. if (SIMPLE_HASHTABLE_SORT_FUNCTION(ht->sorted.array[mid], value) < 0)
  66. left = mid + 1;
  67. else
  68. right = mid;
  69. }
  70. return left;
  71. }
  72. static inline void simple_hashtable_add_value_sorted_named(SIMPLE_HASHTABLE_NAMED *ht, SIMPLE_HASHTABLE_VALUE_TYPE *value) {
  73. size_t index = simple_hashtable_sorted_binary_search_named(ht, value);
  74. // Ensure there's enough space in the sorted array
  75. if (ht->sorted.used >= ht->sorted.size) {
  76. size_t size = ht->sorted.size ? ht->sorted.size * 2 : 64;
  77. SIMPLE_HASHTABLE_VALUE_TYPE **array = mallocz(size * sizeof(SIMPLE_HASHTABLE_VALUE_TYPE *));
  78. if(ht->sorted.array) {
  79. memcpy(array, ht->sorted.array, ht->sorted.size * sizeof(SIMPLE_HASHTABLE_VALUE_TYPE *));
  80. freez(ht->sorted.array);
  81. }
  82. ht->sorted.array = array;
  83. ht->sorted.size = size;
  84. }
  85. // Use memmove to shift elements and create space for the new element
  86. memmove(&ht->sorted.array[index + 1], &ht->sorted.array[index], (ht->sorted.used - index) * sizeof(SIMPLE_HASHTABLE_VALUE_TYPE *));
  87. ht->sorted.array[index] = value;
  88. ht->sorted.used++;
  89. }
  90. static inline void simple_hashtable_del_value_sorted_named(SIMPLE_HASHTABLE_NAMED *ht, SIMPLE_HASHTABLE_VALUE_TYPE *value) {
  91. size_t index = simple_hashtable_sorted_binary_search_named(ht, value);
  92. // Check if the value exists at the found index
  93. assert(index < ht->sorted.used && ht->sorted.array[index] == value);
  94. // Use memmove to shift elements and close the gap
  95. memmove(&ht->sorted.array[index], &ht->sorted.array[index + 1], (ht->sorted.used - index - 1) * sizeof(SIMPLE_HASHTABLE_VALUE_TYPE *));
  96. ht->sorted.used--;
  97. }
  98. static inline void simple_hashtable_replace_value_sorted_named(SIMPLE_HASHTABLE_NAMED *ht, SIMPLE_HASHTABLE_VALUE_TYPE *old_value, SIMPLE_HASHTABLE_VALUE_TYPE *new_value) {
  99. if(new_value == old_value)
  100. return;
  101. size_t old_value_index = simple_hashtable_sorted_binary_search_named(ht, old_value);
  102. assert(old_value_index < ht->sorted.used && ht->sorted.array[old_value_index] == old_value);
  103. int r = SIMPLE_HASHTABLE_SORT_FUNCTION(old_value, new_value);
  104. if(r == 0) {
  105. // Same value, so use the same index
  106. ht->sorted.array[old_value_index] = new_value;
  107. return;
  108. }
  109. size_t new_value_index = simple_hashtable_sorted_binary_search_named(ht, new_value);
  110. if(old_value_index == new_value_index) {
  111. // Not the same value, but still at the same index
  112. ht->sorted.array[old_value_index] = new_value;
  113. return;
  114. }
  115. else if (old_value_index < new_value_index) {
  116. // The old value is before the new value
  117. size_t shift_start = old_value_index + 1;
  118. size_t shift_end = new_value_index - 1;
  119. size_t shift_size = shift_end - old_value_index;
  120. memmove(&ht->sorted.array[old_value_index], &ht->sorted.array[shift_start], shift_size * sizeof(SIMPLE_HASHTABLE_VALUE_TYPE *));
  121. ht->sorted.array[shift_end] = new_value;
  122. }
  123. else {
  124. // The old value is after the new value
  125. size_t shift_start = new_value_index;
  126. size_t shift_end = old_value_index;
  127. size_t shift_size = shift_end - new_value_index;
  128. memmove(&ht->sorted.array[new_value_index + 1], &ht->sorted.array[shift_start], shift_size * sizeof(SIMPLE_HASHTABLE_VALUE_TYPE *));
  129. ht->sorted.array[new_value_index] = new_value;
  130. }
  131. }
  132. static inline SIMPLE_HASHTABLE_VALUE_TYPE **simple_hashtable_sorted_array_first_read_only_named(SIMPLE_HASHTABLE_NAMED *ht) {
  133. if (ht->sorted.used > 0) {
  134. return &ht->sorted.array[0];
  135. }
  136. return NULL;
  137. }
  138. static inline SIMPLE_HASHTABLE_VALUE_TYPE **simple_hashtable_sorted_array_next_read_only_named(SIMPLE_HASHTABLE_NAMED *ht, SIMPLE_HASHTABLE_VALUE_TYPE **last) {
  139. if (!last) return NULL;
  140. // Calculate the current position in the sorted array
  141. size_t currentIndex = last - ht->sorted.array;
  142. // Proceed to the next element if it exists
  143. if (currentIndex + 1 < ht->sorted.used) {
  144. return &ht->sorted.array[currentIndex + 1];
  145. }
  146. // If no more elements, return NULL
  147. return NULL;
  148. }
  149. #define SIMPLE_HASHTABLE_SORTED_FOREACH_READ_ONLY(ht, var, type, name) \
  150. for (type **(var) = simple_hashtable_sorted_array_first_read_only ## name(ht); \
  151. var; \
  152. (var) = simple_hashtable_sorted_array_next_read_only ## name(ht, var))
  153. #define SIMPLE_HASHTABLE_SORTED_FOREACH_READ_ONLY_VALUE(var) (*(var))
  154. #else
  155. static inline void simple_hashtable_add_value_sorted_named(SIMPLE_HASHTABLE_NAMED *ht __maybe_unused, SIMPLE_HASHTABLE_VALUE_TYPE *value __maybe_unused) { ; }
  156. static inline void simple_hashtable_del_value_sorted_named(SIMPLE_HASHTABLE_NAMED *ht __maybe_unused, SIMPLE_HASHTABLE_VALUE_TYPE *value __maybe_unused) { ; }
  157. static inline void simple_hashtable_replace_value_sorted_named(SIMPLE_HASHTABLE_NAMED *ht __maybe_unused, SIMPLE_HASHTABLE_VALUE_TYPE *old_value __maybe_unused, SIMPLE_HASHTABLE_VALUE_TYPE *new_value __maybe_unused) { ; }
  158. #endif
  159. static void simple_hashtable_init_named(SIMPLE_HASHTABLE_NAMED *ht, size_t size) {
  160. memset(ht, 0, sizeof(*ht));
  161. ht->size = size;
  162. ht->hashtable = callocz(ht->size, sizeof(*ht->hashtable));
  163. }
  164. static void simple_hashtable_destroy_named(SIMPLE_HASHTABLE_NAMED *ht) {
  165. #ifdef SIMPLE_HASHTABLE_SORT_FUNCTION
  166. freez(ht->sorted.array);
  167. #endif
  168. freez(ht->hashtable);
  169. memset(ht, 0, sizeof(*ht));
  170. }
  171. static inline void simple_hashtable_resize_named(SIMPLE_HASHTABLE_NAMED *ht);
  172. #define SHTS_DATA_UNSET ((void *)NULL)
  173. #define SHTS_DATA_DELETED ((void *)0x01)
  174. #define SHTS_DATA_USERNULL ((void *)0x02)
  175. #define SHTS_IS_UNSET(sl) ((sl)->data == SHTS_DATA_UNSET)
  176. #define SHTS_IS_DELETED(sl) ((sl)->data == SHTS_DATA_DELETED)
  177. #define SHTS_IS_USERNULL(sl) ((sl)->data == SHTS_DATA_USERNULL)
  178. #define SIMPLE_HASHTABLE_SLOT_DATA(sl) ((SHTS_IS_UNSET(sl) || SHTS_IS_DELETED(sl) || SHTS_IS_USERNULL(sl)) ? NULL : (sl)->data)
  179. #define SIMPLE_HASHTABLE_SLOT_UNSET_OR_DELETED(sl) ((SHTS_IS_UNSET(sl) || SHTS_IS_DELETED(sl)) ? NULL : (sl)->data)
  180. // IMPORTANT
  181. // The pointer returned by this call is valid up to the next call of this function (or the resize one)
  182. // If you need to cache something, cache the hash, not the slot pointer.
  183. static inline SIMPLE_HASHTABLE_SLOT_NAMED *simple_hashtable_get_slot_named(SIMPLE_HASHTABLE_NAMED *ht, SIMPLE_HASHTABLE_HASH hash, bool resize) {
  184. ht->searches++;
  185. size_t slot;
  186. SIMPLE_HASHTABLE_SLOT_NAMED *sl;
  187. SIMPLE_HASHTABLE_SLOT_NAMED *deleted;
  188. slot = hash % ht->size;
  189. sl = &ht->hashtable[slot];
  190. deleted = SHTS_IS_DELETED(sl) ? sl : NULL;
  191. if(likely(!SIMPLE_HASHTABLE_SLOT_UNSET_OR_DELETED(sl) || sl->hash == hash))
  192. return (SHTS_IS_UNSET(sl) && deleted) ? deleted : sl;
  193. ht->collisions++;
  194. if(unlikely(resize && (ht->size <= (ht->used << 1) || ht->used >= ht->size))) {
  195. simple_hashtable_resize_named(ht);
  196. slot = hash % ht->size;
  197. sl = &ht->hashtable[slot];
  198. deleted = (!deleted && SHTS_IS_DELETED(sl)) ? sl : deleted;
  199. if(likely(!SIMPLE_HASHTABLE_SLOT_UNSET_OR_DELETED(sl) || sl->hash == hash))
  200. return (SHTS_IS_UNSET(sl) && deleted) ? deleted : sl;
  201. ht->collisions++;
  202. }
  203. slot = ((hash >> SIMPLE_HASHTABLE_HASH_SECOND_HASH_SHIFTS) + 1) % ht->size;
  204. sl = &ht->hashtable[slot];
  205. deleted = (!deleted && SHTS_IS_DELETED(sl)) ? sl : deleted;
  206. // Linear probing until we find it
  207. while (SIMPLE_HASHTABLE_SLOT_UNSET_OR_DELETED(sl) && sl->hash != hash) {
  208. slot = (slot + 1) % ht->size; // Wrap around if necessary
  209. sl = &ht->hashtable[slot];
  210. deleted = (!deleted && SHTS_IS_DELETED(sl)) ? sl : deleted;
  211. ht->collisions++;
  212. }
  213. return (SHTS_IS_UNSET(sl) && deleted) ? deleted : sl;
  214. }
  215. static inline bool simple_hashtable_del_slot_named(SIMPLE_HASHTABLE_NAMED *ht, SIMPLE_HASHTABLE_SLOT_NAMED *sl) {
  216. if(SHTS_IS_UNSET(sl) || SHTS_IS_DELETED(sl))
  217. return false;
  218. ht->deletions++;
  219. ht->deleted++;
  220. simple_hashtable_del_value_sorted_named(ht, SIMPLE_HASHTABLE_SLOT_DATA(sl));
  221. sl->data = SHTS_DATA_DELETED;
  222. return true;
  223. }
  224. static inline void simple_hashtable_set_slot_named(SIMPLE_HASHTABLE_NAMED *ht, SIMPLE_HASHTABLE_SLOT_NAMED *sl, SIMPLE_HASHTABLE_HASH hash, SIMPLE_HASHTABLE_VALUE_TYPE *data) {
  225. if(data == NULL)
  226. data = SHTS_DATA_USERNULL;
  227. if(unlikely(data == SHTS_DATA_UNSET || data == SHTS_DATA_DELETED)) {
  228. simple_hashtable_del_slot_named(ht, sl);
  229. return;
  230. }
  231. if(likely(SHTS_IS_UNSET(sl))) {
  232. simple_hashtable_add_value_sorted_named(ht, data);
  233. ht->used++;
  234. }
  235. else if(unlikely(SHTS_IS_DELETED(sl))) {
  236. ht->deleted--;
  237. }
  238. else
  239. simple_hashtable_replace_value_sorted_named(ht, SIMPLE_HASHTABLE_SLOT_DATA(sl), data);
  240. sl->hash = hash;
  241. sl->data = data;
  242. }
  243. // IMPORTANT
  244. // this call invalidates all SIMPLE_HASHTABLE_SLOT_NAMED pointers
  245. static inline void simple_hashtable_resize_named(SIMPLE_HASHTABLE_NAMED *ht) {
  246. SIMPLE_HASHTABLE_SLOT_NAMED *old = ht->hashtable;
  247. size_t old_size = ht->size;
  248. ht->resizes++;
  249. ht->size = (ht->size << 1) - ((ht->size > 16) ? 1 : 0);
  250. ht->hashtable = callocz(ht->size, sizeof(*ht->hashtable));
  251. ht->used = ht->deleted = 0;
  252. for(size_t i = 0 ; i < old_size ; i++) {
  253. if(!SIMPLE_HASHTABLE_SLOT_UNSET_OR_DELETED(&old[i]))
  254. continue;
  255. SIMPLE_HASHTABLE_SLOT_NAMED *slot = simple_hashtable_get_slot_named(ht, old[i].hash, false);
  256. *slot = old[i];
  257. ht->used++;
  258. }
  259. freez(old);
  260. }
  261. // ----------------------------------------------------------------------------
  262. // hashtable traversal, in read-only mode
  263. // the hashtable should not be modified while the traversal is taking place
  264. static inline SIMPLE_HASHTABLE_SLOT_NAMED *simple_hashtable_first_read_only_named(SIMPLE_HASHTABLE_NAMED *ht) {
  265. for(size_t i = 0; i < ht->used ;i++) {
  266. SIMPLE_HASHTABLE_SLOT_NAMED *sl = &ht->hashtable[i];
  267. if(!SIMPLE_HASHTABLE_SLOT_UNSET_OR_DELETED(sl))
  268. return sl;
  269. }
  270. return NULL;
  271. }
  272. static inline SIMPLE_HASHTABLE_SLOT_NAMED *simple_hashtable_next_read_only_named(SIMPLE_HASHTABLE_NAMED *ht, SIMPLE_HASHTABLE_SLOT_NAMED *last) {
  273. if (!last) return NULL;
  274. // Calculate the current position in the array
  275. size_t currentIndex = last - ht->hashtable;
  276. // Iterate over the hashtable starting from the next element
  277. for (size_t i = currentIndex + 1; i < ht->size; i++) {
  278. SIMPLE_HASHTABLE_SLOT_NAMED *sl = &ht->hashtable[i];
  279. if (!SIMPLE_HASHTABLE_SLOT_UNSET_OR_DELETED(sl)) {
  280. return sl;
  281. }
  282. }
  283. // If no more data slots are found, return NULL
  284. return NULL;
  285. }
  286. #define SIMPLE_HASHTABLE_FOREACH_READ_ONLY(ht, var, name) \
  287. for(struct simple_hashtable_slot ## name *(var) = simple_hashtable_first_read_only ## name(ht); \
  288. var; \
  289. (var) = simple_hashtable_next_read_only ## name(ht, var))
  290. #define SIMPLE_HASHTABLE_FOREACH_READ_ONLY_VALUE(var) SIMPLE_HASHTABLE_SLOT_DATA(var)
  291. // ----------------------------------------------------------------------------
  292. // high level implementation
  293. #ifdef SIMPLE_HASHTABLE_SAMPLE_IMPLEMENTATION
  294. #define simple_hashtable_set_named CONCAT(simple_hashtable_set, SIMPLE_HASHTABLE_NAME)
  295. #define simple_hashtable_get_named CONCAT(simple_hashtable_get, SIMPLE_HASHTABLE_NAME)
  296. #define simple_hashtable_del_named CONCAT(simple_hashtable_del, SIMPLE_HASHTABLE_NAME)
  297. static inline SIMPLE_HASHTABLE_VALUE_TYPE *simple_hashtable_set_named(SIMPLE_HASHTABLE_NAMED *ht, void *key, size_t key_len, SIMPLE_HASHTABLE_VALUE_TYPE *data) {
  298. XXH64_hash_t hash = XXH3_64bits(key, key_len);
  299. SIMPLE_HASHTABLE_SLOT_NAMED *sl = simple_hashtable_get_slot_named(ht, hash, true);
  300. simple_hashtable_set_slot_named(ht, sl, hash, data);
  301. return SIMPLE_HASHTABLE_SLOT_DATA(sl);
  302. }
  303. static inline SIMPLE_HASHTABLE_VALUE_TYPE *simple_hashtable_get_named(SIMPLE_HASHTABLE_NAMED *ht, void *key, size_t key_len, SIMPLE_HASHTABLE_VALUE_TYPE *data) {
  304. XXH64_hash_t hash = XXH3_64bits(key, key_len);
  305. SIMPLE_HASHTABLE_SLOT_NAMED *sl = simple_hashtable_get_slot_named(ht, hash, true);
  306. return SIMPLE_HASHTABLE_SLOT_DATA(sl);
  307. }
  308. static inline bool simple_hashtable_del_named(SIMPLE_HASHTABLE_NAMED *ht, void *key, size_t key_len, SIMPLE_HASHTABLE_VALUE_TYPE *data) {
  309. XXH64_hash_t hash = XXH3_64bits(key, key_len);
  310. SIMPLE_HASHTABLE_SLOT_NAMED *sl = simple_hashtable_get_slot_named(ht, hash, true);
  311. return simple_hashtable_del_slot_named(ht, sl);
  312. }
  313. #endif // SIMPLE_HASHTABLE_SAMPLE_IMPLEMENTATION
  314. #endif //NETDATA_SIMPLE_HASHTABLE_H