hashtable.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424
  1. /* The implementation of the hash table (_Py_hashtable_t) is based on the
  2. cfuhash project:
  3. http://sourceforge.net/projects/libcfu/
  4. Copyright of cfuhash:
  5. ----------------------------------
  6. Creation date: 2005-06-24 21:22:40
  7. Authors: Don
  8. Change log:
  9. Copyright (c) 2005 Don Owens
  10. All rights reserved.
  11. This code is released under the BSD license:
  12. Redistribution and use in source and binary forms, with or without
  13. modification, are permitted provided that the following conditions
  14. are met:
  15. * Redistributions of source code must retain the above copyright
  16. notice, this list of conditions and the following disclaimer.
  17. * Redistributions in binary form must reproduce the above
  18. copyright notice, this list of conditions and the following
  19. disclaimer in the documentation and/or other materials provided
  20. with the distribution.
  21. * Neither the name of the author nor the names of its
  22. contributors may be used to endorse or promote products derived
  23. from this software without specific prior written permission.
  24. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  25. "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  26. LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  27. FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  28. COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  29. INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  30. (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  31. SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  32. HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  33. STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  34. ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
  35. OF THE POSSIBILITY OF SUCH DAMAGE.
  36. ----------------------------------
  37. */
  38. #include "Python.h"
  39. #include "pycore_hashtable.h"
  40. #define HASHTABLE_MIN_SIZE 16
  41. #define HASHTABLE_HIGH 0.50
  42. #define HASHTABLE_LOW 0.10
  43. #define HASHTABLE_REHASH_FACTOR 2.0 / (HASHTABLE_LOW + HASHTABLE_HIGH)
  44. #define BUCKETS_HEAD(SLIST) \
  45. ((_Py_hashtable_entry_t *)_Py_SLIST_HEAD(&(SLIST)))
  46. #define TABLE_HEAD(HT, BUCKET) \
  47. ((_Py_hashtable_entry_t *)_Py_SLIST_HEAD(&(HT)->buckets[BUCKET]))
  48. #define ENTRY_NEXT(ENTRY) \
  49. ((_Py_hashtable_entry_t *)_Py_SLIST_ITEM_NEXT(ENTRY))
  50. /* Forward declaration */
  51. static int hashtable_rehash(_Py_hashtable_t *ht);
  52. static void
  53. _Py_slist_init(_Py_slist_t *list)
  54. {
  55. list->head = NULL;
  56. }
  57. static void
  58. _Py_slist_prepend(_Py_slist_t *list, _Py_slist_item_t *item)
  59. {
  60. item->next = list->head;
  61. list->head = item;
  62. }
  63. static void
  64. _Py_slist_remove(_Py_slist_t *list, _Py_slist_item_t *previous,
  65. _Py_slist_item_t *item)
  66. {
  67. if (previous != NULL)
  68. previous->next = item->next;
  69. else
  70. list->head = item->next;
  71. }
  72. Py_uhash_t
  73. _Py_hashtable_hash_ptr(const void *key)
  74. {
  75. return (Py_uhash_t)_Py_HashPointerRaw(key);
  76. }
  77. int
  78. _Py_hashtable_compare_direct(const void *key1, const void *key2)
  79. {
  80. return (key1 == key2);
  81. }
  82. /* makes sure the real size of the buckets array is a power of 2 */
  83. static size_t
  84. round_size(size_t s)
  85. {
  86. size_t i;
  87. if (s < HASHTABLE_MIN_SIZE)
  88. return HASHTABLE_MIN_SIZE;
  89. i = 1;
  90. while (i < s)
  91. i <<= 1;
  92. return i;
  93. }
  94. size_t
  95. _Py_hashtable_size(const _Py_hashtable_t *ht)
  96. {
  97. size_t size = sizeof(_Py_hashtable_t);
  98. /* buckets */
  99. size += ht->nbuckets * sizeof(_Py_hashtable_entry_t *);
  100. /* entries */
  101. size += ht->nentries * sizeof(_Py_hashtable_entry_t);
  102. return size;
  103. }
  104. size_t
  105. _Py_hashtable_len(const _Py_hashtable_t *ht)
  106. {
  107. return ht->nentries;
  108. }
  109. _Py_hashtable_entry_t *
  110. _Py_hashtable_get_entry_generic(_Py_hashtable_t *ht, const void *key)
  111. {
  112. Py_uhash_t key_hash = ht->hash_func(key);
  113. size_t index = key_hash & (ht->nbuckets - 1);
  114. _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, index);
  115. while (1) {
  116. if (entry == NULL) {
  117. return NULL;
  118. }
  119. if (entry->key_hash == key_hash && ht->compare_func(key, entry->key)) {
  120. break;
  121. }
  122. entry = ENTRY_NEXT(entry);
  123. }
  124. return entry;
  125. }
  126. // Specialized for:
  127. // hash_func == _Py_hashtable_hash_ptr
  128. // compare_func == _Py_hashtable_compare_direct
  129. static _Py_hashtable_entry_t *
  130. _Py_hashtable_get_entry_ptr(_Py_hashtable_t *ht, const void *key)
  131. {
  132. Py_uhash_t key_hash = _Py_hashtable_hash_ptr(key);
  133. size_t index = key_hash & (ht->nbuckets - 1);
  134. _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, index);
  135. while (1) {
  136. if (entry == NULL) {
  137. return NULL;
  138. }
  139. // Compare directly keys (ignore entry->key_hash)
  140. if (entry->key == key) {
  141. break;
  142. }
  143. entry = ENTRY_NEXT(entry);
  144. }
  145. return entry;
  146. }
  147. void*
  148. _Py_hashtable_steal(_Py_hashtable_t *ht, const void *key)
  149. {
  150. Py_uhash_t key_hash = ht->hash_func(key);
  151. size_t index = key_hash & (ht->nbuckets - 1);
  152. _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, index);
  153. _Py_hashtable_entry_t *previous = NULL;
  154. while (1) {
  155. if (entry == NULL) {
  156. // not found
  157. return NULL;
  158. }
  159. if (entry->key_hash == key_hash && ht->compare_func(key, entry->key)) {
  160. break;
  161. }
  162. previous = entry;
  163. entry = ENTRY_NEXT(entry);
  164. }
  165. _Py_slist_remove(&ht->buckets[index], (_Py_slist_item_t *)previous,
  166. (_Py_slist_item_t *)entry);
  167. ht->nentries--;
  168. void *value = entry->value;
  169. ht->alloc.free(entry);
  170. if ((float)ht->nentries / (float)ht->nbuckets < HASHTABLE_LOW) {
  171. // Ignore failure: error cannot be reported to the caller
  172. hashtable_rehash(ht);
  173. }
  174. return value;
  175. }
  176. int
  177. _Py_hashtable_set(_Py_hashtable_t *ht, const void *key, void *value)
  178. {
  179. _Py_hashtable_entry_t *entry;
  180. #ifndef NDEBUG
  181. /* Don't write the assertion on a single line because it is interesting
  182. to know the duplicated entry if the assertion failed. The entry can
  183. be read using a debugger. */
  184. entry = ht->get_entry_func(ht, key);
  185. assert(entry == NULL);
  186. #endif
  187. entry = ht->alloc.malloc(sizeof(_Py_hashtable_entry_t));
  188. if (entry == NULL) {
  189. /* memory allocation failed */
  190. return -1;
  191. }
  192. entry->key_hash = ht->hash_func(key);
  193. entry->key = (void *)key;
  194. entry->value = value;
  195. ht->nentries++;
  196. if ((float)ht->nentries / (float)ht->nbuckets > HASHTABLE_HIGH) {
  197. if (hashtable_rehash(ht) < 0) {
  198. ht->nentries--;
  199. ht->alloc.free(entry);
  200. return -1;
  201. }
  202. }
  203. size_t index = entry->key_hash & (ht->nbuckets - 1);
  204. _Py_slist_prepend(&ht->buckets[index], (_Py_slist_item_t*)entry);
  205. return 0;
  206. }
  207. void*
  208. _Py_hashtable_get(_Py_hashtable_t *ht, const void *key)
  209. {
  210. _Py_hashtable_entry_t *entry = ht->get_entry_func(ht, key);
  211. if (entry != NULL) {
  212. return entry->value;
  213. }
  214. else {
  215. return NULL;
  216. }
  217. }
  218. int
  219. _Py_hashtable_foreach(_Py_hashtable_t *ht,
  220. _Py_hashtable_foreach_func func,
  221. void *user_data)
  222. {
  223. for (size_t hv = 0; hv < ht->nbuckets; hv++) {
  224. _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, hv);
  225. while (entry != NULL) {
  226. int res = func(ht, entry->key, entry->value, user_data);
  227. if (res) {
  228. return res;
  229. }
  230. entry = ENTRY_NEXT(entry);
  231. }
  232. }
  233. return 0;
  234. }
  235. static int
  236. hashtable_rehash(_Py_hashtable_t *ht)
  237. {
  238. size_t new_size = round_size((size_t)(ht->nentries * HASHTABLE_REHASH_FACTOR));
  239. if (new_size == ht->nbuckets) {
  240. return 0;
  241. }
  242. size_t buckets_size = new_size * sizeof(ht->buckets[0]);
  243. _Py_slist_t *new_buckets = ht->alloc.malloc(buckets_size);
  244. if (new_buckets == NULL) {
  245. /* memory allocation failed */
  246. return -1;
  247. }
  248. memset(new_buckets, 0, buckets_size);
  249. for (size_t bucket = 0; bucket < ht->nbuckets; bucket++) {
  250. _Py_hashtable_entry_t *entry = BUCKETS_HEAD(ht->buckets[bucket]);
  251. while (entry != NULL) {
  252. assert(ht->hash_func(entry->key) == entry->key_hash);
  253. _Py_hashtable_entry_t *next = ENTRY_NEXT(entry);
  254. size_t entry_index = entry->key_hash & (new_size - 1);
  255. _Py_slist_prepend(&new_buckets[entry_index], (_Py_slist_item_t*)entry);
  256. entry = next;
  257. }
  258. }
  259. ht->alloc.free(ht->buckets);
  260. ht->nbuckets = new_size;
  261. ht->buckets = new_buckets;
  262. return 0;
  263. }
  264. _Py_hashtable_t *
  265. _Py_hashtable_new_full(_Py_hashtable_hash_func hash_func,
  266. _Py_hashtable_compare_func compare_func,
  267. _Py_hashtable_destroy_func key_destroy_func,
  268. _Py_hashtable_destroy_func value_destroy_func,
  269. _Py_hashtable_allocator_t *allocator)
  270. {
  271. _Py_hashtable_allocator_t alloc;
  272. if (allocator == NULL) {
  273. alloc.malloc = PyMem_Malloc;
  274. alloc.free = PyMem_Free;
  275. }
  276. else {
  277. alloc = *allocator;
  278. }
  279. _Py_hashtable_t *ht = (_Py_hashtable_t *)alloc.malloc(sizeof(_Py_hashtable_t));
  280. if (ht == NULL) {
  281. return ht;
  282. }
  283. ht->nbuckets = HASHTABLE_MIN_SIZE;
  284. ht->nentries = 0;
  285. size_t buckets_size = ht->nbuckets * sizeof(ht->buckets[0]);
  286. ht->buckets = alloc.malloc(buckets_size);
  287. if (ht->buckets == NULL) {
  288. alloc.free(ht);
  289. return NULL;
  290. }
  291. memset(ht->buckets, 0, buckets_size);
  292. ht->get_entry_func = _Py_hashtable_get_entry_generic;
  293. ht->hash_func = hash_func;
  294. ht->compare_func = compare_func;
  295. ht->key_destroy_func = key_destroy_func;
  296. ht->value_destroy_func = value_destroy_func;
  297. ht->alloc = alloc;
  298. if (ht->hash_func == _Py_hashtable_hash_ptr
  299. && ht->compare_func == _Py_hashtable_compare_direct)
  300. {
  301. ht->get_entry_func = _Py_hashtable_get_entry_ptr;
  302. }
  303. return ht;
  304. }
  305. _Py_hashtable_t *
  306. _Py_hashtable_new(_Py_hashtable_hash_func hash_func,
  307. _Py_hashtable_compare_func compare_func)
  308. {
  309. return _Py_hashtable_new_full(hash_func, compare_func,
  310. NULL, NULL, NULL);
  311. }
  312. static void
  313. _Py_hashtable_destroy_entry(_Py_hashtable_t *ht, _Py_hashtable_entry_t *entry)
  314. {
  315. if (ht->key_destroy_func) {
  316. ht->key_destroy_func(entry->key);
  317. }
  318. if (ht->value_destroy_func) {
  319. ht->value_destroy_func(entry->value);
  320. }
  321. ht->alloc.free(entry);
  322. }
  323. void
  324. _Py_hashtable_clear(_Py_hashtable_t *ht)
  325. {
  326. for (size_t i=0; i < ht->nbuckets; i++) {
  327. _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, i);
  328. while (entry != NULL) {
  329. _Py_hashtable_entry_t *next = ENTRY_NEXT(entry);
  330. _Py_hashtable_destroy_entry(ht, entry);
  331. entry = next;
  332. }
  333. _Py_slist_init(&ht->buckets[i]);
  334. }
  335. ht->nentries = 0;
  336. // Ignore failure: clear function is not expected to fail
  337. // because of a memory allocation failure.
  338. (void)hashtable_rehash(ht);
  339. }
  340. void
  341. _Py_hashtable_destroy(_Py_hashtable_t *ht)
  342. {
  343. for (size_t i = 0; i < ht->nbuckets; i++) {
  344. _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, i);
  345. while (entry) {
  346. _Py_hashtable_entry_t *entry_next = ENTRY_NEXT(entry);
  347. _Py_hashtable_destroy_entry(ht, entry);
  348. entry = entry_next;
  349. }
  350. }
  351. ht->alloc.free(ht->buckets);
  352. ht->alloc.free(ht);
  353. }