/**
 * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
 * SPDX-License-Identifier: Apache-2.0.
 */

/* For more information on how the RH hash works and in particular how we do
 * deletions, see:
 * http://codecapsule.com/2013/11/17/robin-hood-hashing-backward-shift-deletion/
 */
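
/* Robin Hood hashing in brief: every entry records the full hash of its key,
 * so its "home" slot is (hash_code & mask) and its displacement (probe
 * length) is (index - hash_code) & mask. An insert evicts any resident entry
 * that sits closer to its home than the entry being inserted ("take from the
 * rich"), which keeps probe lengths short and evenly distributed; a lookup
 * can therefore stop as soon as it reaches a resident that probed less far
 * than the key being sought. */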

#include <aws/common/hash_table.h>
#include <aws/common/math.h>
#include <aws/common/private/hash_table_impl.h>
#include <aws/common/string.h>

#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

/* Include lookup3.c so we can (potentially) inline it and make use of the mix()
 * macro. */
#include <aws/common/private/lookup3.inl>

static void s_suppress_unused_lookup3_func_warnings(void) {
    /* We avoid making changes to lookup3 if we can avoid it, but since it has functions
     * we're not using, reference them somewhere to suppress the unused function warning.
     */
    (void)hashword;
    (void)hashword2;
    (void)hashlittle;
    (void)hashbig;
}

/**
 * Calculate the hash for the given key.
 * Ensures a reasonable semantics for null keys.
 * Ensures that no object ever hashes to 0, which is the sentinel value for an empty hash element.
 */
static uint64_t s_hash_for(struct hash_table_state *state, const void *key) {
    AWS_PRECONDITION(hash_table_state_is_valid(state));
    s_suppress_unused_lookup3_func_warnings();

    if (key == NULL) {
        /* The best answer */
        return 42;
    }

    uint64_t hash_code = state->hash_fn(key);
    if (!hash_code) {
        hash_code = 1;
    }
    AWS_RETURN_WITH_POSTCONDITION(hash_code, hash_code != 0);
}

/**
 * Check equality of two objects, with a reasonable semantics for null.
 */
static bool s_safe_eq_check(aws_hash_callback_eq_fn *equals_fn, const void *a, const void *b) {
    /* Short circuit if the pointers are the same */
    if (a == b) {
        return true;
    }
    /* If one but not both are null, the objects are not equal */
    if (a == NULL || b == NULL) {
        return false;
    }
    /* If both are non-null, call the underlying equals fn */
    return equals_fn(a, b);
}

/**
 * Check equality of two hash keys, with a reasonable semantics for null keys.
 */
static bool s_hash_keys_eq(struct hash_table_state *state, const void *a, const void *b) {
    AWS_PRECONDITION(hash_table_state_is_valid(state));
    bool rval = s_safe_eq_check(state->equals_fn, a, b);
    AWS_RETURN_WITH_POSTCONDITION(rval, hash_table_state_is_valid(state));
}

static size_t s_index_for(struct hash_table_state *map, struct hash_table_entry *entry) {
    AWS_PRECONDITION(hash_table_state_is_valid(map));
    size_t index = entry - map->slots;
    AWS_RETURN_WITH_POSTCONDITION(index, index < map->size && hash_table_state_is_valid(map));
}

#if 0
/* Useful debugging code for anyone working on this in the future */
static uint64_t s_distance(struct hash_table_state *state, size_t index) {
    return (index - state->slots[index].hash_code) & state->mask;
}

void hash_dump(struct aws_hash_table *tbl) {
    struct hash_table_state *state = tbl->p_impl;

    printf("Dumping hash table contents:\n");

    for (size_t i = 0; i < state->size; i++) {
        printf("%7zu: ", i);
        struct hash_table_entry *e = &state->slots[i];
        if (!e->hash_code) {
            printf("EMPTY\n");
        } else {
            printf("k: %p v: %p hash_code: %llu displacement: %llu\n",
                e->element.key, e->element.value,
                (unsigned long long)e->hash_code,
                (unsigned long long)((i - e->hash_code) & state->mask));
        }
    }
}
#endif

#if 0
/* Not currently exposed as an API. Should we have something like this? Useful for benchmarks */
AWS_COMMON_API
void aws_hash_table_print_stats(struct aws_hash_table *table) {
    struct hash_table_state *state = table->p_impl;
    uint64_t total_disp = 0;
    uint64_t max_disp = 0;

    printf("\n=== Hash table statistics ===\n");
    printf("Table size: %zu/%zu (max load %zu, remaining %zu)\n",
        state->entry_count, state->size, state->max_load, state->max_load - state->entry_count);
    printf("Load factor: %02.2lf%% (max %02.2lf%%)\n",
        100.0 * ((double)state->entry_count / (double)state->size),
        100.0 * state->max_load_factor);

    for (size_t i = 0; i < state->size; i++) {
        if (state->slots[i].hash_code) {
            uint64_t displacement = s_distance(state, i);
            total_disp += displacement;
            if (displacement > max_disp) {
                max_disp = displacement;
            }
        }
    }

    size_t *disp_counts = calloc(max_disp + 1, sizeof(*disp_counts));
    for (size_t i = 0; i < state->size; i++) {
        if (state->slots[i].hash_code) {
            disp_counts[s_distance(state, i)]++;
        }
    }

    uint64_t median = 0;
    uint64_t passed = 0;
    for (uint64_t i = 0; i <= max_disp && passed < state->entry_count / 2; i++) {
        median = i;
        passed += disp_counts[i];
    }

    printf("Displacement statistics: Avg %02.2lf max %llu median %llu\n",
        (double)total_disp / (double)state->entry_count,
        (unsigned long long)max_disp, (unsigned long long)median);
    for (uint64_t i = 0; i <= max_disp; i++) {
        printf("Displacement %2llu: %zu entries\n", (unsigned long long)i, disp_counts[i]);
    }
    free(disp_counts);
    printf("\n");
}
#endif

size_t aws_hash_table_get_entry_count(const struct aws_hash_table *map) {
    struct hash_table_state *state = map->p_impl;
    return state->entry_count;
}

/* Given a header template, allocates space for a hash table of the appropriate
 * size, and copies the state header into this allocated memory, which is
 * returned.
 */
static struct hash_table_state *s_alloc_state(const struct hash_table_state *template) {
    size_t required_bytes;
    if (hash_table_state_required_bytes(template->size, &required_bytes)) {
        return NULL;
    }

    /* An empty slot has hashcode 0. So this marks all slots as empty */
    struct hash_table_state *state = aws_mem_calloc(template->alloc, 1, required_bytes);

    if (state == NULL) {
        return state;
    }

    *state = *template;
    return state;
}

/* Computes the correct size and max_load based on a requested size. */
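/* For example, a requested size of 10 rounds up to a size of 16; with the
 * default max_load_factor of 0.95, max_load becomes (size_t)(0.95 * 16) == 15
 * and mask becomes 0xF, so (index & mask) == (index % 16). */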
static int s_update_template_size(struct hash_table_state *template, size_t expected_elements) {
    size_t min_size = expected_elements;
    if (min_size < 2) {
        min_size = 2;
    }

    /* size is always a power of 2 */
    size_t size;
    if (aws_round_up_to_power_of_two(min_size, &size)) {
        return AWS_OP_ERR;
    }

    /* Update the template once we've calculated everything successfully */
    template->size = size;
    template->max_load = (size_t)(template->max_load_factor * (double)template->size);
    /* Ensure that there is always at least one empty slot in the hash table */
    if (template->max_load >= size) {
        template->max_load = size - 1;
    }

    /* Since size is a power of 2: (index & (size - 1)) == (index % size) */
    template->mask = size - 1;

    return AWS_OP_SUCCESS;
}

int aws_hash_table_init(
    struct aws_hash_table *map,
    struct aws_allocator *alloc,
    size_t size,
    aws_hash_fn *hash_fn,
    aws_hash_callback_eq_fn *equals_fn,
    aws_hash_callback_destroy_fn *destroy_key_fn,
    aws_hash_callback_destroy_fn *destroy_value_fn) {
    AWS_PRECONDITION(map != NULL);
    AWS_PRECONDITION(alloc != NULL);
    AWS_PRECONDITION(hash_fn != NULL);
    AWS_PRECONDITION(equals_fn != NULL);

    struct hash_table_state template;
    template.hash_fn = hash_fn;
    template.equals_fn = equals_fn;
    template.destroy_key_fn = destroy_key_fn;
    template.destroy_value_fn = destroy_value_fn;
    template.alloc = alloc;

    template.entry_count = 0;
    template.max_load_factor = 0.95; /* TODO - make configurable? */

    if (s_update_template_size(&template, size)) {
        return AWS_OP_ERR;
    }
    map->p_impl = s_alloc_state(&template);

    if (!map->p_impl) {
        return AWS_OP_ERR;
    }

    AWS_SUCCEED_WITH_POSTCONDITION(aws_hash_table_is_valid(map));
}

void aws_hash_table_clean_up(struct aws_hash_table *map) {
    AWS_PRECONDITION(map != NULL);
    AWS_PRECONDITION(
        map->p_impl == NULL || aws_hash_table_is_valid(map),
        "Input aws_hash_table [map] must be valid or hash_table_state pointer [map->p_impl] must be NULL, in case "
        "aws_hash_table_clean_up was called twice.");
    struct hash_table_state *state = map->p_impl;

    /* Ensure that we're idempotent */
    if (!state) {
        return;
    }

    aws_hash_table_clear(map);
    aws_mem_release(map->p_impl->alloc, map->p_impl);

    map->p_impl = NULL;
    AWS_POSTCONDITION(map->p_impl == NULL);
}

void aws_hash_table_swap(struct aws_hash_table *AWS_RESTRICT a, struct aws_hash_table *AWS_RESTRICT b) {
    AWS_PRECONDITION(a != b);
    struct aws_hash_table tmp = *a;
    *a = *b;
    *b = tmp;
}

void aws_hash_table_move(struct aws_hash_table *AWS_RESTRICT to, struct aws_hash_table *AWS_RESTRICT from) {
    AWS_PRECONDITION(to != NULL);
    AWS_PRECONDITION(from != NULL);
    AWS_PRECONDITION(to != from);
    AWS_PRECONDITION(aws_hash_table_is_valid(from));

    *to = *from;
    AWS_ZERO_STRUCT(*from);
    AWS_POSTCONDITION(aws_hash_table_is_valid(to));
}

/* Tries to find where the requested key is or where it should go if put.
 * Returns AWS_ERROR_SUCCESS if the item existed (leaving it in *entry),
 * or AWS_ERROR_HASHTBL_ITEM_NOT_FOUND if it did not (putting its destination
 * in *entry). Note that this does not take care of displacing whatever was in
 * that entry before.
 *
 * probe_idx is set to the probe index of the entry found.
 */
static int s_find_entry1(
    struct hash_table_state *state,
    uint64_t hash_code,
    const void *key,
    struct hash_table_entry **p_entry,
    size_t *p_probe_idx);

/* Inlined fast path: check the first slot, only. */
/* TODO: Force inlining? */
static inline int s_find_entry(
    struct hash_table_state *state,
    uint64_t hash_code,
    const void *key,
    struct hash_table_entry **p_entry,
    size_t *p_probe_idx) {
    struct hash_table_entry *entry = &state->slots[hash_code & state->mask];

    if (entry->hash_code == 0) {
        if (p_probe_idx) {
            *p_probe_idx = 0;
        }
        *p_entry = entry;
        return AWS_ERROR_HASHTBL_ITEM_NOT_FOUND;
    }

    if (entry->hash_code == hash_code && s_hash_keys_eq(state, key, entry->element.key)) {
        if (p_probe_idx) {
            *p_probe_idx = 0;
        }
        *p_entry = entry;
        return AWS_OP_SUCCESS;
    }

    return s_find_entry1(state, hash_code, key, p_entry, p_probe_idx);
}

static int s_find_entry1(
    struct hash_table_state *state,
    uint64_t hash_code,
    const void *key,
    struct hash_table_entry **p_entry,
    size_t *p_probe_idx) {
    size_t probe_idx = 1;
    /* probe_idx starts at 1 because the inlined fast path (s_find_entry) has
     * already checked the home slot for this hash code. */

    int rv;
    struct hash_table_entry *entry;
    /* This loop is guaranteed to terminate because entry_probe is bounded above by state->mask (i.e. state->size - 1).
     * Since probe_idx increments on every iteration, it will exceed entry_probe after at most state->size
     * transitions, and the loop will exit (if it hasn't already).
     */
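    /* Worked example of entry_probe: with mask == 0xF, an entry whose home
     * slot is 0x3 but which is stored at index 0x6 has probe length
     * (0x6 - 0x3) & 0xF == 3. The Robin Hood invariant is that an insert
     * evicts any resident that has probed less far than the entry being
     * inserted, so finding a resident with entry_probe < probe_idx proves our
     * key is absent: had it been inserted, it would have claimed this slot. */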
    while (1) {
#ifdef CBMC
#    pragma CPROVER check push
#    pragma CPROVER check disable "unsigned-overflow"
#endif
        uint64_t index = (hash_code + probe_idx) & state->mask;
#ifdef CBMC
#    pragma CPROVER check pop
#endif
        entry = &state->slots[index];
        if (!entry->hash_code) {
            rv = AWS_ERROR_HASHTBL_ITEM_NOT_FOUND;
            break;
        }

        if (entry->hash_code == hash_code && s_hash_keys_eq(state, key, entry->element.key)) {
            rv = AWS_ERROR_SUCCESS;
            break;
        }

#ifdef CBMC
#    pragma CPROVER check push
#    pragma CPROVER check disable "unsigned-overflow"
#endif
        uint64_t entry_probe = (index - entry->hash_code) & state->mask;
#ifdef CBMC
#    pragma CPROVER check pop
#endif

        if (entry_probe < probe_idx) {
            /* We now know that our target entry cannot exist; if it did exist,
             * it would be at the current location as it has a higher probe
             * length than the entry we are examining and thus would have
             * preempted that item
             */
            rv = AWS_ERROR_HASHTBL_ITEM_NOT_FOUND;
            break;
        }

        probe_idx++;
    }

    *p_entry = entry;
    if (p_probe_idx) {
        *p_probe_idx = probe_idx;
    }

    return rv;
}

int aws_hash_table_find(const struct aws_hash_table *map, const void *key, struct aws_hash_element **p_elem) {
    AWS_PRECONDITION(aws_hash_table_is_valid(map));
    AWS_PRECONDITION(AWS_OBJECT_PTR_IS_WRITABLE(p_elem), "Input aws_hash_element pointer [p_elem] must be writable.");

    struct hash_table_state *state = map->p_impl;
    uint64_t hash_code = s_hash_for(state, key);
    struct hash_table_entry *entry;

    int rv = s_find_entry(state, hash_code, key, &entry, NULL);

    if (rv == AWS_ERROR_SUCCESS) {
        *p_elem = &entry->element;
    } else {
        *p_elem = NULL;
    }

    AWS_SUCCEED_WITH_POSTCONDITION(aws_hash_table_is_valid(map));
}

/**
 * Attempts to find a home for the given entry.
 * If the entry was empty (i.e. hash code of 0), then the function does nothing and returns NULL.
 * Otherwise, it emplaces the item and returns a pointer to the newly emplaced entry.
 * This function is only called after the hash table has been expanded to fit the new element,
 * so it should never fail.
 */
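/* The insertion strategy below is the classic Robin Hood one: walk the probe
 * sequence, and whenever the resident entry sits closer to its home slot than
 * the entry we are carrying (victim_probe_idx < probe_idx), swap them and
 * continue placing the displaced resident. The table always has at least one
 * empty slot, so the walk terminates. */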
static struct hash_table_entry *s_emplace_item(
    struct hash_table_state *state,
    struct hash_table_entry entry,
    size_t probe_idx) {
    AWS_PRECONDITION(hash_table_state_is_valid(state));

    if (entry.hash_code == 0) {
        AWS_RETURN_WITH_POSTCONDITION(NULL, hash_table_state_is_valid(state));
    }

    struct hash_table_entry *rval = NULL;

    /* Since a valid hash_table has at least one empty element, this loop will always terminate in at most linear time
     */
    while (entry.hash_code != 0) {
#ifdef CBMC
#    pragma CPROVER check push
#    pragma CPROVER check disable "unsigned-overflow"
#endif
        size_t index = (size_t)(entry.hash_code + probe_idx) & state->mask;
#ifdef CBMC
#    pragma CPROVER check pop
#endif
        struct hash_table_entry *victim = &state->slots[index];

#ifdef CBMC
#    pragma CPROVER check push
#    pragma CPROVER check disable "unsigned-overflow"
#endif
        size_t victim_probe_idx = (size_t)(index - victim->hash_code) & state->mask;
#ifdef CBMC
#    pragma CPROVER check pop
#endif

        if (!victim->hash_code || victim_probe_idx < probe_idx) {
            /* The first thing we emplace is the entry itself. A pointer to its location becomes the rval */
            if (!rval) {
                rval = victim;
            }

            struct hash_table_entry tmp = *victim;
            *victim = entry;
            entry = tmp;

            probe_idx = victim_probe_idx + 1;
        } else {
            probe_idx++;
        }
    }

    AWS_RETURN_WITH_POSTCONDITION(
        rval,
        hash_table_state_is_valid(state) && rval >= &state->slots[0] && rval < &state->slots[state->size],
        "Output hash_table_entry pointer [rval] must point in the slots of [state].");
}

static int s_expand_table(struct aws_hash_table *map) {
    struct hash_table_state *old_state = map->p_impl;
    struct hash_table_state template = *old_state;

    size_t new_size;
    if (aws_mul_size_checked(template.size, 2, &new_size)) {
        return AWS_OP_ERR;
    }

    if (s_update_template_size(&template, new_size)) {
        return AWS_OP_ERR;
    }

    struct hash_table_state *new_state = s_alloc_state(&template);
    if (!new_state) {
        return AWS_OP_ERR;
    }

    for (size_t i = 0; i < old_state->size; i++) {
        struct hash_table_entry entry = old_state->slots[i];
        if (entry.hash_code) {
            /* We can directly emplace since we know we won't put the same item twice */
            s_emplace_item(new_state, entry, 0);
        }
    }

    map->p_impl = new_state;
    aws_mem_release(new_state->alloc, old_state);

    return AWS_OP_SUCCESS;
}

int aws_hash_table_create(
    struct aws_hash_table *map,
    const void *key,
    struct aws_hash_element **p_elem,
    int *was_created) {
    struct hash_table_state *state = map->p_impl;
    uint64_t hash_code = s_hash_for(state, key);
    struct hash_table_entry *entry;
    size_t probe_idx;
    int ignored;
    if (!was_created) {
        was_created = &ignored;
    }

    int rv = s_find_entry(state, hash_code, key, &entry, &probe_idx);

    if (rv == AWS_ERROR_SUCCESS) {
        if (p_elem) {
            *p_elem = &entry->element;
        }
        *was_created = 0;
        return AWS_OP_SUCCESS;
    }

    /* Okay, we need to add an entry. Check the load factor first. */
    size_t incr_entry_count;
    if (aws_add_size_checked(state->entry_count, 1, &incr_entry_count)) {
        return AWS_OP_ERR;
    }
    if (incr_entry_count > state->max_load) {
        rv = s_expand_table(map);

        if (rv != AWS_OP_SUCCESS) {
            /* Any error was already raised in expand_table */
            return rv;
        }

        state = map->p_impl;
        /* If we expanded the table, we need to discard the probe index returned from find_entry,
         * as it's likely that we can find a more desirable slot. If we don't, then later gets will
         * terminate before reaching our probe index.
         *
         * n.b. currently we ignore this probe_idx subsequently, but leaving this here so we don't
         * forget when we optimize later. */
        probe_idx = 0;
    }

    state->entry_count++;
    struct hash_table_entry new_entry;
    new_entry.element.key = key;
    new_entry.element.value = NULL;
    new_entry.hash_code = hash_code;

    entry = s_emplace_item(state, new_entry, probe_idx);

    if (p_elem) {
        *p_elem = &entry->element;
    }

    *was_created = 1;

    return AWS_OP_SUCCESS;
}

AWS_COMMON_API
int aws_hash_table_put(struct aws_hash_table *map, const void *key, void *value, int *was_created) {
    struct aws_hash_element *p_elem;
    int was_created_fallback;

    if (!was_created) {
        was_created = &was_created_fallback;
    }

    if (aws_hash_table_create(map, key, &p_elem, was_created)) {
        return AWS_OP_ERR;
    }

    /*
     * aws_hash_table_create might resize the table, which results in map->p_impl changing.
     * It is therefore important to wait to read p_impl until after aws_hash_table_create
     * has returned.
     */
    struct hash_table_state *state = map->p_impl;

    if (!*was_created) {
        if (p_elem->key != key && state->destroy_key_fn) {
            state->destroy_key_fn((void *)p_elem->key);
        }

        if (state->destroy_value_fn) {
            state->destroy_value_fn((void *)p_elem->value);
        }
    }

    p_elem->key = key;
    p_elem->value = value;

    return AWS_OP_SUCCESS;
}
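
/* Typical usage: aws_hash_table_put(&table, key, value, NULL) inserts or
 * overwrites. On overwrite, the old value is destroyed via destroy_value_fn,
 * and the old key via destroy_key_fn unless the old and new key pointers are
 * identical; both callbacks are the ones supplied to aws_hash_table_init. */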

/* Clears an entry. Does _not_ invoke destructor callbacks.
 * Returns the last slot touched (note that if we wrap, we'll report an index
 * lower than the original entry's index).
 */
static size_t s_remove_entry(struct hash_table_state *state, struct hash_table_entry *entry) {
    AWS_PRECONDITION(hash_table_state_is_valid(state));
    AWS_PRECONDITION(state->entry_count > 0);
    AWS_PRECONDITION(
        entry >= &state->slots[0] && entry < &state->slots[state->size],
        "Input hash_table_entry [entry] pointer must point in the available slots.");
    state->entry_count--;

    /* Shift subsequent entries back until we find an entry that belongs at its
     * current position. This is important to ensure that subsequent searches
     * don't terminate at the removed element.
     */
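    /* Concretely: if we remove the entry in slot 5 and slots 6 and 7 hold
     * entries displaced past their home slots, they shift back into slots 5
     * and 6 and slot 7 is zeroed. This is the "backward shift deletion" from
     * the article referenced at the top of this file; it needs no tombstones,
     * so probe sequences never accumulate dead slots. */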
    size_t index = s_index_for(state, entry);
    /* There is always at least one empty slot in the hash table, so this loop always terminates */
    while (1) {
        size_t next_index = (index + 1) & state->mask;

        /* If we hit an empty slot, stop */
        if (!state->slots[next_index].hash_code) {
            break;
        }
        /* If the next slot is at the start of the probe sequence, stop.
         * We know that nothing with an earlier home slot is after this;
         * otherwise this index-zero entry would have been evicted from its
         * home.
         */
        if ((state->slots[next_index].hash_code & state->mask) == next_index) {
            break;
        }

        /* Okay, shift this one back */
        state->slots[index] = state->slots[next_index];
        index = next_index;
    }

    /* Clear the entry we shifted out of */
    AWS_ZERO_STRUCT(state->slots[index]);

    AWS_RETURN_WITH_POSTCONDITION(index, hash_table_state_is_valid(state) && index <= state->size);
}

int aws_hash_table_remove(
    struct aws_hash_table *map,
    const void *key,
    struct aws_hash_element *p_value,
    int *was_present) {
    AWS_PRECONDITION(aws_hash_table_is_valid(map));
    AWS_PRECONDITION(
        p_value == NULL || AWS_OBJECT_PTR_IS_WRITABLE(p_value), "Input pointer [p_value] must be NULL or writable.");
    AWS_PRECONDITION(
        was_present == NULL || AWS_OBJECT_PTR_IS_WRITABLE(was_present),
        "Input pointer [was_present] must be NULL or writable.");

    struct hash_table_state *state = map->p_impl;
    uint64_t hash_code = s_hash_for(state, key);
    struct hash_table_entry *entry;
    int ignored;

    if (!was_present) {
        was_present = &ignored;
    }

    int rv = s_find_entry(state, hash_code, key, &entry, NULL);

    if (rv != AWS_ERROR_SUCCESS) {
        *was_present = 0;
        AWS_SUCCEED_WITH_POSTCONDITION(aws_hash_table_is_valid(map));
    }

    *was_present = 1;

    if (p_value) {
        *p_value = entry->element;
    } else {
        if (state->destroy_key_fn) {
            state->destroy_key_fn((void *)entry->element.key);
        }
        if (state->destroy_value_fn) {
            state->destroy_value_fn(entry->element.value);
        }
    }
    s_remove_entry(state, entry);

    AWS_SUCCEED_WITH_POSTCONDITION(aws_hash_table_is_valid(map));
}

int aws_hash_table_remove_element(struct aws_hash_table *map, struct aws_hash_element *p_value) {
    AWS_PRECONDITION(aws_hash_table_is_valid(map));
    AWS_PRECONDITION(p_value != NULL);

    struct hash_table_state *state = map->p_impl;
    struct hash_table_entry *entry = AWS_CONTAINER_OF(p_value, struct hash_table_entry, element);

    s_remove_entry(state, entry);

    AWS_SUCCEED_WITH_POSTCONDITION(aws_hash_table_is_valid(map));
}

int aws_hash_table_foreach(
    struct aws_hash_table *map,
    int (*callback)(void *context, struct aws_hash_element *pElement),
    void *context) {

    for (struct aws_hash_iter iter = aws_hash_iter_begin(map); !aws_hash_iter_done(&iter);
         aws_hash_iter_next(&iter)) {
        int rv = callback(context, &iter.element);

        if (rv & AWS_COMMON_HASH_TABLE_ITER_ERROR) {
            int error = aws_last_error();
            if (error == AWS_ERROR_SUCCESS) {
                aws_raise_error(AWS_ERROR_UNKNOWN);
            }
            return AWS_OP_ERR;
        }

        if (rv & AWS_COMMON_HASH_TABLE_ITER_DELETE) {
            aws_hash_iter_delete(&iter, false);
        }

        if (!(rv & AWS_COMMON_HASH_TABLE_ITER_CONTINUE)) {
            break;
        }
    }

    return AWS_OP_SUCCESS;
}

bool aws_hash_table_eq(
    const struct aws_hash_table *a,
    const struct aws_hash_table *b,
    aws_hash_callback_eq_fn *value_eq) {
    AWS_PRECONDITION(aws_hash_table_is_valid(a));
    AWS_PRECONDITION(aws_hash_table_is_valid(b));
    AWS_PRECONDITION(value_eq != NULL);

    if (aws_hash_table_get_entry_count(a) != aws_hash_table_get_entry_count(b)) {
        AWS_RETURN_WITH_POSTCONDITION(false, aws_hash_table_is_valid(a) && aws_hash_table_is_valid(b));
    }

    /*
     * Now that we have established that the two tables have the same number of
     * entries, we can simply iterate one and compare against the same key in
     * the other.
     */
    for (size_t i = 0; i < a->p_impl->size; ++i) {
        const struct hash_table_entry *const a_entry = &a->p_impl->slots[i];
        if (a_entry->hash_code == 0) {
            continue;
        }

        struct aws_hash_element *b_element = NULL;

        aws_hash_table_find(b, a_entry->element.key, &b_element);

        if (!b_element) {
            /* Key is present in A only */
            AWS_RETURN_WITH_POSTCONDITION(false, aws_hash_table_is_valid(a) && aws_hash_table_is_valid(b));
        }

        if (!s_safe_eq_check(value_eq, a_entry->element.value, b_element->value)) {
            AWS_RETURN_WITH_POSTCONDITION(false, aws_hash_table_is_valid(a) && aws_hash_table_is_valid(b));
        }
    }
    AWS_RETURN_WITH_POSTCONDITION(true, aws_hash_table_is_valid(a) && aws_hash_table_is_valid(b));
}

/**
 * Given an iterator and a start slot, finds the next filled slot if one exists;
 * otherwise, leaves the iter in a state for which aws_hash_iter_done() returns true.
 * Note that aws_hash_iter_is_valid() need not hold on entry to the function, since
 * it can be called on a partially constructed iter from aws_hash_iter_begin().
 *
 * Note that calling this on an iterator which is "done" is idempotent: it will yield another
 * iterator which is "done".
 */
static inline void s_get_next_element(struct aws_hash_iter *iter, size_t start_slot) {
    AWS_PRECONDITION(iter != NULL);
    AWS_PRECONDITION(aws_hash_table_is_valid(iter->map));
    struct hash_table_state *state = iter->map->p_impl;
    size_t limit = iter->limit;

    for (size_t i = start_slot; i < limit; i++) {
        struct hash_table_entry *entry = &state->slots[i];

        if (entry->hash_code) {
            iter->element = entry->element;
            iter->slot = i;
            iter->status = AWS_HASH_ITER_STATUS_READY_FOR_USE;
            return;
        }
    }
    iter->element.key = NULL;
    iter->element.value = NULL;
    iter->slot = iter->limit;
    iter->status = AWS_HASH_ITER_STATUS_DONE;
    AWS_POSTCONDITION(aws_hash_iter_is_valid(iter));
}

struct aws_hash_iter aws_hash_iter_begin(const struct aws_hash_table *map) {
    AWS_PRECONDITION(aws_hash_table_is_valid(map));
    struct hash_table_state *state = map->p_impl;
    struct aws_hash_iter iter;
    AWS_ZERO_STRUCT(iter);
    iter.map = map;
    iter.limit = state->size;
    s_get_next_element(&iter, 0);
    AWS_RETURN_WITH_POSTCONDITION(
        iter,
        aws_hash_iter_is_valid(&iter) &&
            (iter.status == AWS_HASH_ITER_STATUS_DONE || iter.status == AWS_HASH_ITER_STATUS_READY_FOR_USE),
        "The status of output aws_hash_iter [iter] must either be DONE or READY_FOR_USE.");
}

bool aws_hash_iter_done(const struct aws_hash_iter *iter) {
    AWS_PRECONDITION(aws_hash_iter_is_valid(iter));
    AWS_PRECONDITION(
        iter->status == AWS_HASH_ITER_STATUS_DONE || iter->status == AWS_HASH_ITER_STATUS_READY_FOR_USE,
        "Input aws_hash_iter [iter] must either be done, or ready to use.");
    /*
     * SIZE_MAX is a valid (non-terminal) value for iter->slot in the event that
     * we delete slot 0. See comments in aws_hash_iter_delete.
     *
     * As such we must use == rather than >= here.
     */
    bool rval = (iter->slot == iter->limit);
    AWS_POSTCONDITION(
        iter->status == AWS_HASH_ITER_STATUS_DONE || iter->status == AWS_HASH_ITER_STATUS_READY_FOR_USE,
        "The status of output aws_hash_iter [iter] must either be DONE or READY_FOR_USE.");
    AWS_POSTCONDITION(
        rval == (iter->status == AWS_HASH_ITER_STATUS_DONE),
        "Output bool [rval] must be true if and only if the status of [iter] is DONE.");
    AWS_POSTCONDITION(aws_hash_iter_is_valid(iter));
    return rval;
}

void aws_hash_iter_next(struct aws_hash_iter *iter) {
    AWS_PRECONDITION(aws_hash_iter_is_valid(iter));
#ifdef CBMC
#    pragma CPROVER check push
#    pragma CPROVER check disable "unsigned-overflow"
#endif
    s_get_next_element(iter, iter->slot + 1);
#ifdef CBMC
#    pragma CPROVER check pop
#endif
    AWS_POSTCONDITION(
        iter->status == AWS_HASH_ITER_STATUS_DONE || iter->status == AWS_HASH_ITER_STATUS_READY_FOR_USE,
        "The status of output aws_hash_iter [iter] must either be DONE or READY_FOR_USE.");
    AWS_POSTCONDITION(aws_hash_iter_is_valid(iter));
}

void aws_hash_iter_delete(struct aws_hash_iter *iter, bool destroy_contents) {
    AWS_PRECONDITION(
        iter->status == AWS_HASH_ITER_STATUS_READY_FOR_USE, "Input aws_hash_iter [iter] must be ready for use.");
    AWS_PRECONDITION(aws_hash_iter_is_valid(iter));
    AWS_PRECONDITION(
        iter->map->p_impl->entry_count > 0,
        "The hash_table_state pointed to by input [iter] must contain at least one entry.");

    struct hash_table_state *state = iter->map->p_impl;
    if (destroy_contents) {
        if (state->destroy_key_fn) {
            state->destroy_key_fn((void *)iter->element.key);
        }
        if (state->destroy_value_fn) {
            state->destroy_value_fn(iter->element.value);
        }
    }

    size_t last_index = s_remove_entry(state, &state->slots[iter->slot]);

    /* If we shifted elements that are not part of the window we intend to iterate
     * over, it means we shifted an element that we already visited into the
     * iter->limit - 1 position. To avoid double iteration, we'll now reduce the
     * limit to compensate.
     *
     * Note that last_index cannot equal iter->slot, because slots[iter->slot]
     * is empty before we start walking the table.
     */
    if (last_index < iter->slot || last_index >= iter->limit) {
        iter->limit--;
    }

    /*
     * After removing this entry, the next entry might be in the same slot, or
     * in some later slot, or we might have no further entries.
     *
     * We also expect that the caller will call aws_hash_iter_done and aws_hash_iter_next
     * after this delete call. This gets a bit tricky if we just deleted the value
     * in slot 0, and a new value has shifted in.
     *
     * To deal with this, we'll just step back one slot, and let _next start iteration
     * at our current slot. Note that if we just deleted slot 0, this will result in
     * underflowing to SIZE_MAX; we have to take care in aws_hash_iter_done to avoid
     * treating this as an end-of-iteration condition.
     */
#ifdef CBMC
#    pragma CPROVER check push
#    pragma CPROVER check disable "unsigned-overflow"
#endif
    iter->slot--;
#ifdef CBMC
#    pragma CPROVER check pop
#endif
    iter->status = AWS_HASH_ITER_STATUS_DELETE_CALLED;
    AWS_POSTCONDITION(
        iter->status == AWS_HASH_ITER_STATUS_DELETE_CALLED,
        "The status of output aws_hash_iter [iter] must be DELETE_CALLED.");
    AWS_POSTCONDITION(aws_hash_iter_is_valid(iter));
}

void aws_hash_table_clear(struct aws_hash_table *map) {
    AWS_PRECONDITION(aws_hash_table_is_valid(map));
    struct hash_table_state *state = map->p_impl;

    /* Check that we have at least one destructor before iterating over the table */
    if (state->destroy_key_fn || state->destroy_value_fn) {
        for (size_t i = 0; i < state->size; ++i) {
            struct hash_table_entry *entry = &state->slots[i];
            if (!entry->hash_code) {
                continue;
            }
            if (state->destroy_key_fn) {
                state->destroy_key_fn((void *)entry->element.key);
            }
            if (state->destroy_value_fn) {
                state->destroy_value_fn(entry->element.value);
            }
        }
    }

    /* Since hash code 0 represents an empty slot we can just zero out the
     * entire table. */
    memset(state->slots, 0, sizeof(*state->slots) * state->size);

    state->entry_count = 0;
    AWS_POSTCONDITION(aws_hash_table_is_valid(map));
}

uint64_t aws_hash_c_string(const void *item) {
    AWS_PRECONDITION(aws_c_string_is_valid(item));
    const char *str = item;

    /* first digits of pi in hex */
    uint32_t b = 0x3243F6A8, c = 0x885A308D;
    hashlittle2(str, strlen(str), &c, &b);

    return ((uint64_t)b << 32) | c;
}

uint64_t aws_hash_string(const void *item) {
    AWS_PRECONDITION(aws_string_is_valid(item));
    const struct aws_string *str = item;

    /* first digits of pi in hex */
    uint32_t b = 0x3243F6A8, c = 0x885A308D;
    hashlittle2(aws_string_bytes(str), str->len, &c, &b);
    AWS_RETURN_WITH_POSTCONDITION(((uint64_t)b << 32) | c, aws_string_is_valid(str));
}

uint64_t aws_hash_byte_cursor_ptr(const void *item) {
    AWS_PRECONDITION(aws_byte_cursor_is_valid(item));
    const struct aws_byte_cursor *cur = item;

    /* first digits of pi in hex */
    uint32_t b = 0x3243F6A8, c = 0x885A308D;
    hashlittle2(cur->ptr, cur->len, &c, &b);
    AWS_RETURN_WITH_POSTCONDITION(((uint64_t)b << 32) | c, aws_byte_cursor_is_valid(cur));
}

uint64_t aws_hash_ptr(const void *item) {
    /* Since the numeric value of the pointer is considered, not the memory behind it, 0 is an acceptable value */
    /* first digits of e in hex
     * 2.b7e 1516 28ae d2a6 */
    uint32_t b = 0x2b7e1516, c = 0x28aed2a6;

    hashlittle2(&item, sizeof(item), &c, &b);

    return ((uint64_t)b << 32) | c;
}

uint64_t aws_hash_combine(uint64_t item1, uint64_t item2) {
    uint32_t b = item2 & 0xFFFFFFFF; /* LSB */
    uint32_t c = item2 >> 32;        /* MSB */

    hashlittle2(&item1, sizeof(item1), &c, &b);
    return ((uint64_t)b << 32) | c;
}
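
/* aws_hash_combine folds item1 into the hash carried by item2, so composite
 * keys can be hashed incrementally, for example:
 * uint64_t h = aws_hash_combine(aws_hash_ptr(first), aws_hash_ptr(second)); */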

bool aws_hash_callback_c_str_eq(const void *a, const void *b) {
    AWS_PRECONDITION(aws_c_string_is_valid(a));
    AWS_PRECONDITION(aws_c_string_is_valid(b));
    bool rval = !strcmp(a, b);
    AWS_RETURN_WITH_POSTCONDITION(rval, aws_c_string_is_valid(a) && aws_c_string_is_valid(b));
}

bool aws_hash_callback_string_eq(const void *a, const void *b) {
    AWS_PRECONDITION(aws_string_is_valid(a));
    AWS_PRECONDITION(aws_string_is_valid(b));
    bool rval = aws_string_eq(a, b);
    AWS_RETURN_WITH_POSTCONDITION(rval, aws_string_is_valid(a) && aws_string_is_valid(b));
}

void aws_hash_callback_string_destroy(void *a) {
    AWS_PRECONDITION(aws_string_is_valid(a));
    aws_string_destroy(a);
}

bool aws_ptr_eq(const void *a, const void *b) {
    return a == b;
}

/**
 * Best-effort check of aws_hash_table data-structure invariants.
 * Some invariants, such as that the number of entries is actually the
 * same as the entry_count field, would require a loop to check.
 */
bool aws_hash_table_is_valid(const struct aws_hash_table *map) {
    return map && map->p_impl && hash_table_state_is_valid(map->p_impl);
}

/**
 * Best-effort check of hash_table_state data-structure invariants.
 * Some invariants, such as that the number of entries is actually the
 * same as the entry_count field, would require a loop to check.
 */
bool hash_table_state_is_valid(const struct hash_table_state *map) {
    if (!map) {
        return false;
    }
    bool hash_fn_nonnull = (map->hash_fn != NULL);
    bool equals_fn_nonnull = (map->equals_fn != NULL);
    /* destroy_key_fn and destroy_value_fn are both allowed to be NULL */
    bool alloc_nonnull = (map->alloc != NULL);
    bool size_at_least_two = (map->size >= 2);
    bool size_is_power_of_two = aws_is_power_of_two(map->size);
    bool entry_count = (map->entry_count <= map->max_load);
    bool max_load = (map->max_load < map->size);
    bool mask_is_correct = (map->mask == (map->size - 1));
    bool max_load_factor_bounded = map->max_load_factor == 0.95; /* (map->max_load_factor < 1.0); */
    bool slots_allocated = AWS_MEM_IS_WRITABLE(&map->slots[0], sizeof(map->slots[0]) * map->size);

    return hash_fn_nonnull && equals_fn_nonnull && alloc_nonnull && size_at_least_two && size_is_power_of_two &&
           entry_count && max_load && mask_is_correct && max_load_factor_bounded && slots_allocated;
}

/**
 * Given a pointer to a hash_iter, checks that it is well-formed, with all data-structure invariants.
 */
bool aws_hash_iter_is_valid(const struct aws_hash_iter *iter) {
    if (!iter) {
        return false;
    }
    if (!iter->map) {
        return false;
    }
    if (!aws_hash_table_is_valid(iter->map)) {
        return false;
    }
    if (iter->limit > iter->map->p_impl->size) {
        return false;
    }

    switch (iter->status) {
        case AWS_HASH_ITER_STATUS_DONE:
            /* Done iff slot == limit */
            return iter->slot == iter->limit;
        case AWS_HASH_ITER_STATUS_DELETE_CALLED:
            /* iter->slot can underflow to SIZE_MAX after a delete;
             * see the comments for aws_hash_iter_delete() */
            return iter->slot <= iter->limit || iter->slot == SIZE_MAX;
        case AWS_HASH_ITER_STATUS_READY_FOR_USE:
            /* A slot must point to a valid location (i.e. hash_code != 0) */
            return iter->slot < iter->limit && iter->map->p_impl->slots[iter->slot].hash_code != 0;
    }
    /* Invalid status code */
    return false;
}

/**
 * Determine the total number of bytes needed for a hash-table with
 * "size" slots. If the result would overflow a size_t, return
 * AWS_OP_ERR; otherwise, return AWS_OP_SUCCESS with the result in
 * "required_bytes".
 */
int hash_table_state_required_bytes(size_t size, size_t *required_bytes) {

    size_t elemsize;
    if (aws_mul_size_checked(size, sizeof(struct hash_table_entry), &elemsize)) {
        return AWS_OP_ERR;
    }

    if (aws_add_size_checked(elemsize, sizeof(struct hash_table_state), required_bytes)) {
        return AWS_OP_ERR;
    }

    return AWS_OP_SUCCESS;
}