uhash.cpp 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097
  1. // © 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. ******************************************************************************
  5. * Copyright (C) 1997-2016, International Business Machines
  6. * Corporation and others. All Rights Reserved.
  7. ******************************************************************************
  8. * Date Name Description
  9. * 03/22/00 aliu Adapted from original C++ ICU Hashtable.
  10. * 07/06/01 aliu Modified to support int32_t keys on
  11. * platforms with sizeof(void*) < 32.
  12. ******************************************************************************
  13. */
  14. #include <string_view>
  15. #include "uhash.h"
  16. #include "unicode/ustring.h"
  17. #include "cstring.h"
  18. #include "cmemory.h"
  19. #include "uassert.h"
  20. #include "ustr_imp.h"
  21. /* This hashtable is implemented as a double hash. All elements are
  22. * stored in a single array with no secondary storage for collision
  23. * resolution (no linked list, etc.). When there is a hash collision
  24. * (when two unequal keys have the same hashcode) we resolve this by
  25. * using a secondary hash. The secondary hash is an increment
  26. * computed as a hash function (a different one) of the primary
  27. * hashcode. This increment is added to the initial hash value to
  28. * obtain further slots assigned to the same hash code. For this to
  29. * work, the length of the array and the increment must be relatively
  30. * prime. The easiest way to achieve this is to have the length of
  31. * the array be prime, and the increment be any value from
  32. * 1..length-1.
  33. *
  34. * Hashcodes are 32-bit integers. We make sure all hashcodes are
  35. * non-negative by masking off the top bit. This has two effects: (1)
  36. * modulo arithmetic is simplified. If we allowed negative hashcodes,
  37. * then when we computed hashcode % length, we could get a negative
  38. * result, which we would then have to adjust back into range. It's
  39. * simpler to just make hashcodes non-negative. (2) It makes it easy
  40. * to check for empty vs. occupied slots in the table. We just mark
  41. * empty or deleted slots with a negative hashcode.
  42. *
  43. * The central function is _uhash_find(). This function looks for a
  44. * slot matching the given key and hashcode. If one is found, it
  45. * returns a pointer to that slot. If the table is full, and no match
  46. * is found, it returns nullptr -- in theory. This would make the code
  47. * more complicated, since all callers of _uhash_find() would then
  48. * have to check for a nullptr result. To keep this from happening, we
  49. * don't allow the table to fill. When there is only one
  50. * empty/deleted slot left, uhash_put() will refuse to increase the
  51. * count, and fail. This simplifies the code. In practice, one will
  52. * seldom encounter this using default UHashtables. However, if a
  53. * hashtable is set to a U_FIXED resize policy, or if memory is
  54. * exhausted, then the table may fill.
  55. *
  56. * High and low water ratios control rehashing. They establish levels
  57. * of fullness (from 0 to 1) outside of which the data array is
  58. * reallocated and repopulated. Setting the low water ratio to zero
  59. * means the table will never shrink. Setting the high water ratio to
  60. * one means the table will never grow. The ratios should be
  61. * coordinated with the ratio between successive elements of the
  62. * PRIMES table, so that when the primeIndex is incremented or
  63. * decremented during rehashing, it brings the ratio of count / length
  64. * back into the desired range (between low and high water ratios).
  65. */
  66. /********************************************************************
  67. * PRIVATE Constants, Macros
  68. ********************************************************************/
  69. /* This is a list of non-consecutive primes chosen such that
  70. * PRIMES[i+1] ~ 2*PRIMES[i]. (Currently, the ratio ranges from 1.81
  71. * to 2.18; the inverse ratio ranges from 0.459 to 0.552.) If this
  72. * ratio is changed, the low and high water ratios should also be
  73. * adjusted to suit.
  74. *
  75. * These prime numbers were also chosen so that they are the largest
  76. * prime number while being less than a power of two.
  77. */
  78. static const int32_t PRIMES[] = {
  79. 7, 13, 31, 61, 127, 251, 509, 1021, 2039, 4093, 8191, 16381, 32749,
  80. 65521, 131071, 262139, 524287, 1048573, 2097143, 4194301, 8388593,
  81. 16777213, 33554393, 67108859, 134217689, 268435399, 536870909,
  82. 1073741789, 2147483647 /*, 4294967291 */
  83. };
  84. #define PRIMES_LENGTH UPRV_LENGTHOF(PRIMES)
  85. #define DEFAULT_PRIME_INDEX 4
  86. /* These ratios are tuned to the PRIMES array such that a resize
  87. * places the table back into the zone of non-resizing. That is,
  88. * after a call to _uhash_rehash(), a subsequent call to
  89. * _uhash_rehash() should do nothing (should not churn). This is only
  90. * a potential problem with U_GROW_AND_SHRINK.
  91. */
  92. static const float RESIZE_POLICY_RATIO_TABLE[6] = {
  93. /* low, high water ratio */
  94. 0.0F, 0.5F, /* U_GROW: Grow on demand, do not shrink */
  95. 0.1F, 0.5F, /* U_GROW_AND_SHRINK: Grow and shrink on demand */
  96. 0.0F, 1.0F /* U_FIXED: Never change size */
  97. };
  98. /*
  99. Invariants for hashcode values:
  100. * DELETED < 0
  101. * EMPTY < 0
  102. * Real hashes >= 0
  103. Hashcodes may not start out this way, but internally they are
  104. adjusted so that they are always positive. We assume 32-bit
  105. hashcodes; adjust these constants for other hashcode sizes.
  106. */
  107. #define HASH_DELETED ((int32_t) 0x80000000)
  108. #define HASH_EMPTY ((int32_t) HASH_DELETED + 1)
  109. #define IS_EMPTY_OR_DELETED(x) ((x) < 0)
  110. /* This macro expects a UHashTok.pointer as its keypointer and
  111. valuepointer parameters */
  112. #define HASH_DELETE_KEY_VALUE(hash, keypointer, valuepointer) UPRV_BLOCK_MACRO_BEGIN { \
  113. if (hash->keyDeleter != nullptr && keypointer != nullptr) { \
  114. (*hash->keyDeleter)(keypointer); \
  115. } \
  116. if (hash->valueDeleter != nullptr && valuepointer != nullptr) { \
  117. (*hash->valueDeleter)(valuepointer); \
  118. } \
  119. } UPRV_BLOCK_MACRO_END
  120. /*
  121. * Constants for hinting whether a key or value is an integer
  122. * or a pointer. If a hint bit is zero, then the associated
  123. * token is assumed to be an integer.
  124. */
  125. #define HINT_BOTH_INTEGERS (0)
  126. #define HINT_KEY_POINTER (1)
  127. #define HINT_VALUE_POINTER (2)
  128. #define HINT_ALLOW_ZERO (4)
  129. /********************************************************************
  130. * PRIVATE Implementation
  131. ********************************************************************/
  132. static UHashTok
  133. _uhash_setElement(UHashtable *hash, UHashElement* e,
  134. int32_t hashcode,
  135. UHashTok key, UHashTok value, int8_t hint) {
  136. UHashTok oldValue = e->value;
  137. if (hash->keyDeleter != nullptr && e->key.pointer != nullptr &&
  138. e->key.pointer != key.pointer) { /* Avoid double deletion */
  139. (*hash->keyDeleter)(e->key.pointer);
  140. }
  141. if (hash->valueDeleter != nullptr) {
  142. if (oldValue.pointer != nullptr &&
  143. oldValue.pointer != value.pointer) { /* Avoid double deletion */
  144. (*hash->valueDeleter)(oldValue.pointer);
  145. }
  146. oldValue.pointer = nullptr;
  147. }
  148. /* Compilers should copy the UHashTok union correctly, but even if
  149. * they do, memory heap tools (e.g. BoundsChecker) can get
  150. * confused when a pointer is cloaked in a union and then copied.
  151. * TO ALLEVIATE THIS, we use hints (based on what API the user is
  152. * calling) to copy pointers when we know the user thinks
  153. * something is a pointer. */
  154. if (hint & HINT_KEY_POINTER) {
  155. e->key.pointer = key.pointer;
  156. } else {
  157. e->key = key;
  158. }
  159. if (hint & HINT_VALUE_POINTER) {
  160. e->value.pointer = value.pointer;
  161. } else {
  162. e->value = value;
  163. }
  164. e->hashcode = hashcode;
  165. return oldValue;
  166. }
  167. /**
  168. * Assumes that the given element is not empty or deleted.
  169. */
  170. static UHashTok
  171. _uhash_internalRemoveElement(UHashtable *hash, UHashElement* e) {
  172. UHashTok empty;
  173. U_ASSERT(!IS_EMPTY_OR_DELETED(e->hashcode));
  174. --hash->count;
  175. empty.pointer = nullptr; empty.integer = 0;
  176. return _uhash_setElement(hash, e, HASH_DELETED, empty, empty, 0);
  177. }
  178. static void
  179. _uhash_internalSetResizePolicy(UHashtable *hash, enum UHashResizePolicy policy) {
  180. U_ASSERT(hash != nullptr);
  181. U_ASSERT(((int32_t)policy) >= 0);
  182. U_ASSERT(((int32_t)policy) < 3);
  183. hash->lowWaterRatio = RESIZE_POLICY_RATIO_TABLE[policy * 2];
  184. hash->highWaterRatio = RESIZE_POLICY_RATIO_TABLE[policy * 2 + 1];
  185. }
  186. /**
  187. * Allocate internal data array of a size determined by the given
  188. * prime index. If the index is out of range it is pinned into range.
  189. * If the allocation fails the status is set to
  190. * U_MEMORY_ALLOCATION_ERROR and all array storage is freed. In
  191. * either case the previous array pointer is overwritten.
  192. *
  193. * Caller must ensure primeIndex is in range 0..PRIME_LENGTH-1.
  194. */
  195. static void
  196. _uhash_allocate(UHashtable *hash,
  197. int32_t primeIndex,
  198. UErrorCode *status) {
  199. UHashElement *p, *limit;
  200. UHashTok emptytok;
  201. if (U_FAILURE(*status)) return;
  202. U_ASSERT(primeIndex >= 0 && primeIndex < PRIMES_LENGTH);
  203. hash->primeIndex = static_cast<int8_t>(primeIndex);
  204. hash->length = PRIMES[primeIndex];
  205. p = hash->elements = static_cast<UHashElement*>(
  206. uprv_malloc(sizeof(UHashElement) * hash->length));
  207. if (hash->elements == nullptr) {
  208. *status = U_MEMORY_ALLOCATION_ERROR;
  209. return;
  210. }
  211. emptytok.pointer = nullptr; /* Only one of these two is needed */
  212. emptytok.integer = 0; /* but we don't know which one. */
  213. limit = p + hash->length;
  214. while (p < limit) {
  215. p->key = emptytok;
  216. p->value = emptytok;
  217. p->hashcode = HASH_EMPTY;
  218. ++p;
  219. }
  220. hash->count = 0;
  221. hash->lowWaterMark = static_cast<int32_t>(hash->length * hash->lowWaterRatio);
  222. hash->highWaterMark = static_cast<int32_t>(hash->length * hash->highWaterRatio);
  223. }
  224. static UHashtable*
  225. _uhash_init(UHashtable *result,
  226. UHashFunction *keyHash,
  227. UKeyComparator *keyComp,
  228. UValueComparator *valueComp,
  229. int32_t primeIndex,
  230. UErrorCode *status)
  231. {
  232. if (U_FAILURE(*status)) return nullptr;
  233. U_ASSERT(keyHash != nullptr);
  234. U_ASSERT(keyComp != nullptr);
  235. result->keyHasher = keyHash;
  236. result->keyComparator = keyComp;
  237. result->valueComparator = valueComp;
  238. result->keyDeleter = nullptr;
  239. result->valueDeleter = nullptr;
  240. result->allocated = false;
  241. _uhash_internalSetResizePolicy(result, U_GROW);
  242. _uhash_allocate(result, primeIndex, status);
  243. if (U_FAILURE(*status)) {
  244. return nullptr;
  245. }
  246. return result;
  247. }
  248. static UHashtable*
  249. _uhash_create(UHashFunction *keyHash,
  250. UKeyComparator *keyComp,
  251. UValueComparator *valueComp,
  252. int32_t primeIndex,
  253. UErrorCode *status) {
  254. UHashtable *result;
  255. if (U_FAILURE(*status)) return nullptr;
  256. result = static_cast<UHashtable*>(uprv_malloc(sizeof(UHashtable)));
  257. if (result == nullptr) {
  258. *status = U_MEMORY_ALLOCATION_ERROR;
  259. return nullptr;
  260. }
  261. _uhash_init(result, keyHash, keyComp, valueComp, primeIndex, status);
  262. result->allocated = true;
  263. if (U_FAILURE(*status)) {
  264. uprv_free(result);
  265. return nullptr;
  266. }
  267. return result;
  268. }
  269. /**
  270. * Look for a key in the table, or if no such key exists, the first
  271. * empty slot matching the given hashcode. Keys are compared using
  272. * the keyComparator function.
  273. *
  274. * First find the start position, which is the hashcode modulo
  275. * the length. Test it to see if it is:
  276. *
  277. * a. identical: First check the hash values for a quick check,
  278. * then compare keys for equality using keyComparator.
  279. * b. deleted
  280. * c. empty
  281. *
  282. * Stop if it is identical or empty, otherwise continue by adding a
  283. * "jump" value (moduloing by the length again to keep it within
  284. * range) and retesting. For efficiency, there need enough empty
  285. * values so that the searches stop within a reasonable amount of time.
  286. * This can be changed by changing the high/low water marks.
  287. *
  288. * In theory, this function can return nullptr, if it is full (no empty
  289. * or deleted slots) and if no matching key is found. In practice, we
  290. * prevent this elsewhere (in uhash_put) by making sure the last slot
  291. * in the table is never filled.
  292. *
  293. * The size of the table should be prime for this algorithm to work;
  294. * otherwise we are not guaranteed that the jump value (the secondary
  295. * hash) is relatively prime to the table length.
  296. */
  297. static UHashElement*
  298. _uhash_find(const UHashtable *hash, UHashTok key,
  299. int32_t hashcode) {
  300. int32_t firstDeleted = -1; /* assume invalid index */
  301. int32_t theIndex, startIndex;
  302. int32_t jump = 0; /* lazy evaluate */
  303. int32_t tableHash;
  304. UHashElement *elements = hash->elements;
  305. hashcode &= 0x7FFFFFFF; /* must be positive */
  306. startIndex = theIndex = (hashcode ^ 0x4000000) % hash->length;
  307. do {
  308. tableHash = elements[theIndex].hashcode;
  309. if (tableHash == hashcode) { /* quick check */
  310. if ((*hash->keyComparator)(key, elements[theIndex].key)) {
  311. return &(elements[theIndex]);
  312. }
  313. } else if (!IS_EMPTY_OR_DELETED(tableHash)) {
  314. /* We have hit a slot which contains a key-value pair,
  315. * but for which the hash code does not match. Keep
  316. * looking.
  317. */
  318. } else if (tableHash == HASH_EMPTY) { /* empty, end o' the line */
  319. break;
  320. } else if (firstDeleted < 0) { /* remember first deleted */
  321. firstDeleted = theIndex;
  322. }
  323. if (jump == 0) { /* lazy compute jump */
  324. /* The jump value must be relatively prime to the table
  325. * length. As long as the length is prime, then any value
  326. * 1..length-1 will be relatively prime to it.
  327. */
  328. jump = (hashcode % (hash->length - 1)) + 1;
  329. }
  330. theIndex = (theIndex + jump) % hash->length;
  331. } while (theIndex != startIndex);
  332. if (firstDeleted >= 0) {
  333. theIndex = firstDeleted; /* reset if had deleted slot */
  334. } else if (tableHash != HASH_EMPTY) {
  335. /* We get to this point if the hashtable is full (no empty or
  336. * deleted slots), and we've failed to find a match. THIS
  337. * WILL NEVER HAPPEN as long as uhash_put() makes sure that
  338. * count is always < length.
  339. */
  340. UPRV_UNREACHABLE_EXIT;
  341. }
  342. return &(elements[theIndex]);
  343. }
  344. /**
  345. * Attempt to grow or shrink the data arrays in order to make the
  346. * count fit between the high and low water marks. hash_put() and
  347. * hash_remove() call this method when the count exceeds the high or
  348. * low water marks. This method may do nothing, if memory allocation
  349. * fails, or if the count is already in range, or if the length is
  350. * already at the low or high limit. In any case, upon return the
  351. * arrays will be valid.
  352. */
  353. static void
  354. _uhash_rehash(UHashtable *hash, UErrorCode *status) {
  355. UHashElement *old = hash->elements;
  356. int32_t oldLength = hash->length;
  357. int32_t newPrimeIndex = hash->primeIndex;
  358. int32_t i;
  359. if (hash->count > hash->highWaterMark) {
  360. if (++newPrimeIndex >= PRIMES_LENGTH) {
  361. return;
  362. }
  363. } else if (hash->count < hash->lowWaterMark) {
  364. if (--newPrimeIndex < 0) {
  365. return;
  366. }
  367. } else {
  368. return;
  369. }
  370. _uhash_allocate(hash, newPrimeIndex, status);
  371. if (U_FAILURE(*status)) {
  372. hash->elements = old;
  373. hash->length = oldLength;
  374. return;
  375. }
  376. for (i = oldLength - 1; i >= 0; --i) {
  377. if (!IS_EMPTY_OR_DELETED(old[i].hashcode)) {
  378. UHashElement *e = _uhash_find(hash, old[i].key, old[i].hashcode);
  379. U_ASSERT(e != nullptr);
  380. U_ASSERT(e->hashcode == HASH_EMPTY);
  381. e->key = old[i].key;
  382. e->value = old[i].value;
  383. e->hashcode = old[i].hashcode;
  384. ++hash->count;
  385. }
  386. }
  387. uprv_free(old);
  388. }
  389. static UHashTok
  390. _uhash_remove(UHashtable *hash,
  391. UHashTok key) {
  392. /* First find the position of the key in the table. If the object
  393. * has not been removed already, remove it. If the user wanted
  394. * keys deleted, then delete it also. We have to put a special
  395. * hashcode in that position that means that something has been
  396. * deleted, since when we do a find, we have to continue PAST any
  397. * deleted values.
  398. */
  399. UHashTok result;
  400. UHashElement* e = _uhash_find(hash, key, hash->keyHasher(key));
  401. U_ASSERT(e != nullptr);
  402. result.pointer = nullptr;
  403. result.integer = 0;
  404. if (!IS_EMPTY_OR_DELETED(e->hashcode)) {
  405. result = _uhash_internalRemoveElement(hash, e);
  406. if (hash->count < hash->lowWaterMark) {
  407. UErrorCode status = U_ZERO_ERROR;
  408. _uhash_rehash(hash, &status);
  409. }
  410. }
  411. return result;
  412. }
  413. static UHashTok
  414. _uhash_put(UHashtable *hash,
  415. UHashTok key,
  416. UHashTok value,
  417. int8_t hint,
  418. UErrorCode *status) {
  419. /* Put finds the position in the table for the new value. If the
  420. * key is already in the table, it is deleted, if there is a
  421. * non-nullptr keyDeleter. Then the key, the hash and the value are
  422. * all put at the position in their respective arrays.
  423. */
  424. int32_t hashcode;
  425. UHashElement* e;
  426. UHashTok emptytok;
  427. if (U_FAILURE(*status)) {
  428. goto err;
  429. }
  430. U_ASSERT(hash != nullptr);
  431. if ((hint & HINT_VALUE_POINTER) ?
  432. value.pointer == nullptr :
  433. value.integer == 0 && (hint & HINT_ALLOW_ZERO) == 0) {
  434. /* Disallow storage of nullptr values, since nullptr is returned by
  435. * get() to indicate an absent key. Storing nullptr == removing.
  436. */
  437. return _uhash_remove(hash, key);
  438. }
  439. if (hash->count > hash->highWaterMark) {
  440. _uhash_rehash(hash, status);
  441. if (U_FAILURE(*status)) {
  442. goto err;
  443. }
  444. }
  445. hashcode = (*hash->keyHasher)(key);
  446. e = _uhash_find(hash, key, hashcode);
  447. U_ASSERT(e != nullptr);
  448. if (IS_EMPTY_OR_DELETED(e->hashcode)) {
  449. /* Important: We must never actually fill the table up. If we
  450. * do so, then _uhash_find() will return nullptr, and we'll have
  451. * to check for nullptr after every call to _uhash_find(). To
  452. * avoid this we make sure there is always at least one empty
  453. * or deleted slot in the table. This only is a problem if we
  454. * are out of memory and rehash isn't working.
  455. */
  456. ++hash->count;
  457. if (hash->count == hash->length) {
  458. /* Don't allow count to reach length */
  459. --hash->count;
  460. *status = U_MEMORY_ALLOCATION_ERROR;
  461. goto err;
  462. }
  463. }
  464. /* We must in all cases handle storage properly. If there was an
  465. * old key, then it must be deleted (if the deleter != nullptr).
  466. * Make hashcodes stored in table positive.
  467. */
  468. return _uhash_setElement(hash, e, hashcode & 0x7FFFFFFF, key, value, hint);
  469. err:
  470. /* If the deleters are non-nullptr, this method adopts its key and/or
  471. * value arguments, and we must be sure to delete the key and/or
  472. * value in all cases, even upon failure.
  473. */
  474. HASH_DELETE_KEY_VALUE(hash, key.pointer, value.pointer);
  475. emptytok.pointer = nullptr; emptytok.integer = 0;
  476. return emptytok;
  477. }
  478. /********************************************************************
  479. * PUBLIC API
  480. ********************************************************************/
  481. U_CAPI UHashtable* U_EXPORT2
  482. uhash_open(UHashFunction *keyHash,
  483. UKeyComparator *keyComp,
  484. UValueComparator *valueComp,
  485. UErrorCode *status) {
  486. return _uhash_create(keyHash, keyComp, valueComp, DEFAULT_PRIME_INDEX, status);
  487. }
  488. U_CAPI UHashtable* U_EXPORT2
  489. uhash_openSize(UHashFunction *keyHash,
  490. UKeyComparator *keyComp,
  491. UValueComparator *valueComp,
  492. int32_t size,
  493. UErrorCode *status) {
  494. /* Find the smallest index i for which PRIMES[i] >= size. */
  495. int32_t i = 0;
  496. while (i<(PRIMES_LENGTH-1) && PRIMES[i]<size) {
  497. ++i;
  498. }
  499. return _uhash_create(keyHash, keyComp, valueComp, i, status);
  500. }
  501. U_CAPI UHashtable* U_EXPORT2
  502. uhash_init(UHashtable *fillinResult,
  503. UHashFunction *keyHash,
  504. UKeyComparator *keyComp,
  505. UValueComparator *valueComp,
  506. UErrorCode *status) {
  507. return _uhash_init(fillinResult, keyHash, keyComp, valueComp, DEFAULT_PRIME_INDEX, status);
  508. }
  509. U_CAPI UHashtable* U_EXPORT2
  510. uhash_initSize(UHashtable *fillinResult,
  511. UHashFunction *keyHash,
  512. UKeyComparator *keyComp,
  513. UValueComparator *valueComp,
  514. int32_t size,
  515. UErrorCode *status) {
  516. // Find the smallest index i for which PRIMES[i] >= size.
  517. int32_t i = 0;
  518. while (i<(PRIMES_LENGTH-1) && PRIMES[i]<size) {
  519. ++i;
  520. }
  521. return _uhash_init(fillinResult, keyHash, keyComp, valueComp, i, status);
  522. }
  523. U_CAPI void U_EXPORT2
  524. uhash_close(UHashtable *hash) {
  525. if (hash == nullptr) {
  526. return;
  527. }
  528. if (hash->elements != nullptr) {
  529. if (hash->keyDeleter != nullptr || hash->valueDeleter != nullptr) {
  530. int32_t pos=UHASH_FIRST;
  531. UHashElement *e;
  532. while ((e = (UHashElement*) uhash_nextElement(hash, &pos)) != nullptr) {
  533. HASH_DELETE_KEY_VALUE(hash, e->key.pointer, e->value.pointer);
  534. }
  535. }
  536. uprv_free(hash->elements);
  537. hash->elements = nullptr;
  538. }
  539. if (hash->allocated) {
  540. uprv_free(hash);
  541. }
  542. }
  543. U_CAPI UHashFunction *U_EXPORT2
  544. uhash_setKeyHasher(UHashtable *hash, UHashFunction *fn) {
  545. UHashFunction *result = hash->keyHasher;
  546. hash->keyHasher = fn;
  547. return result;
  548. }
  549. U_CAPI UKeyComparator *U_EXPORT2
  550. uhash_setKeyComparator(UHashtable *hash, UKeyComparator *fn) {
  551. UKeyComparator *result = hash->keyComparator;
  552. hash->keyComparator = fn;
  553. return result;
  554. }
  555. U_CAPI UValueComparator *U_EXPORT2
  556. uhash_setValueComparator(UHashtable *hash, UValueComparator *fn){
  557. UValueComparator *result = hash->valueComparator;
  558. hash->valueComparator = fn;
  559. return result;
  560. }
  561. U_CAPI UObjectDeleter *U_EXPORT2
  562. uhash_setKeyDeleter(UHashtable *hash, UObjectDeleter *fn) {
  563. UObjectDeleter *result = hash->keyDeleter;
  564. hash->keyDeleter = fn;
  565. return result;
  566. }
  567. U_CAPI UObjectDeleter *U_EXPORT2
  568. uhash_setValueDeleter(UHashtable *hash, UObjectDeleter *fn) {
  569. UObjectDeleter *result = hash->valueDeleter;
  570. hash->valueDeleter = fn;
  571. return result;
  572. }
  573. U_CAPI void U_EXPORT2
  574. uhash_setResizePolicy(UHashtable *hash, enum UHashResizePolicy policy) {
  575. UErrorCode status = U_ZERO_ERROR;
  576. _uhash_internalSetResizePolicy(hash, policy);
  577. hash->lowWaterMark = (int32_t)(hash->length * hash->lowWaterRatio);
  578. hash->highWaterMark = (int32_t)(hash->length * hash->highWaterRatio);
  579. _uhash_rehash(hash, &status);
  580. }
  581. U_CAPI int32_t U_EXPORT2
  582. uhash_count(const UHashtable *hash) {
  583. return hash->count;
  584. }
  585. U_CAPI void* U_EXPORT2
  586. uhash_get(const UHashtable *hash,
  587. const void* key) {
  588. UHashTok keyholder;
  589. keyholder.pointer = (void*) key;
  590. return _uhash_find(hash, keyholder, hash->keyHasher(keyholder))->value.pointer;
  591. }
  592. U_CAPI void* U_EXPORT2
  593. uhash_iget(const UHashtable *hash,
  594. int32_t key) {
  595. UHashTok keyholder;
  596. keyholder.integer = key;
  597. return _uhash_find(hash, keyholder, hash->keyHasher(keyholder))->value.pointer;
  598. }
  599. U_CAPI int32_t U_EXPORT2
  600. uhash_geti(const UHashtable *hash,
  601. const void* key) {
  602. UHashTok keyholder;
  603. keyholder.pointer = (void*) key;
  604. return _uhash_find(hash, keyholder, hash->keyHasher(keyholder))->value.integer;
  605. }
  606. U_CAPI int32_t U_EXPORT2
  607. uhash_igeti(const UHashtable *hash,
  608. int32_t key) {
  609. UHashTok keyholder;
  610. keyholder.integer = key;
  611. return _uhash_find(hash, keyholder, hash->keyHasher(keyholder))->value.integer;
  612. }
  613. U_CAPI int32_t U_EXPORT2
  614. uhash_getiAndFound(const UHashtable *hash,
  615. const void *key,
  616. UBool *found) {
  617. UHashTok keyholder;
  618. keyholder.pointer = (void *)key;
  619. const UHashElement *e = _uhash_find(hash, keyholder, hash->keyHasher(keyholder));
  620. *found = !IS_EMPTY_OR_DELETED(e->hashcode);
  621. return e->value.integer;
  622. }
  623. U_CAPI int32_t U_EXPORT2
  624. uhash_igetiAndFound(const UHashtable *hash,
  625. int32_t key,
  626. UBool *found) {
  627. UHashTok keyholder;
  628. keyholder.integer = key;
  629. const UHashElement *e = _uhash_find(hash, keyholder, hash->keyHasher(keyholder));
  630. *found = !IS_EMPTY_OR_DELETED(e->hashcode);
  631. return e->value.integer;
  632. }
  633. U_CAPI void* U_EXPORT2
  634. uhash_put(UHashtable *hash,
  635. void* key,
  636. void* value,
  637. UErrorCode *status) {
  638. UHashTok keyholder, valueholder;
  639. keyholder.pointer = key;
  640. valueholder.pointer = value;
  641. return _uhash_put(hash, keyholder, valueholder,
  642. HINT_KEY_POINTER | HINT_VALUE_POINTER,
  643. status).pointer;
  644. }
  645. U_CAPI void* U_EXPORT2
  646. uhash_iput(UHashtable *hash,
  647. int32_t key,
  648. void* value,
  649. UErrorCode *status) {
  650. UHashTok keyholder, valueholder;
  651. keyholder.integer = key;
  652. valueholder.pointer = value;
  653. return _uhash_put(hash, keyholder, valueholder,
  654. HINT_VALUE_POINTER,
  655. status).pointer;
  656. }
  657. U_CAPI int32_t U_EXPORT2
  658. uhash_puti(UHashtable *hash,
  659. void* key,
  660. int32_t value,
  661. UErrorCode *status) {
  662. UHashTok keyholder, valueholder;
  663. keyholder.pointer = key;
  664. valueholder.integer = value;
  665. return _uhash_put(hash, keyholder, valueholder,
  666. HINT_KEY_POINTER,
  667. status).integer;
  668. }
  669. U_CAPI int32_t U_EXPORT2
  670. uhash_iputi(UHashtable *hash,
  671. int32_t key,
  672. int32_t value,
  673. UErrorCode *status) {
  674. UHashTok keyholder, valueholder;
  675. keyholder.integer = key;
  676. valueholder.integer = value;
  677. return _uhash_put(hash, keyholder, valueholder,
  678. HINT_BOTH_INTEGERS,
  679. status).integer;
  680. }
  681. U_CAPI int32_t U_EXPORT2
  682. uhash_putiAllowZero(UHashtable *hash,
  683. void *key,
  684. int32_t value,
  685. UErrorCode *status) {
  686. UHashTok keyholder, valueholder;
  687. keyholder.pointer = key;
  688. valueholder.integer = value;
  689. return _uhash_put(hash, keyholder, valueholder,
  690. HINT_KEY_POINTER | HINT_ALLOW_ZERO,
  691. status).integer;
  692. }
  693. U_CAPI int32_t U_EXPORT2
  694. uhash_iputiAllowZero(UHashtable *hash,
  695. int32_t key,
  696. int32_t value,
  697. UErrorCode *status) {
  698. UHashTok keyholder, valueholder;
  699. keyholder.integer = key;
  700. valueholder.integer = value;
  701. return _uhash_put(hash, keyholder, valueholder,
  702. HINT_BOTH_INTEGERS | HINT_ALLOW_ZERO,
  703. status).integer;
  704. }
  705. U_CAPI void* U_EXPORT2
  706. uhash_remove(UHashtable *hash,
  707. const void* key) {
  708. UHashTok keyholder;
  709. keyholder.pointer = (void*) key;
  710. return _uhash_remove(hash, keyholder).pointer;
  711. }
  712. U_CAPI void* U_EXPORT2
  713. uhash_iremove(UHashtable *hash,
  714. int32_t key) {
  715. UHashTok keyholder;
  716. keyholder.integer = key;
  717. return _uhash_remove(hash, keyholder).pointer;
  718. }
  719. U_CAPI int32_t U_EXPORT2
  720. uhash_removei(UHashtable *hash,
  721. const void* key) {
  722. UHashTok keyholder;
  723. keyholder.pointer = (void*) key;
  724. return _uhash_remove(hash, keyholder).integer;
  725. }
  726. U_CAPI int32_t U_EXPORT2
  727. uhash_iremovei(UHashtable *hash,
  728. int32_t key) {
  729. UHashTok keyholder;
  730. keyholder.integer = key;
  731. return _uhash_remove(hash, keyholder).integer;
  732. }
  733. U_CAPI void U_EXPORT2
  734. uhash_removeAll(UHashtable *hash) {
  735. int32_t pos = UHASH_FIRST;
  736. const UHashElement *e;
  737. U_ASSERT(hash != nullptr);
  738. if (hash->count != 0) {
  739. while ((e = uhash_nextElement(hash, &pos)) != nullptr) {
  740. uhash_removeElement(hash, e);
  741. }
  742. }
  743. U_ASSERT(hash->count == 0);
  744. }
  745. U_CAPI UBool U_EXPORT2
  746. uhash_containsKey(const UHashtable *hash, const void *key) {
  747. UHashTok keyholder;
  748. keyholder.pointer = (void *)key;
  749. const UHashElement *e = _uhash_find(hash, keyholder, hash->keyHasher(keyholder));
  750. return !IS_EMPTY_OR_DELETED(e->hashcode);
  751. }
  752. /**
  753. * Returns true if the UHashtable contains an item with this integer key.
  754. *
  755. * @param hash The target UHashtable.
  756. * @param key An integer key stored in a hashtable
  757. * @return true if the key is found.
  758. */
  759. U_CAPI UBool U_EXPORT2
  760. uhash_icontainsKey(const UHashtable *hash, int32_t key) {
  761. UHashTok keyholder;
  762. keyholder.integer = key;
  763. const UHashElement *e = _uhash_find(hash, keyholder, hash->keyHasher(keyholder));
  764. return !IS_EMPTY_OR_DELETED(e->hashcode);
  765. }
  766. U_CAPI const UHashElement* U_EXPORT2
  767. uhash_find(const UHashtable *hash, const void* key) {
  768. UHashTok keyholder;
  769. const UHashElement *e;
  770. keyholder.pointer = (void*) key;
  771. e = _uhash_find(hash, keyholder, hash->keyHasher(keyholder));
  772. return IS_EMPTY_OR_DELETED(e->hashcode) ? nullptr : e;
  773. }
  774. U_CAPI const UHashElement* U_EXPORT2
  775. uhash_nextElement(const UHashtable *hash, int32_t *pos) {
  776. /* Walk through the array until we find an element that is not
  777. * EMPTY and not DELETED.
  778. */
  779. int32_t i;
  780. U_ASSERT(hash != nullptr);
  781. for (i = *pos + 1; i < hash->length; ++i) {
  782. if (!IS_EMPTY_OR_DELETED(hash->elements[i].hashcode)) {
  783. *pos = i;
  784. return &(hash->elements[i]);
  785. }
  786. }
  787. /* No more elements */
  788. return nullptr;
  789. }
  790. U_CAPI void* U_EXPORT2
  791. uhash_removeElement(UHashtable *hash, const UHashElement* e) {
  792. U_ASSERT(hash != nullptr);
  793. U_ASSERT(e != nullptr);
  794. if (!IS_EMPTY_OR_DELETED(e->hashcode)) {
  795. UHashElement *nce = (UHashElement *)e;
  796. return _uhash_internalRemoveElement(hash, nce).pointer;
  797. }
  798. return nullptr;
  799. }
  800. /********************************************************************
  801. * UHashTok convenience
  802. ********************************************************************/
  803. /**
  804. * Return a UHashTok for an integer.
  805. */
  806. /*U_CAPI UHashTok U_EXPORT2
  807. uhash_toki(int32_t i) {
  808. UHashTok tok;
  809. tok.integer = i;
  810. return tok;
  811. }*/
  812. /**
  813. * Return a UHashTok for a pointer.
  814. */
  815. /*U_CAPI UHashTok U_EXPORT2
  816. uhash_tokp(void* p) {
  817. UHashTok tok;
  818. tok.pointer = p;
  819. return tok;
  820. }*/
  821. /********************************************************************
  822. * PUBLIC Key Hash Functions
  823. ********************************************************************/
  824. U_CAPI int32_t U_EXPORT2
  825. uhash_hashUChars(const UHashTok key) {
  826. const char16_t *s = (const char16_t *)key.pointer;
  827. return s == nullptr ? 0 : ustr_hashUCharsN(s, u_strlen(s));
  828. }
  829. U_CAPI int32_t U_EXPORT2
  830. uhash_hashChars(const UHashTok key) {
  831. const char *s = (const char *)key.pointer;
  832. return s == nullptr ? 0 : static_cast<int32_t>(ustr_hashCharsN(s, static_cast<int32_t>(uprv_strlen(s))));
  833. }
  834. U_CAPI int32_t U_EXPORT2
  835. uhash_hashIChars(const UHashTok key) {
  836. const char *s = (const char *)key.pointer;
  837. return s == nullptr ? 0 : ustr_hashICharsN(s, static_cast<int32_t>(uprv_strlen(s)));
  838. }
  839. U_CAPI int32_t U_EXPORT2
  840. uhash_hashIStringView(const UHashTok key) {
  841. const std::string_view* s = static_cast<std::string_view*>(key.pointer);
  842. return s == nullptr ? 0 : ustr_hashICharsN(s->data(), static_cast<int32_t>(s->size()));
  843. }
  844. U_CAPI UBool U_EXPORT2
  845. uhash_equals(const UHashtable* hash1, const UHashtable* hash2){
  846. int32_t count1, count2, pos, i;
  847. if(hash1==hash2){
  848. return true;
  849. }
  850. /*
  851. * Make sure that we are comparing 2 valid hashes of the same type
  852. * with valid comparison functions.
  853. * Without valid comparison functions, a binary comparison
  854. * of the hash values will yield random results on machines
  855. * with 64-bit pointers and 32-bit integer hashes.
  856. * A valueComparator is normally optional.
  857. */
  858. if (hash1==nullptr || hash2==nullptr ||
  859. hash1->keyComparator != hash2->keyComparator ||
  860. hash1->valueComparator != hash2->valueComparator ||
  861. hash1->valueComparator == nullptr)
  862. {
  863. /*
  864. Normally we would return an error here about incompatible hash tables,
  865. but we return false instead.
  866. */
  867. return false;
  868. }
  869. count1 = uhash_count(hash1);
  870. count2 = uhash_count(hash2);
  871. if(count1!=count2){
  872. return false;
  873. }
  874. pos=UHASH_FIRST;
  875. for(i=0; i<count1; i++){
  876. const UHashElement* elem1 = uhash_nextElement(hash1, &pos);
  877. const UHashTok key1 = elem1->key;
  878. const UHashTok val1 = elem1->value;
  879. /* here the keys are not compared, instead the key form hash1 is used to fetch
  880. * value from hash2. If the hashes are equal then then both hashes should
  881. * contain equal values for the same key!
  882. */
  883. const UHashElement* elem2 = _uhash_find(hash2, key1, hash2->keyHasher(key1));
  884. const UHashTok val2 = elem2->value;
  885. if(hash1->valueComparator(val1, val2)==false){
  886. return false;
  887. }
  888. }
  889. return true;
  890. }
  891. /********************************************************************
  892. * PUBLIC Comparator Functions
  893. ********************************************************************/
  894. U_CAPI UBool U_EXPORT2
  895. uhash_compareUChars(const UHashTok key1, const UHashTok key2) {
  896. const char16_t *p1 = (const char16_t*) key1.pointer;
  897. const char16_t *p2 = (const char16_t*) key2.pointer;
  898. if (p1 == p2) {
  899. return true;
  900. }
  901. if (p1 == nullptr || p2 == nullptr) {
  902. return false;
  903. }
  904. while (*p1 != 0 && *p1 == *p2) {
  905. ++p1;
  906. ++p2;
  907. }
  908. return *p1 == *p2;
  909. }
  910. U_CAPI UBool U_EXPORT2
  911. uhash_compareChars(const UHashTok key1, const UHashTok key2) {
  912. const char *p1 = (const char*) key1.pointer;
  913. const char *p2 = (const char*) key2.pointer;
  914. if (p1 == p2) {
  915. return true;
  916. }
  917. if (p1 == nullptr || p2 == nullptr) {
  918. return false;
  919. }
  920. while (*p1 != 0 && *p1 == *p2) {
  921. ++p1;
  922. ++p2;
  923. }
  924. return *p1 == *p2;
  925. }
  926. U_CAPI UBool U_EXPORT2
  927. uhash_compareIChars(const UHashTok key1, const UHashTok key2) {
  928. const char *p1 = (const char*) key1.pointer;
  929. const char *p2 = (const char*) key2.pointer;
  930. if (p1 == p2) {
  931. return true;
  932. }
  933. if (p1 == nullptr || p2 == nullptr) {
  934. return false;
  935. }
  936. while (*p1 != 0 && uprv_tolower(*p1) == uprv_tolower(*p2)) {
  937. ++p1;
  938. ++p2;
  939. }
  940. return *p1 == *p2;
  941. }
  942. U_CAPI UBool U_EXPORT2
  943. uhash_compareIStringView(const UHashTok key1, const UHashTok key2) {
  944. const std::string_view* p1 = static_cast<std::string_view*>(key1.pointer);
  945. const std::string_view* p2 = static_cast<std::string_view*>(key2.pointer);
  946. if (p1 == p2) {
  947. return true;
  948. }
  949. if (p1 == nullptr || p2 == nullptr) {
  950. return false;
  951. }
  952. const std::string_view& v1 = *p1;
  953. const std::string_view& v2 = *p2;
  954. if (v1.size() != v2.size()) {
  955. return false;
  956. }
  957. for (size_t i = 0; i < v1.size(); ++i) {
  958. if (uprv_tolower(v1[i]) != uprv_tolower(v2[i])) {
  959. return false;
  960. }
  961. }
  962. return true;
  963. }
  964. /********************************************************************
  965. * PUBLIC int32_t Support Functions
  966. ********************************************************************/
  967. U_CAPI int32_t U_EXPORT2
  968. uhash_hashLong(const UHashTok key) {
  969. return key.integer;
  970. }
  971. U_CAPI UBool U_EXPORT2
  972. uhash_compareLong(const UHashTok key1, const UHashTok key2) {
  973. return key1.integer == key2.integer;
  974. }