hamt.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421
  1. /*
  2. * Hash Array Mapped Trie (HAMT) implementation
  3. *
  4. * Copyright (C) 2001-2007 Peter Johnson
  5. *
  6. * Based on the paper "Ideal Hash Tries" by Phil Bagwell [2000].
  7. * One algorithmic change from that described in the paper: we use the LSB's
  8. * of the key to index the root table and move upward in the key rather than
  9. * use the MSBs as described in the paper. The LSBs have more entropy.
  10. *
  11. * Redistribution and use in source and binary forms, with or without
  12. * modification, are permitted provided that the following conditions
  13. * are met:
  14. * 1. Redistributions of source code must retain the above copyright
  15. * notice, this list of conditions and the following disclaimer.
  16. * 2. Redistributions in binary form must reproduce the above copyright
  17. * notice, this list of conditions and the following disclaimer in the
  18. * documentation and/or other materials provided with the distribution.
  19. *
  20. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND OTHER CONTRIBUTORS ``AS IS''
  21. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  22. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  23. * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR OTHER CONTRIBUTORS BE
  24. * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  25. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  26. * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  27. * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  28. * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  29. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  30. * POSSIBILITY OF SUCH DAMAGE.
  31. */
  32. #include "util.h"
  33. #include <ctype.h>
  34. #include "libyasm-stdint.h"
  35. #include "coretype.h"
  36. #include "hamt.h"
  37. struct HAMTEntry {
  38. STAILQ_ENTRY(HAMTEntry) next; /* next hash table entry */
  39. /*@dependent@*/ const char *str; /* string being hashed */
  40. /*@owned@*/ void *data; /* data pointer being stored */
  41. };
  42. typedef struct HAMTNode {
  43. unsigned long BitMapKey; /* 32 bits, bitmap or hash key */
  44. uintptr_t BaseValue; /* Base of HAMTNode list or value */
  45. } HAMTNode;
  46. struct HAMT {
  47. STAILQ_HEAD(HAMTEntryHead, HAMTEntry) entries;
  48. HAMTNode *root;
  49. /*@exits@*/ void (*error_func) (const char *file, unsigned int line,
  50. const char *message);
  51. unsigned long (*HashKey) (const char *key);
  52. unsigned long (*ReHashKey) (const char *key, int Level);
  53. int (*CmpKey) (const char *s1, const char *s2);
  54. };
  55. /* XXX make a portable version of this. This depends on the pointer being
  56. * 4 or 2-byte aligned (as it uses the LSB of the pointer variable to store
  57. * the subtrie flag!
  58. */
  59. #define IsSubTrie(n) ((n)->BaseValue & 1)
  60. #define SetSubTrie(h, n, v) do { \
  61. if ((uintptr_t)(v) & 1) \
  62. h->error_func(__FILE__, __LINE__, \
  63. N_("Subtrie is seen as subtrie before flag is set (misaligned?)")); \
  64. (n)->BaseValue = (uintptr_t)(v) | 1; \
  65. } while (0)
  66. #define SetValue(h, n, v) do { \
  67. if ((uintptr_t)(v) & 1) \
  68. h->error_func(__FILE__, __LINE__, \
  69. N_("Value is seen as subtrie (misaligned?)")); \
  70. (n)->BaseValue = (uintptr_t)(v); \
  71. } while (0)
  72. #define GetSubTrie(n) (HAMTNode *)(((n)->BaseValue | 1) ^ 1)
  73. static unsigned long
  74. HashKey(const char *key)
  75. {
  76. unsigned long a=31415, b=27183, vHash;
  77. for (vHash=0; *key; key++, a*=b)
  78. vHash = a*vHash + *key;
  79. return vHash;
  80. }
  81. static unsigned long
  82. ReHashKey(const char *key, int Level)
  83. {
  84. unsigned long a=31415, b=27183, vHash;
  85. for (vHash=0; *key; key++, a*=b)
  86. vHash = a*vHash*(unsigned long)Level + *key;
  87. return vHash;
  88. }
  89. static unsigned long
  90. HashKey_nocase(const char *key)
  91. {
  92. unsigned long a=31415, b=27183, vHash;
  93. for (vHash=0; *key; key++, a*=b)
  94. vHash = a*vHash + tolower(*key);
  95. return vHash;
  96. }
  97. static unsigned long
  98. ReHashKey_nocase(const char *key, int Level)
  99. {
  100. unsigned long a=31415, b=27183, vHash;
  101. for (vHash=0; *key; key++, a*=b)
  102. vHash = a*vHash*(unsigned long)Level + tolower(*key);
  103. return vHash;
  104. }
  105. HAMT *
  106. HAMT_create(int nocase, /*@exits@*/ void (*error_func)
  107. (const char *file, unsigned int line, const char *message))
  108. {
  109. /*@out@*/ HAMT *hamt = yasm_xmalloc(sizeof(HAMT));
  110. int i;
  111. STAILQ_INIT(&hamt->entries);
  112. hamt->root = yasm_xmalloc(32*sizeof(HAMTNode));
  113. for (i=0; i<32; i++) {
  114. hamt->root[i].BitMapKey = 0;
  115. hamt->root[i].BaseValue = 0;
  116. }
  117. hamt->error_func = error_func;
  118. if (nocase) {
  119. hamt->HashKey = HashKey_nocase;
  120. hamt->ReHashKey = ReHashKey_nocase;
  121. hamt->CmpKey = yasm__strcasecmp;
  122. } else {
  123. hamt->HashKey = HashKey;
  124. hamt->ReHashKey = ReHashKey;
  125. hamt->CmpKey = strcmp;
  126. }
  127. return hamt;
  128. }
  129. static void
  130. HAMT_delete_trie(HAMTNode *node)
  131. {
  132. if (IsSubTrie(node)) {
  133. unsigned long i, Size;
  134. /* Count total number of bits in bitmap to determine size */
  135. BitCount(Size, node->BitMapKey);
  136. Size &= 0x1F;
  137. if (Size == 0)
  138. Size = 32;
  139. for (i=0; i<Size; i++)
  140. HAMT_delete_trie(&(GetSubTrie(node))[i]);
  141. yasm_xfree(GetSubTrie(node));
  142. }
  143. }
  144. void
  145. HAMT_destroy(HAMT *hamt, void (*deletefunc) (/*@only@*/ void *data))
  146. {
  147. int i;
  148. /* delete entries */
  149. while (!STAILQ_EMPTY(&hamt->entries)) {
  150. HAMTEntry *entry;
  151. entry = STAILQ_FIRST(&hamt->entries);
  152. STAILQ_REMOVE_HEAD(&hamt->entries, next);
  153. deletefunc(entry->data);
  154. yasm_xfree(entry);
  155. }
  156. /* delete trie */
  157. for (i=0; i<32; i++)
  158. HAMT_delete_trie(&hamt->root[i]);
  159. yasm_xfree(hamt->root);
  160. yasm_xfree(hamt);
  161. }
  162. int
  163. HAMT_traverse(HAMT *hamt, void *d,
  164. int (*func) (/*@dependent@*/ /*@null@*/ void *node,
  165. /*@null@*/ void *d))
  166. {
  167. HAMTEntry *entry;
  168. STAILQ_FOREACH(entry, &hamt->entries, next) {
  169. int retval = func(entry->data, d);
  170. if (retval != 0)
  171. return retval;
  172. }
  173. return 0;
  174. }
  175. const HAMTEntry *
  176. HAMT_first(const HAMT *hamt)
  177. {
  178. return STAILQ_FIRST(&hamt->entries);
  179. }
  180. const HAMTEntry *
  181. HAMT_next(const HAMTEntry *prev)
  182. {
  183. return STAILQ_NEXT(prev, next);
  184. }
  185. void *
  186. HAMTEntry_get_data(const HAMTEntry *entry)
  187. {
  188. return entry->data;
  189. }
  190. /*@-temptrans -kepttrans -mustfree@*/
  191. void *
  192. HAMT_insert(HAMT *hamt, const char *str, void *data, int *replace,
  193. void (*deletefunc) (/*@only@*/ void *data))
  194. {
  195. HAMTNode *node, *newnodes;
  196. HAMTEntry *entry;
  197. unsigned long key, keypart, Map;
  198. int keypartbits = 0;
  199. int level = 0;
  200. key = hamt->HashKey(str);
  201. keypart = key & 0x1F;
  202. node = &hamt->root[keypart];
  203. if (!node->BaseValue) {
  204. node->BitMapKey = key;
  205. entry = yasm_xmalloc(sizeof(HAMTEntry));
  206. entry->str = str;
  207. entry->data = data;
  208. STAILQ_INSERT_TAIL(&hamt->entries, entry, next);
  209. SetValue(hamt, node, entry);
  210. if (IsSubTrie(node))
  211. hamt->error_func(__FILE__, __LINE__,
  212. N_("Data is seen as subtrie (misaligned?)"));
  213. *replace = 1;
  214. return data;
  215. }
  216. for (;;) {
  217. if (!(IsSubTrie(node))) {
  218. if (node->BitMapKey == key
  219. && hamt->CmpKey(((HAMTEntry *)(node->BaseValue))->str,
  220. str) == 0) {
  221. /*@-branchstate@*/
  222. if (*replace) {
  223. deletefunc(((HAMTEntry *)(node->BaseValue))->data);
  224. ((HAMTEntry *)(node->BaseValue))->str = str;
  225. ((HAMTEntry *)(node->BaseValue))->data = data;
  226. } else
  227. deletefunc(data);
  228. /*@=branchstate@*/
  229. return ((HAMTEntry *)(node->BaseValue))->data;
  230. } else {
  231. unsigned long key2 = node->BitMapKey;
  232. /* build tree downward until keys differ */
  233. for (;;) {
  234. unsigned long keypart2;
  235. /* replace node with subtrie */
  236. keypartbits += 5;
  237. if (keypartbits > 30) {
  238. /* Exceeded 32 bits: rehash */
  239. key = hamt->ReHashKey(str, level);
  240. key2 = hamt->ReHashKey(
  241. ((HAMTEntry *)(node->BaseValue))->str, level);
  242. keypartbits = 0;
  243. }
  244. keypart = (key >> keypartbits) & 0x1F;
  245. keypart2 = (key2 >> keypartbits) & 0x1F;
  246. if (keypart == keypart2) {
  247. /* Still equal, build one-node subtrie and continue
  248. * downward.
  249. */
  250. newnodes = yasm_xmalloc(sizeof(HAMTNode));
  251. newnodes[0].BitMapKey = key2;
  252. newnodes[0].BaseValue = node->BaseValue;
  253. node->BitMapKey = 1<<keypart;
  254. SetSubTrie(hamt, node, newnodes);
  255. node = &newnodes[0];
  256. level++;
  257. } else {
  258. /* partitioned: allocate two-node subtrie */
  259. newnodes = yasm_xmalloc(2*sizeof(HAMTNode));
  260. entry = yasm_xmalloc(sizeof(HAMTEntry));
  261. entry->str = str;
  262. entry->data = data;
  263. STAILQ_INSERT_TAIL(&hamt->entries, entry, next);
  264. /* Copy nodes into subtrie based on order */
  265. if (keypart2 < keypart) {
  266. newnodes[0].BitMapKey = key2;
  267. newnodes[0].BaseValue = node->BaseValue;
  268. newnodes[1].BitMapKey = key;
  269. SetValue(hamt, &newnodes[1], entry);
  270. } else {
  271. newnodes[0].BitMapKey = key;
  272. SetValue(hamt, &newnodes[0], entry);
  273. newnodes[1].BitMapKey = key2;
  274. newnodes[1].BaseValue = node->BaseValue;
  275. }
  276. /* Set bits in bitmap corresponding to keys */
  277. node->BitMapKey = (1UL<<keypart) | (1UL<<keypart2);
  278. SetSubTrie(hamt, node, newnodes);
  279. *replace = 1;
  280. return data;
  281. }
  282. }
  283. }
  284. }
  285. /* Subtrie: look up in bitmap */
  286. keypartbits += 5;
  287. if (keypartbits > 30) {
  288. /* Exceeded 32 bits of current key: rehash */
  289. key = hamt->ReHashKey(str, level);
  290. keypartbits = 0;
  291. }
  292. keypart = (key >> keypartbits) & 0x1F;
  293. if (!(node->BitMapKey & (1<<keypart))) {
  294. /* bit is 0 in bitmap -> add node to table */
  295. unsigned long Size;
  296. /* set bit to 1 */
  297. node->BitMapKey |= 1<<keypart;
  298. /* Count total number of bits in bitmap to determine new size */
  299. BitCount(Size, node->BitMapKey);
  300. Size &= 0x1F;
  301. if (Size == 0)
  302. Size = 32;
  303. newnodes = yasm_xmalloc(Size*sizeof(HAMTNode));
  304. /* Count bits below to find where to insert new node at */
  305. BitCount(Map, node->BitMapKey & ~((~0UL)<<keypart));
  306. Map &= 0x1F; /* Clamp to <32 */
  307. /* Copy existing nodes leaving gap for new node */
  308. memcpy(newnodes, GetSubTrie(node), Map*sizeof(HAMTNode));
  309. memcpy(&newnodes[Map+1], &(GetSubTrie(node))[Map],
  310. (Size-Map-1)*sizeof(HAMTNode));
  311. /* Delete old subtrie */
  312. yasm_xfree(GetSubTrie(node));
  313. /* Set up new node */
  314. newnodes[Map].BitMapKey = key;
  315. entry = yasm_xmalloc(sizeof(HAMTEntry));
  316. entry->str = str;
  317. entry->data = data;
  318. STAILQ_INSERT_TAIL(&hamt->entries, entry, next);
  319. SetValue(hamt, &newnodes[Map], entry);
  320. SetSubTrie(hamt, node, newnodes);
  321. *replace = 1;
  322. return data;
  323. }
  324. /* Count bits below */
  325. BitCount(Map, node->BitMapKey & ~((~0UL)<<keypart));
  326. Map &= 0x1F; /* Clamp to <32 */
  327. /* Go down a level */
  328. level++;
  329. node = &(GetSubTrie(node))[Map];
  330. }
  331. }
  332. /*@=temptrans =kepttrans =mustfree@*/
  333. void *
  334. HAMT_search(HAMT *hamt, const char *str)
  335. {
  336. HAMTNode *node;
  337. unsigned long key, keypart, Map;
  338. int keypartbits = 0;
  339. int level = 0;
  340. key = hamt->HashKey(str);
  341. keypart = key & 0x1F;
  342. node = &hamt->root[keypart];
  343. if (!node->BaseValue)
  344. return NULL;
  345. for (;;) {
  346. if (!(IsSubTrie(node))) {
  347. if (node->BitMapKey == key
  348. && hamt->CmpKey(((HAMTEntry *)(node->BaseValue))->str,
  349. str) == 0)
  350. return ((HAMTEntry *)(node->BaseValue))->data;
  351. else
  352. return NULL;
  353. }
  354. /* Subtree: look up in bitmap */
  355. keypartbits += 5;
  356. if (keypartbits > 30) {
  357. /* Exceeded 32 bits of current key: rehash */
  358. key = hamt->ReHashKey(str, level);
  359. keypartbits = 0;
  360. }
  361. keypart = (key >> keypartbits) & 0x1F;
  362. if (!(node->BitMapKey & (1<<keypart)))
  363. return NULL; /* bit is 0 in bitmap -> no match */
  364. /* Count bits below */
  365. BitCount(Map, node->BitMapKey & ~((~0UL)<<keypart));
  366. Map &= 0x1F; /* Clamp to <32 */
  367. /* Go down a level */
  368. level++;
  369. node = &(GetSubTrie(node))[Map];
  370. }
  371. }