slstring.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692
  1. /*
  2. Copyright (C) 2004, 2005, 2006 John E. Davis
  3. This file is part of the S-Lang Library.
  4. The S-Lang Library is free software; you can redistribute it and/or
  5. modify it under the terms of the GNU General Public License as
  6. published by the Free Software Foundation; either version 2 of the
  7. License, or (at your option) any later version.
  8. The S-Lang Library is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11. General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this library; if not, write to the Free Software
  14. Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
  15. USA.
  16. */
  17. #include "slinclud.h"
  18. #include "slang.h"
  19. #include "_slang.h"
  20. typedef struct _pSLstring_Type
  21. {
  22. struct _pSLstring_Type *next;
  23. unsigned int ref_count;
  24. unsigned long hash;
  25. unsigned int len;
  26. char bytes [1];
  27. }
  28. SLstring_Type;
  29. #define MAP_HASH_TO_INDEX(hash) ((hash) % SLSTRING_HASH_TABLE_SIZE)
  30. static SLstring_Type *String_Hash_Table [SLSTRING_HASH_TABLE_SIZE];
  31. static char Single_Char_Strings [256 * 2];
  32. #if SLANG_OPTIMIZE_FOR_SPEED
  33. #define MAX_FREE_STORE_LEN 32
  34. static SLstring_Type *SLS_Free_Store [MAX_FREE_STORE_LEN];
  35. # define NUM_CACHED_STRINGS 601
  36. typedef struct
  37. {
  38. SLstring_Type *sls;
  39. char *str;
  40. }
  41. Cached_String_Type;
  42. static char *Deleted_String = "*deleted*";
  43. static Cached_String_Type Cached_Strings [NUM_CACHED_STRINGS];
  44. #define GET_CACHED_STRING(s) \
  45. (Cached_Strings + (unsigned int)(((unsigned long) (s)) % NUM_CACHED_STRINGS))
  46. _INLINE_
  47. static void cache_string (SLstring_Type *sls)
  48. {
  49. Cached_String_Type *cs;
  50. cs = GET_CACHED_STRING(sls->bytes);
  51. cs->str = sls->bytes;
  52. cs->sls = sls;
  53. }
  54. _INLINE_
  55. static void uncache_string (char *s)
  56. {
  57. Cached_String_Type *cs;
  58. cs = GET_CACHED_STRING(s);
  59. if (cs->str == s)
  60. {
  61. cs->sls = NULL;
  62. cs->str = Deleted_String;
  63. }
  64. }
  65. #endif
  66. #if USE_NEW_HASH_CODE
  67. /* This hash algorithm comes from:
  68. *
  69. * Bob Jenkins, 1996. bob_jenkins@burtleburtle.net.
  70. * You may use this code any way you wish, private, educational, or commercial. It's free.
  71. * See http://burtleburtle.net/bob/hash/evahash.html
  72. */
  73. typedef unsigned long uint32;
  74. #define mix(a,b,c) \
  75. { \
  76. a -= b; a -= c; a ^= (c>>13); \
  77. b -= c; b -= a; b ^= (a<<8); \
  78. c -= a; c -= b; c ^= (b>>13); \
  79. a -= b; a -= c; a ^= (c>>12); \
  80. b -= c; b -= a; b ^= (a<<16); \
  81. c -= a; c -= b; c ^= (b>>5); \
  82. a -= b; a -= c; a ^= (c>>3); \
  83. b -= c; b -= a; b ^= (a<<10); \
  84. c -= a; c -= b; c ^= (b>>15); \
  85. }
  86. _INLINE_
  87. unsigned long _pSLstring_hash (unsigned char *s, unsigned char *smax)
  88. {
  89. register uint32 a,b,c;
  90. unsigned int length = (unsigned int)(smax - s);
  91. unsigned int len = length;
  92. a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */
  93. c = 0;
  94. /*---------------------------------------- handle most of the key */
  95. while (len >= 12)
  96. {
  97. a += (s[0] +((uint32)s[1]<<8) +((uint32)s[2]<<16) +((uint32)s[3]<<24));
  98. b += (s[4] +((uint32)s[5]<<8) +((uint32)s[6]<<16) +((uint32)s[7]<<24));
  99. c += (s[8] +((uint32)s[9]<<8) +((uint32)s[10]<<16)+((uint32)s[11]<<24));
  100. mix(a,b,c);
  101. s += 12; len -= 12;
  102. }
  103. /*------------------------------------- handle the last 11 bytes */
  104. c += length;
  105. switch(len) /* all the case statements fall through */
  106. {
  107. case 11: c+=((uint32)s[10]<<24);
  108. case 10: c+=((uint32)s[9]<<16);
  109. case 9 : c+=((uint32)s[8]<<8);
  110. /* the first byte of c is reserved for the length */
  111. case 8 : b+=((uint32)s[7]<<24);
  112. case 7 : b+=((uint32)s[6]<<16);
  113. case 6 : b+=((uint32)s[5]<<8);
  114. case 5 : b+=s[4];
  115. case 4 : a+=((uint32)s[3]<<24);
  116. case 3 : a+=((uint32)s[2]<<16);
  117. case 2 : a+=((uint32)s[1]<<8);
  118. case 1 : a+=s[0];
  119. /* case 0: nothing left to add */
  120. }
  121. mix(a,b,c);
  122. /*-------------------------------------------- report the result */
  123. return (unsigned long) c;
  124. }
  125. #else
  126. _INLINE_
  127. unsigned long _pSLstring_hash (unsigned char *s, unsigned char *smax)
  128. {
  129. register unsigned long h = 0;
  130. register unsigned long sum = 0;
  131. unsigned char *smax4;
  132. smax4 = smax - 4;
  133. while (s < smax4)
  134. {
  135. sum += s[0];
  136. h = sum + (h << 1);
  137. sum += s[1];
  138. h = sum + (h << 1);
  139. sum += s[2];
  140. h = sum + (h << 1);
  141. sum += s[3];
  142. h = sum + (h << 1);
  143. s += 4;
  144. }
  145. while (s < smax)
  146. {
  147. sum += *s++;
  148. h ^= sum + (h << 3); /* slightly different */
  149. }
  150. return h;
  151. }
  152. #endif
  153. unsigned long _pSLcompute_string_hash (char *s)
  154. {
  155. #if SLANG_OPTIMIZE_FOR_SPEED
  156. Cached_String_Type *cs;
  157. cs = GET_CACHED_STRING(s);
  158. if (cs->str == s)
  159. return cs->sls->hash;
  160. #endif
  161. return _pSLstring_hash ((unsigned char *) s, (unsigned char *) s + strlen (s));
  162. }
  163. _INLINE_
  164. /* This routine works with any (long) string */
  165. static SLstring_Type *find_string (char *s, unsigned int len, unsigned long hash)
  166. {
  167. SLstring_Type *sls;
  168. sls = String_Hash_Table [(unsigned int) MAP_HASH_TO_INDEX(hash)];
  169. if (sls == NULL)
  170. return NULL;
  171. do
  172. {
  173. /* Note that we need to actually make sure that bytes[len] == 0.
  174. * In this case, it is not enough to just compare pointers. In fact,
  175. * this is called from create_nstring, etc... It is unlikely that the
  176. * pointer is a slstring
  177. */
  178. if ((sls->hash == hash)
  179. && (sls->len == len)
  180. && (0 == strncmp (s, sls->bytes, len)))
  181. break;
  182. sls = sls->next;
  183. }
  184. while (sls != NULL);
  185. return sls;
  186. }
  187. _INLINE_
  188. static SLstring_Type *find_slstring (char *s, unsigned long hash)
  189. {
  190. SLstring_Type *sls;
  191. sls = String_Hash_Table [MAP_HASH_TO_INDEX(hash)];
  192. while (sls != NULL)
  193. {
  194. if (s == sls->bytes)
  195. return sls;
  196. sls = sls->next;
  197. }
  198. return sls;
  199. }
  200. _INLINE_
  201. static SLstring_Type *allocate_sls (unsigned int len)
  202. {
  203. SLstring_Type *sls;
  204. #if SLANG_OPTIMIZE_FOR_SPEED
  205. if ((len < MAX_FREE_STORE_LEN)
  206. && (NULL != (sls = SLS_Free_Store [len])))
  207. {
  208. SLS_Free_Store[len] = NULL;
  209. return sls;
  210. }
  211. #endif
  212. /* FIXME: use structure padding */
  213. sls = (SLstring_Type *) SLmalloc (len + sizeof (SLstring_Type));
  214. if (sls != NULL)
  215. sls->len = len;
  216. return sls;
  217. }
  218. _INLINE_
  219. static void free_sls (SLstring_Type *sls)
  220. {
  221. #if SLANG_OPTIMIZE_FOR_SPEED
  222. unsigned int len = sls->len;
  223. if ((len < MAX_FREE_STORE_LEN)
  224. && (SLS_Free_Store[len] == NULL))
  225. {
  226. SLS_Free_Store [len] = sls;
  227. return;
  228. }
  229. #endif
  230. SLfree ((char *)sls);
  231. }
  232. _INLINE_
  233. static char *create_long_string (char *s, unsigned int len, unsigned long hash)
  234. {
  235. SLstring_Type *sls;
  236. sls = find_string (s, len, hash);
  237. if (sls != NULL)
  238. {
  239. sls->ref_count++;
  240. #if SLANG_OPTIMIZE_FOR_SPEED
  241. cache_string (sls);
  242. #endif
  243. return sls->bytes;
  244. }
  245. sls = allocate_sls (len);
  246. if (sls == NULL)
  247. return NULL;
  248. strncpy (sls->bytes, s, len);
  249. sls->bytes[len] = 0;
  250. sls->ref_count = 1;
  251. sls->hash = hash;
  252. #if SLANG_OPTIMIZE_FOR_SPEED
  253. cache_string (sls);
  254. #endif
  255. hash = MAP_HASH_TO_INDEX(hash);
  256. sls->next = String_Hash_Table [(unsigned int)hash];
  257. String_Hash_Table [(unsigned int)hash] = sls;
  258. return sls->bytes;
  259. }
  260. _INLINE_
  261. static char *create_short_string (char *s, unsigned int len)
  262. {
  263. char ch;
  264. /* Note: if len is 0, then it does not matter what *s is. This is
  265. * important for SLang_create_nslstring.
  266. */
  267. if (len) ch = *s; else ch = 0;
  268. len = 2 * (unsigned int) ((unsigned char) ch);
  269. Single_Char_Strings [len] = ch;
  270. Single_Char_Strings [len + 1] = 0;
  271. return Single_Char_Strings + len;
  272. }
  273. /* s cannot be NULL */
  274. _INLINE_
  275. static SLstr_Type *create_nstring (char *s, unsigned int len, unsigned long *hash_ptr)
  276. {
  277. unsigned long hash;
  278. if (len < 2)
  279. return create_short_string (s, len);
  280. hash = _pSLstring_hash ((unsigned char *) s, (unsigned char *) (s + len));
  281. *hash_ptr = hash;
  282. return create_long_string (s, len, hash);
  283. }
  284. SLstr_Type *SLang_create_nslstring (char *s, unsigned int len)
  285. {
  286. unsigned long hash;
  287. if (s == NULL)
  288. return NULL;
  289. return create_nstring (s, len, &hash);
  290. }
  291. char *_pSLstring_make_hashed_string (char *s, unsigned int len, unsigned long *hashptr)
  292. {
  293. unsigned long hash;
  294. if (s == NULL) return NULL;
  295. hash = _pSLstring_hash ((unsigned char *) s, (unsigned char *) s + len);
  296. *hashptr = hash;
  297. if (len < 2)
  298. return create_short_string (s, len);
  299. return create_long_string (s, len, hash);
  300. }
  301. char *_pSLstring_dup_hashed_string (char *s, unsigned long hash)
  302. {
  303. unsigned int len;
  304. #if SLANG_OPTIMIZE_FOR_SPEED
  305. Cached_String_Type *cs;
  306. if (s == NULL) return NULL;
  307. if (s[0] == 0)
  308. return create_short_string (s, 0);
  309. if (s[1] == 0)
  310. return create_short_string (s, 1);
  311. cs = GET_CACHED_STRING(s);
  312. if (cs->str == s)
  313. {
  314. cs->sls->ref_count += 1;
  315. return s;
  316. }
  317. #else
  318. if (s == NULL) return NULL;
  319. #endif
  320. len = strlen (s);
  321. #if !SLANG_OPTIMIZE_FOR_SPEED
  322. if (len < 2) return create_short_string (s, len);
  323. #endif
  324. return create_long_string (s, len, hash);
  325. }
  326. /* This function requires an slstring!!! */
  327. char *_pSLstring_dup_slstring (char *s)
  328. {
  329. SLstring_Type *sls;
  330. #if SLANG_OPTIMIZE_FOR_SPEED
  331. Cached_String_Type *cs;
  332. #endif
  333. if (s == NULL)
  334. return s;
  335. #if SLANG_OPTIMIZE_FOR_SPEED
  336. cs = GET_CACHED_STRING(s);
  337. if (cs->str == s)
  338. {
  339. cs->sls->ref_count += 1;
  340. return s;
  341. }
  342. #endif
  343. if ((s[0] == 0) || (s[1] == 0))
  344. return s;
  345. sls = (SLstring_Type *) (s - offsetof(SLstring_Type,bytes[0]));
  346. sls->ref_count++;
  347. #if SLANG_OPTIMIZE_FOR_SPEED
  348. cache_string (sls);
  349. #endif
  350. return s;
  351. }
  352. static void free_sls_string (SLstring_Type *sls)
  353. {
  354. SLstring_Type *sls1, *prev;
  355. unsigned long hash = sls->hash;
  356. hash = MAP_HASH_TO_INDEX(hash);
  357. sls1 = String_Hash_Table [(unsigned int) hash];
  358. prev = NULL;
  359. /* This should not fail. */
  360. while (sls1 != sls)
  361. {
  362. prev = sls1;
  363. sls1 = sls1->next;
  364. }
  365. if (prev != NULL)
  366. prev->next = sls->next;
  367. else
  368. String_Hash_Table [(unsigned int) hash] = sls->next;
  369. free_sls (sls);
  370. }
  371. _INLINE_
  372. static void free_long_string (char *s, unsigned long hash)
  373. {
  374. SLstring_Type *sls;
  375. if (NULL == (sls = find_slstring (s, hash)))
  376. {
  377. SLang_verror (SL_APPLICATION_ERROR, "invalid attempt to free string:%s", s);
  378. return;
  379. }
  380. sls->ref_count--;
  381. if (sls->ref_count != 0)
  382. {
  383. #if SLANG_OPTIMIZE_FOR_SPEED
  384. /* cache_string (sls, len, hash); */
  385. #endif
  386. return;
  387. }
  388. #if SLANG_OPTIMIZE_FOR_SPEED
  389. uncache_string (s);
  390. #endif
  391. free_sls_string (sls);
  392. }
  393. /* This routine may be passed NULL-- it is not an error. */
  394. void SLang_free_slstring (char *s)
  395. {
  396. unsigned long hash;
  397. unsigned int len;
  398. #if SLANG_OPTIMIZE_FOR_SPEED
  399. Cached_String_Type *cs;
  400. #endif
  401. if (s == NULL) return;
  402. #if SLANG_OPTIMIZE_FOR_SPEED
  403. cs = GET_CACHED_STRING(s);
  404. if (cs->str == s)
  405. {
  406. SLstring_Type *sls = cs->sls;
  407. if (sls->ref_count <= 1)
  408. {
  409. #if SLANG_OPTIMIZE_FOR_SPEED
  410. cs->sls = NULL;
  411. cs->str = Deleted_String;
  412. #endif
  413. free_sls_string (sls);
  414. }
  415. else
  416. sls->ref_count -= 1;
  417. return;
  418. }
  419. #endif
  420. if ((len = strlen (s)) < 2)
  421. return;
  422. hash = _pSLstring_hash ((unsigned char *)s, (unsigned char *) s + len);
  423. free_long_string (s, hash);
  424. }
  425. char *SLang_create_slstring (char *s)
  426. {
  427. unsigned long hash;
  428. #if SLANG_OPTIMIZE_FOR_SPEED
  429. Cached_String_Type *cs;
  430. #endif
  431. if (s == NULL) return NULL;
  432. #if SLANG_OPTIMIZE_FOR_SPEED
  433. cs = GET_CACHED_STRING(s);
  434. if (cs->str == s)
  435. {
  436. cs->sls->ref_count += 1;
  437. return s;
  438. }
  439. #endif
  440. return create_nstring (s, strlen (s), &hash);
  441. }
  442. void _pSLfree_hashed_string (char *s, unsigned int len, unsigned long hash)
  443. {
  444. if ((s == NULL) || (len < 2)) return;
  445. free_long_string (s, hash);
  446. }
  447. char *_pSLallocate_slstring (unsigned int len)
  448. {
  449. SLstring_Type *sls = allocate_sls (len);
  450. if (sls == NULL)
  451. return NULL;
  452. sls->hash = 0;
  453. return sls->bytes;
  454. }
  455. void _pSLunallocate_slstring (char *s, unsigned int len)
  456. {
  457. SLstring_Type *sls;
  458. (void) len;
  459. if (s == NULL)
  460. return;
  461. sls = (SLstring_Type *) (s - offsetof(SLstring_Type,bytes[0]));
  462. free_sls (sls);
  463. }
  464. char *_pSLcreate_via_alloced_slstring (char *s, unsigned int len)
  465. {
  466. unsigned long hash;
  467. SLstring_Type *sls;
  468. if (s == NULL)
  469. return NULL;
  470. if (len < 2)
  471. {
  472. char *s1 = create_short_string (s, len);
  473. _pSLunallocate_slstring (s, len);
  474. return s1;
  475. }
  476. /* s is not going to be in the cache because when it was malloced, its
  477. * value was unknown. This simplifies the coding.
  478. */
  479. hash = _pSLstring_hash ((unsigned char *)s, (unsigned char *)s + len);
  480. sls = find_string (s, len, hash);
  481. if (sls != NULL)
  482. {
  483. sls->ref_count++;
  484. _pSLunallocate_slstring (s, len);
  485. s = sls->bytes;
  486. #if SLANG_OPTIMIZE_FOR_SPEED
  487. cache_string (sls);
  488. #endif
  489. return s;
  490. }
  491. sls = (SLstring_Type *) (s - offsetof(SLstring_Type,bytes[0]));
  492. sls->ref_count = 1;
  493. sls->hash = hash;
  494. #if SLANG_OPTIMIZE_FOR_SPEED
  495. cache_string (sls);
  496. #endif
  497. hash = MAP_HASH_TO_INDEX(hash);
  498. sls->next = String_Hash_Table [(unsigned int)hash];
  499. String_Hash_Table [(unsigned int)hash] = sls;
  500. return s;
  501. }
  502. /* Note, a and b may be ordinary strings. The result is an slstring */
  503. char *SLang_concat_slstrings (char *a, char *b)
  504. {
  505. unsigned int lena, len;
  506. char *c;
  507. lena = strlen (a);
  508. len = lena + strlen (b);
  509. c = _pSLallocate_slstring (len);
  510. if (c == NULL)
  511. return NULL;
  512. strcpy (c, a);
  513. strcpy (c + lena, b);
  514. return _pSLcreate_via_alloced_slstring (c, len);
  515. }
  516. /* This routine is assumed to work even if s is not an slstring */
  517. unsigned int _pSLstring_bytelen (SLstr_Type *s)
  518. {
  519. #if SLANG_OPTIMIZE_FOR_SPEED
  520. Cached_String_Type *cs;
  521. cs = GET_CACHED_STRING(s);
  522. if (cs->str == s)
  523. return cs->sls->len;
  524. #endif
  525. return strlen (s);
  526. }
  527. /* The caller must ensure that this is an slstring */
  528. void _pSLang_free_slstring (SLstr_Type *s)
  529. {
  530. #if SLANG_OPTIMIZE_FOR_SPEED
  531. Cached_String_Type *cs;
  532. #endif
  533. SLstring_Type *sls;
  534. if (s == NULL) return;
  535. #if SLANG_OPTIMIZE_FOR_SPEED
  536. cs = GET_CACHED_STRING(s);
  537. if (cs->str == s)
  538. {
  539. sls = cs->sls;
  540. if (sls->ref_count <= 1)
  541. {
  542. #if SLANG_OPTIMIZE_FOR_SPEED
  543. cs->sls = NULL;
  544. cs->str = Deleted_String;
  545. #endif
  546. free_sls_string (sls);
  547. }
  548. else
  549. sls->ref_count -= 1;
  550. return;
  551. }
  552. #endif
  553. if ((s[0] == 0) || (s[1] == 0))
  554. return;
  555. sls = (SLstring_Type *) (s - offsetof(SLstring_Type,bytes[0]));
  556. if (sls->ref_count > 1)
  557. {
  558. sls->ref_count--;
  559. return;
  560. }
  561. free_long_string (s, sls->hash);
  562. }
  563. /* An SLstring is required */
  564. unsigned long _pSLstring_get_hash (SLstr_Type *s)
  565. {
  566. SLstring_Type *sls;
  567. if (s[0] == 0)
  568. return _pSLstring_hash ((unsigned char*)s, (unsigned char *)s);
  569. if (s[1] == 0)
  570. return _pSLstring_hash ((unsigned char *)s, (unsigned char *)s+1);
  571. sls = (SLstring_Type *) (s - offsetof(SLstring_Type,bytes[0]));
  572. return sls->hash;
  573. }