string.c 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611
  1. // SPDX-License-Identifier: GPL-3.0-or-later
  2. #include "../libnetdata.h"
  3. #include <Judy.h>
  4. typedef int32_t REFCOUNT;
  5. // ----------------------------------------------------------------------------
  6. // STRING implementation - dedup all STRING
  7. struct netdata_string {
  8. uint32_t length; // the string length including the terminating '\0'
  9. REFCOUNT refcount; // how many times this string is used
  10. // We use a signed number to be able to detect duplicate frees of a string.
  11. // If at any point this goes below zero, we have a duplicate free.
  12. const char str[]; // the string itself, is appended to this structure
  13. };
  14. static struct string_hashtable {
  15. Pvoid_t JudyHSArray; // the Judy array - hashtable
  16. netdata_rwlock_t rwlock; // the R/W lock to protect the Judy array
  17. long int entries; // the number of entries in the index
  18. long int active_references; // the number of active references alive
  19. long int memory; // the memory used, without the JudyHS index
  20. size_t inserts; // the number of successful inserts to the index
  21. size_t deletes; // the number of successful deleted from the index
  22. size_t searches; // the number of successful searches in the index
  23. size_t duplications; // when a string is referenced
  24. size_t releases; // when a string is unreferenced
  25. #ifdef NETDATA_INTERNAL_CHECKS
  26. // internal statistics
  27. size_t found_deleted_on_search;
  28. size_t found_available_on_search;
  29. size_t found_deleted_on_insert;
  30. size_t found_available_on_insert;
  31. size_t spins;
  32. #endif
  33. } string_base = {
  34. .JudyHSArray = NULL,
  35. .rwlock = NETDATA_RWLOCK_INITIALIZER,
  36. };
  37. #ifdef NETDATA_INTERNAL_CHECKS
  38. #define string_internal_stats_add(var, val) __atomic_add_fetch(&string_base.var, val, __ATOMIC_RELAXED)
  39. #else
  40. #define string_internal_stats_add(var, val) do {;} while(0)
  41. #endif
  42. #define string_stats_atomic_increment(var) __atomic_add_fetch(&string_base.var, 1, __ATOMIC_RELAXED)
  43. #define string_stats_atomic_decrement(var) __atomic_sub_fetch(&string_base.var, 1, __ATOMIC_RELAXED)
  44. void string_statistics(size_t *inserts, size_t *deletes, size_t *searches, size_t *entries, size_t *references, size_t *memory, size_t *duplications, size_t *releases) {
  45. if(inserts)
  46. *inserts = string_base.inserts;
  47. if(deletes)
  48. *deletes = string_base.deletes;
  49. if(searches)
  50. *searches = string_base.searches;
  51. if(entries)
  52. *entries = (size_t)string_base.entries;
  53. if(references)
  54. *references = (size_t)string_base.active_references;
  55. if(memory)
  56. *memory = (size_t)string_base.memory;
  57. if(duplications)
  58. *duplications = string_base.duplications;
  59. if(releases)
  60. *releases = string_base.releases;
  61. }
  62. #define string_entry_acquire(se) __atomic_add_fetch(&((se)->refcount), 1, __ATOMIC_SEQ_CST);
  63. #define string_entry_release(se) __atomic_sub_fetch(&((se)->refcount), 1, __ATOMIC_SEQ_CST);
  64. static inline bool string_entry_check_and_acquire(STRING *se) {
  65. REFCOUNT expected, desired, count = 0;
  66. expected = __atomic_load_n(&se->refcount, __ATOMIC_SEQ_CST);
  67. do {
  68. count++;
  69. if(expected <= 0) {
  70. // We cannot use this.
  71. // The reference counter reached value zero,
  72. // so another thread is deleting this.
  73. string_internal_stats_add(spins, count - 1);
  74. return false;
  75. }
  76. desired = expected + 1;
  77. } while(!__atomic_compare_exchange_n(&se->refcount, &expected, desired, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST));
  78. string_internal_stats_add(spins, count - 1);
  79. // statistics
  80. // string_base.active_references is altered at the in string_strdupz() and string_freez()
  81. string_stats_atomic_increment(duplications);
  82. return true;
  83. }
  84. STRING *string_dup(STRING *string) {
  85. if(unlikely(!string)) return NULL;
  86. #ifdef NETDATA_INTERNAL_CHECKS
  87. if(unlikely(__atomic_load_n(&string->refcount, __ATOMIC_SEQ_CST) <= 0))
  88. fatal("STRING: tried to %s() a string that is freed (it has %d references).", __FUNCTION__, string->refcount);
  89. #endif
  90. string_entry_acquire(string);
  91. // statistics
  92. string_stats_atomic_increment(active_references);
  93. string_stats_atomic_increment(duplications);
  94. return string;
  95. }
  96. // Search the index and return an ACQUIRED string entry, or NULL
  97. static inline STRING *string_index_search(const char *str, size_t length) {
  98. STRING *string;
  99. // Find the string in the index
  100. // With a read-lock so that multiple readers can use the index concurrently.
  101. netdata_rwlock_rdlock(&string_base.rwlock);
  102. Pvoid_t *Rc;
  103. Rc = JudyHSGet(string_base.JudyHSArray, (void *)str, length);
  104. if(likely(Rc)) {
  105. // found in the hash table
  106. string = *Rc;
  107. if(string_entry_check_and_acquire(string)) {
  108. // we can use this entry
  109. string_internal_stats_add(found_available_on_search, 1);
  110. }
  111. else {
  112. // this entry is about to be deleted by another thread
  113. // do not touch it, let it go...
  114. string = NULL;
  115. string_internal_stats_add(found_deleted_on_search, 1);
  116. }
  117. }
  118. else {
  119. // not found in the hash table
  120. string = NULL;
  121. }
  122. string_stats_atomic_increment(searches);
  123. netdata_rwlock_unlock(&string_base.rwlock);
  124. return string;
  125. }
  126. // Insert a string to the index and return an ACQUIRED string entry,
  127. // or NULL if the call needs to be retried (a deleted entry with the same key is still in the index)
  128. // The returned entry is ACQUIRED, and it can either be:
  129. // 1. a new item inserted, or
  130. // 2. an item found in the index that is not currently deleted
  131. static inline STRING *string_index_insert(const char *str, size_t length) {
  132. STRING *string;
  133. netdata_rwlock_wrlock(&string_base.rwlock);
  134. STRING **ptr;
  135. {
  136. JError_t J_Error;
  137. Pvoid_t *Rc = JudyHSIns(&string_base.JudyHSArray, (void *)str, length, &J_Error);
  138. if (unlikely(Rc == PJERR)) {
  139. fatal(
  140. "STRING: Cannot insert entry with name '%s' to JudyHS, JU_ERRNO_* == %u, ID == %d",
  141. str,
  142. JU_ERRNO(&J_Error),
  143. JU_ERRID(&J_Error));
  144. }
  145. ptr = (STRING **)Rc;
  146. }
  147. if (likely(*ptr == 0)) {
  148. // a new item added to the index
  149. size_t mem_size = sizeof(STRING) + length;
  150. string = mallocz(mem_size);
  151. strcpy((char *)string->str, str);
  152. string->length = length;
  153. string->refcount = 1;
  154. *ptr = string;
  155. string_base.inserts++;
  156. string_base.entries++;
  157. string_base.memory += (long)(mem_size + JUDYHS_INDEX_SIZE_ESTIMATE(length));
  158. }
  159. else {
  160. // the item is already in the index
  161. string = *ptr;
  162. if(string_entry_check_and_acquire(string)) {
  163. // we can use this entry
  164. string_internal_stats_add(found_available_on_insert, 1);
  165. }
  166. else {
  167. // this entry is about to be deleted by another thread
  168. // do not touch it, let it go...
  169. string = NULL;
  170. string_internal_stats_add(found_deleted_on_insert, 1);
  171. }
  172. string_stats_atomic_increment(searches);
  173. }
  174. netdata_rwlock_unlock(&string_base.rwlock);
  175. return string;
  176. }
  177. // delete an entry from the index
  178. static inline void string_index_delete(STRING *string) {
  179. netdata_rwlock_wrlock(&string_base.rwlock);
  180. #ifdef NETDATA_INTERNAL_CHECKS
  181. if(unlikely(__atomic_load_n(&string->refcount, __ATOMIC_SEQ_CST) != 0))
  182. fatal("STRING: tried to delete a string at %s() that is already freed (it has %d references).", __FUNCTION__, string->refcount);
  183. #endif
  184. bool deleted = false;
  185. if (likely(string_base.JudyHSArray)) {
  186. JError_t J_Error;
  187. int ret = JudyHSDel(&string_base.JudyHSArray, (void *)string->str, string->length, &J_Error);
  188. if (unlikely(ret == JERR)) {
  189. error(
  190. "STRING: Cannot delete entry with name '%s' from JudyHS, JU_ERRNO_* == %u, ID == %d",
  191. string->str,
  192. JU_ERRNO(&J_Error),
  193. JU_ERRID(&J_Error));
  194. } else
  195. deleted = true;
  196. }
  197. if (unlikely(!deleted))
  198. error("STRING: tried to delete '%s' that is not in the index. Ignoring it.", string->str);
  199. else {
  200. size_t mem_size = sizeof(STRING) + string->length;
  201. string_base.deletes++;
  202. string_base.entries--;
  203. string_base.memory -= (long)(mem_size + JUDYHS_INDEX_SIZE_ESTIMATE(string->length));
  204. freez(string);
  205. }
  206. netdata_rwlock_unlock(&string_base.rwlock);
  207. }
  208. STRING *string_strdupz(const char *str) {
  209. if(unlikely(!str || !*str)) return NULL;
  210. size_t length = strlen(str) + 1;
  211. STRING *string = string_index_search(str, length);
  212. while(!string) {
  213. // The search above did not find anything,
  214. // We loop here, because during insert we may find an entry that is being deleted by another thread.
  215. // So, we have to let it go and retry to insert it again.
  216. string = string_index_insert(str, length);
  217. }
  218. // statistics
  219. string_stats_atomic_increment(active_references);
  220. return string;
  221. }
  222. void string_freez(STRING *string) {
  223. if(unlikely(!string)) return;
  224. REFCOUNT refcount = string_entry_release(string);
  225. #ifdef NETDATA_INTERNAL_CHECKS
  226. if(unlikely(refcount < 0))
  227. fatal("STRING: tried to %s() a string that is already freed (it has %d references).", __FUNCTION__, string->refcount);
  228. #endif
  229. if(unlikely(refcount == 0))
  230. string_index_delete(string);
  231. // statistics
  232. string_stats_atomic_decrement(active_references);
  233. string_stats_atomic_increment(releases);
  234. }
  235. size_t string_strlen(STRING *string) {
  236. if(unlikely(!string)) return 0;
  237. return string->length - 1;
  238. }
  239. const char *string2str(STRING *string) {
  240. if(unlikely(!string)) return "";
  241. return string->str;
  242. }
  243. STRING *string_2way_merge(STRING *a, STRING *b) {
  244. static STRING *X = NULL;
  245. if(unlikely(!X)) {
  246. X = string_strdupz("[x]");
  247. }
  248. if(unlikely(a == b)) return string_dup(a);
  249. if(unlikely(a == X)) return string_dup(a);
  250. if(unlikely(b == X)) return string_dup(b);
  251. if(unlikely(!a)) return string_dup(X);
  252. if(unlikely(!b)) return string_dup(X);
  253. size_t alen = string_strlen(a);
  254. size_t blen = string_strlen(b);
  255. size_t length = alen + blen + string_strlen(X) + 1;
  256. char buf1[length + 1], buf2[length + 1], *dst1;
  257. const char *s1, *s2;
  258. s1 = string2str(a);
  259. s2 = string2str(b);
  260. dst1 = buf1;
  261. for( ; *s1 && *s2 && *s1 == *s2 ;s1++, s2++)
  262. *dst1++ = *s1;
  263. *dst1 = '\0';
  264. if(*s1 != '\0' || *s2 != '\0') {
  265. *dst1++ = '[';
  266. *dst1++ = 'x';
  267. *dst1++ = ']';
  268. s1 = &(string2str(a))[alen - 1];
  269. s2 = &(string2str(b))[blen - 1];
  270. char *dst2 = &buf2[length];
  271. *dst2 = '\0';
  272. for (; *s1 && *s2 && *s1 == *s2; s1--, s2--)
  273. *(--dst2) = *s1;
  274. strcpy(dst1, dst2);
  275. }
  276. return string_strdupz(buf1);
  277. }
  278. // ----------------------------------------------------------------------------
  279. // STRING unit test
  280. struct thread_unittest {
  281. int join;
  282. int dups;
  283. };
  284. static void *string_thread(void *arg) {
  285. struct thread_unittest *tu = arg;
  286. for(; 1 ;) {
  287. if(__atomic_load_n(&tu->join, __ATOMIC_RELAXED))
  288. break;
  289. STRING *s = string_strdupz("string thread checking 1234567890");
  290. for(int i = 0; i < tu->dups ; i++)
  291. string_dup(s);
  292. for(int i = 0; i < tu->dups ; i++)
  293. string_freez(s);
  294. string_freez(s);
  295. }
  296. return arg;
  297. }
  298. static char **string_unittest_generate_names(size_t entries) {
  299. char **names = mallocz(sizeof(char *) * entries);
  300. for(size_t i = 0; i < entries ;i++) {
  301. char buf[25 + 1] = "";
  302. snprintfz(buf, 25, "name.%zu.0123456789.%zu \t !@#$%%^&*(),./[]{}\\|~`", i, entries / 2 + i);
  303. names[i] = strdupz(buf);
  304. }
  305. return names;
  306. }
  307. static void string_unittest_free_char_pp(char **pp, size_t entries) {
  308. for(size_t i = 0; i < entries ;i++)
  309. freez(pp[i]);
  310. freez(pp);
  311. }
  312. int string_unittest(size_t entries) {
  313. size_t errors = 0;
  314. fprintf(stderr, "Generating %zu names and values...\n", entries);
  315. char **names = string_unittest_generate_names(entries);
  316. // check string
  317. {
  318. long int string_entries_starting = string_base.entries;
  319. fprintf(stderr, "\nChecking strings...\n");
  320. STRING *s1 = string_strdupz("hello unittest");
  321. STRING *s2 = string_strdupz("hello unittest");
  322. if(s1 != s2) {
  323. errors++;
  324. fprintf(stderr, "ERROR: duplicating strings are not deduplicated\n");
  325. }
  326. else
  327. fprintf(stderr, "OK: duplicating string are deduplicated\n");
  328. STRING *s3 = string_dup(s1);
  329. if(s3 != s1) {
  330. errors++;
  331. fprintf(stderr, "ERROR: cloning strings are not deduplicated\n");
  332. }
  333. else
  334. fprintf(stderr, "OK: cloning string are deduplicated\n");
  335. if(s1->refcount != 3) {
  336. errors++;
  337. fprintf(stderr, "ERROR: string refcount is not 3\n");
  338. }
  339. else
  340. fprintf(stderr, "OK: string refcount is 3\n");
  341. STRING *s4 = string_strdupz("world unittest");
  342. if(s4 == s1) {
  343. errors++;
  344. fprintf(stderr, "ERROR: string is sharing pointers on different strings\n");
  345. }
  346. else
  347. fprintf(stderr, "OK: string is properly handling different strings\n");
  348. usec_t start_ut, end_ut;
  349. STRING **strings = mallocz(entries * sizeof(STRING *));
  350. start_ut = now_realtime_usec();
  351. for(size_t i = 0; i < entries ;i++) {
  352. strings[i] = string_strdupz(names[i]);
  353. }
  354. end_ut = now_realtime_usec();
  355. fprintf(stderr, "Created %zu strings in %llu usecs\n", entries, end_ut - start_ut);
  356. start_ut = now_realtime_usec();
  357. for(size_t i = 0; i < entries ;i++) {
  358. strings[i] = string_dup(strings[i]);
  359. }
  360. end_ut = now_realtime_usec();
  361. fprintf(stderr, "Cloned %zu strings in %llu usecs\n", entries, end_ut - start_ut);
  362. start_ut = now_realtime_usec();
  363. for(size_t i = 0; i < entries ;i++) {
  364. strings[i] = string_strdupz(string2str(strings[i]));
  365. }
  366. end_ut = now_realtime_usec();
  367. fprintf(stderr, "Found %zu existing strings in %llu usecs\n", entries, end_ut - start_ut);
  368. start_ut = now_realtime_usec();
  369. for(size_t i = 0; i < entries ;i++) {
  370. string_freez(strings[i]);
  371. }
  372. end_ut = now_realtime_usec();
  373. fprintf(stderr, "Released %zu referenced strings in %llu usecs\n", entries, end_ut - start_ut);
  374. start_ut = now_realtime_usec();
  375. for(size_t i = 0; i < entries ;i++) {
  376. string_freez(strings[i]);
  377. }
  378. end_ut = now_realtime_usec();
  379. fprintf(stderr, "Released (again) %zu referenced strings in %llu usecs\n", entries, end_ut - start_ut);
  380. start_ut = now_realtime_usec();
  381. for(size_t i = 0; i < entries ;i++) {
  382. string_freez(strings[i]);
  383. }
  384. end_ut = now_realtime_usec();
  385. fprintf(stderr, "Freed %zu strings in %llu usecs\n", entries, end_ut - start_ut);
  386. freez(strings);
  387. if(string_base.entries != string_entries_starting + 2) {
  388. errors++;
  389. fprintf(stderr, "ERROR: strings dictionary should have %ld items but it has %ld\n", string_entries_starting + 2, string_base.entries);
  390. }
  391. else
  392. fprintf(stderr, "OK: strings dictionary has 2 items\n");
  393. }
  394. // check 2-way merge
  395. {
  396. struct testcase {
  397. char *src1; char *src2; char *expected;
  398. } tests[] = {
  399. { "", "", ""},
  400. { "a", "", "[x]"},
  401. { "", "a", "[x]"},
  402. { "a", "a", "a"},
  403. { "abcd", "abcd", "abcd"},
  404. { "foo_cs", "bar_cs", "[x]_cs"},
  405. { "cp_UNIQUE_INFIX_cs", "cp_unique_infix_cs", "cp_[x]_cs"},
  406. { "cp_UNIQUE_INFIX_ci_unique_infix_cs", "cp_unique_infix_ci_UNIQUE_INFIX_cs", "cp_[x]_cs"},
  407. { "foo[1234]", "foo[4321]", "foo[[x]]"},
  408. { NULL, NULL, NULL },
  409. };
  410. for (struct testcase *tc = &tests[0]; tc->expected != NULL; tc++) {
  411. STRING *src1 = string_strdupz(tc->src1);
  412. STRING *src2 = string_strdupz(tc->src2);
  413. STRING *expected = string_strdupz(tc->expected);
  414. STRING *result = string_2way_merge(src1, src2);
  415. if (string_cmp(result, expected) != 0) {
  416. fprintf(stderr, "string_2way_merge(\"%s\", \"%s\") -> \"%s\" (expected=\"%s\")\n",
  417. string2str(src1),
  418. string2str(src2),
  419. string2str(result),
  420. string2str(expected));
  421. errors++;
  422. }
  423. string_freez(src1);
  424. string_freez(src2);
  425. string_freez(expected);
  426. string_freez(result);
  427. }
  428. }
  429. // threads testing of string
  430. {
  431. struct thread_unittest tu = {
  432. .dups = 1,
  433. .join = 0,
  434. };
  435. #ifdef NETDATA_INTERNAL_CHECKS
  436. size_t ofound_deleted_on_search = string_base.found_deleted_on_search,
  437. ofound_available_on_search = string_base.found_available_on_search,
  438. ofound_deleted_on_insert = string_base.found_deleted_on_insert,
  439. ofound_available_on_insert = string_base.found_available_on_insert,
  440. ospins = string_base.spins;
  441. #endif
  442. size_t oinserts, odeletes, osearches, oentries, oreferences, omemory, oduplications, oreleases;
  443. string_statistics(&oinserts, &odeletes, &osearches, &oentries, &oreferences, &omemory, &oduplications, &oreleases);
  444. time_t seconds_to_run = 5;
  445. int threads_to_create = 2;
  446. fprintf(
  447. stderr,
  448. "Checking string concurrency with %d threads for %lld seconds...\n",
  449. threads_to_create,
  450. (long long)seconds_to_run);
  451. // check string concurrency
  452. netdata_thread_t threads[threads_to_create];
  453. tu.join = 0;
  454. for (int i = 0; i < threads_to_create; i++) {
  455. char buf[100 + 1];
  456. snprintf(buf, 100, "string%d", i);
  457. netdata_thread_create(
  458. &threads[i], buf, NETDATA_THREAD_OPTION_DONT_LOG | NETDATA_THREAD_OPTION_JOINABLE, string_thread, &tu);
  459. }
  460. sleep_usec(seconds_to_run * USEC_PER_SEC);
  461. __atomic_store_n(&tu.join, 1, __ATOMIC_RELAXED);
  462. for (int i = 0; i < threads_to_create; i++) {
  463. void *retval;
  464. netdata_thread_join(threads[i], &retval);
  465. }
  466. size_t inserts, deletes, searches, sentries, references, memory, duplications, releases;
  467. string_statistics(&inserts, &deletes, &searches, &sentries, &references, &memory, &duplications, &releases);
  468. fprintf(stderr, "inserts %zu, deletes %zu, searches %zu, entries %zu, references %zu, memory %zu, duplications %zu, releases %zu\n",
  469. inserts - oinserts, deletes - odeletes, searches - osearches, sentries - oentries, references - oreferences, memory - omemory, duplications - oduplications, releases - oreleases);
  470. #ifdef NETDATA_INTERNAL_CHECKS
  471. size_t found_deleted_on_search = string_base.found_deleted_on_search,
  472. found_available_on_search = string_base.found_available_on_search,
  473. found_deleted_on_insert = string_base.found_deleted_on_insert,
  474. found_available_on_insert = string_base.found_available_on_insert,
  475. spins = string_base.spins;
  476. fprintf(stderr, "on insert: %zu ok + %zu deleted\non search: %zu ok + %zu deleted\nspins: %zu\n",
  477. found_available_on_insert - ofound_available_on_insert,
  478. found_deleted_on_insert - ofound_deleted_on_insert,
  479. found_available_on_search - ofound_available_on_search,
  480. found_deleted_on_search - ofound_deleted_on_search,
  481. spins - ospins
  482. );
  483. #endif
  484. }
  485. string_unittest_free_char_pp(names, entries);
  486. fprintf(stderr, "\n%zu errors found\n", errors);
  487. return errors ? 1 : 0;
  488. }