string.c 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615
  1. // SPDX-License-Identifier: GPL-3.0-or-later
  2. #include "../libnetdata.h"
  3. #include <Judy.h>
  4. typedef int32_t REFCOUNT;
  5. // ----------------------------------------------------------------------------
  6. // STRING implementation - dedup all STRING
  7. struct netdata_string {
  8. uint32_t length; // the string length including the terminating '\0'
  9. REFCOUNT refcount; // how many times this string is used
  10. // We use a signed number to be able to detect duplicate frees of a string.
  11. // If at any point this goes below zero, we have a duplicate free.
  12. const char str[]; // the string itself, is appended to this structure
  13. };
  14. static struct string_hashtable {
  15. Pvoid_t JudyHSArray; // the Judy array - hashtable
  16. netdata_rwlock_t rwlock; // the R/W lock to protect the Judy array
  17. long int entries; // the number of entries in the index
  18. long int active_references; // the number of active references alive
  19. long int memory; // the memory used, without the JudyHS index
  20. size_t inserts; // the number of successful inserts to the index
  21. size_t deletes; // the number of successful deleted from the index
  22. size_t searches; // the number of successful searches in the index
  23. size_t duplications; // when a string is referenced
  24. size_t releases; // when a string is unreferenced
  25. #ifdef NETDATA_INTERNAL_CHECKS
  26. // internal statistics
  27. size_t found_deleted_on_search;
  28. size_t found_available_on_search;
  29. size_t found_deleted_on_insert;
  30. size_t found_available_on_insert;
  31. size_t spins;
  32. #endif
  33. } string_base = {
  34. .JudyHSArray = NULL,
  35. .rwlock = NETDATA_RWLOCK_INITIALIZER,
  36. };
  37. #ifdef NETDATA_INTERNAL_CHECKS
  38. #define string_internal_stats_add(var, val) __atomic_add_fetch(&string_base.var, val, __ATOMIC_RELAXED)
  39. #else
  40. #define string_internal_stats_add(var, val) do {;} while(0)
  41. #endif
  42. #define string_stats_atomic_increment(var) __atomic_add_fetch(&string_base.var, 1, __ATOMIC_RELAXED)
  43. #define string_stats_atomic_decrement(var) __atomic_sub_fetch(&string_base.var, 1, __ATOMIC_RELAXED)
  44. void string_statistics(size_t *inserts, size_t *deletes, size_t *searches, size_t *entries, size_t *references, size_t *memory, size_t *duplications, size_t *releases) {
  45. if(inserts)
  46. *inserts = string_base.inserts;
  47. if(deletes)
  48. *deletes = string_base.deletes;
  49. if(searches)
  50. *searches = string_base.searches;
  51. if(entries)
  52. *entries = (size_t)string_base.entries;
  53. if(references)
  54. *references = (size_t)string_base.active_references;
  55. if(memory)
  56. *memory = (size_t)string_base.memory;
  57. if(duplications)
  58. *duplications = string_base.duplications;
  59. if(releases)
  60. *releases = string_base.releases;
  61. }
  62. #define string_entry_acquire(se) __atomic_add_fetch(&((se)->refcount), 1, __ATOMIC_SEQ_CST);
  63. #define string_entry_release(se) __atomic_sub_fetch(&((se)->refcount), 1, __ATOMIC_SEQ_CST);
  64. static inline bool string_entry_check_and_acquire(STRING *se) {
  65. REFCOUNT expected, desired, count = 0;
  66. expected = __atomic_load_n(&se->refcount, __ATOMIC_SEQ_CST);
  67. do {
  68. count++;
  69. if(expected <= 0) {
  70. // We cannot use this.
  71. // The reference counter reached value zero,
  72. // so another thread is deleting this.
  73. string_internal_stats_add(spins, count - 1);
  74. return false;
  75. }
  76. desired = expected + 1;
  77. } while(!__atomic_compare_exchange_n(&se->refcount, &expected, desired, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST));
  78. string_internal_stats_add(spins, count - 1);
  79. // statistics
  80. // string_base.active_references is altered at the in string_strdupz() and string_freez()
  81. string_stats_atomic_increment(duplications);
  82. return true;
  83. }
  84. STRING *string_dup(STRING *string) {
  85. if(unlikely(!string)) return NULL;
  86. #ifdef NETDATA_INTERNAL_CHECKS
  87. if(unlikely(__atomic_load_n(&string->refcount, __ATOMIC_SEQ_CST) <= 0))
  88. fatal("STRING: tried to %s() a string that is freed (it has %d references).", __FUNCTION__, string->refcount);
  89. #endif
  90. string_entry_acquire(string);
  91. // statistics
  92. string_stats_atomic_increment(active_references);
  93. string_stats_atomic_increment(duplications);
  94. return string;
  95. }
  96. // Search the index and return an ACQUIRED string entry, or NULL
  97. static inline STRING *string_index_search(const char *str, size_t length) {
  98. STRING *string;
  99. // Find the string in the index
  100. // With a read-lock so that multiple readers can use the index concurrently.
  101. netdata_rwlock_rdlock(&string_base.rwlock);
  102. Pvoid_t *Rc;
  103. Rc = JudyHSGet(string_base.JudyHSArray, (void *)str, length);
  104. if(likely(Rc)) {
  105. // found in the hash table
  106. string = *Rc;
  107. if(string_entry_check_and_acquire(string)) {
  108. // we can use this entry
  109. string_internal_stats_add(found_available_on_search, 1);
  110. }
  111. else {
  112. // this entry is about to be deleted by another thread
  113. // do not touch it, let it go...
  114. string = NULL;
  115. string_internal_stats_add(found_deleted_on_search, 1);
  116. }
  117. }
  118. else {
  119. // not found in the hash table
  120. string = NULL;
  121. }
  122. string_stats_atomic_increment(searches);
  123. netdata_rwlock_unlock(&string_base.rwlock);
  124. return string;
  125. }
  126. // Insert a string to the index and return an ACQUIRED string entry,
  127. // or NULL if the call needs to be retried (a deleted entry with the same key is still in the index)
  128. // The returned entry is ACQUIRED, and it can either be:
  129. // 1. a new item inserted, or
  130. // 2. an item found in the index that is not currently deleted
  131. static inline STRING *string_index_insert(const char *str, size_t length) {
  132. STRING *string;
  133. netdata_rwlock_wrlock(&string_base.rwlock);
  134. STRING **ptr;
  135. {
  136. JError_t J_Error;
  137. Pvoid_t *Rc = JudyHSIns(&string_base.JudyHSArray, (void *)str, length, &J_Error);
  138. if (unlikely(Rc == PJERR)) {
  139. fatal(
  140. "STRING: Cannot insert entry with name '%s' to JudyHS, JU_ERRNO_* == %u, ID == %d",
  141. str,
  142. JU_ERRNO(&J_Error),
  143. JU_ERRID(&J_Error));
  144. }
  145. ptr = (STRING **)Rc;
  146. }
  147. if (likely(*ptr == 0)) {
  148. // a new item added to the index
  149. size_t mem_size = sizeof(STRING) + length;
  150. string = mallocz(mem_size);
  151. strcpy((char *)string->str, str);
  152. string->length = length;
  153. string->refcount = 1;
  154. *ptr = string;
  155. string_base.inserts++;
  156. string_base.entries++;
  157. string_base.memory += (long)(mem_size + JUDYHS_INDEX_SIZE_ESTIMATE(length));
  158. }
  159. else {
  160. // the item is already in the index
  161. string = *ptr;
  162. if(string_entry_check_and_acquire(string)) {
  163. // we can use this entry
  164. string_internal_stats_add(found_available_on_insert, 1);
  165. }
  166. else {
  167. // this entry is about to be deleted by another thread
  168. // do not touch it, let it go...
  169. string = NULL;
  170. string_internal_stats_add(found_deleted_on_insert, 1);
  171. }
  172. string_stats_atomic_increment(searches);
  173. }
  174. netdata_rwlock_unlock(&string_base.rwlock);
  175. return string;
  176. }
  177. // delete an entry from the index
  178. static inline void string_index_delete(STRING *string) {
  179. netdata_rwlock_wrlock(&string_base.rwlock);
  180. #ifdef NETDATA_INTERNAL_CHECKS
  181. if(unlikely(__atomic_load_n(&string->refcount, __ATOMIC_SEQ_CST) != 0))
  182. fatal("STRING: tried to delete a string at %s() that is already freed (it has %d references).", __FUNCTION__, string->refcount);
  183. #endif
  184. bool deleted = false;
  185. if (likely(string_base.JudyHSArray)) {
  186. JError_t J_Error;
  187. int ret = JudyHSDel(&string_base.JudyHSArray, (void *)string->str, string->length, &J_Error);
  188. if (unlikely(ret == JERR)) {
  189. error(
  190. "STRING: Cannot delete entry with name '%s' from JudyHS, JU_ERRNO_* == %u, ID == %d",
  191. string->str,
  192. JU_ERRNO(&J_Error),
  193. JU_ERRID(&J_Error));
  194. } else
  195. deleted = true;
  196. }
  197. if (unlikely(!deleted))
  198. error("STRING: tried to delete '%s' that is not in the index. Ignoring it.", string->str);
  199. else {
  200. size_t mem_size = sizeof(STRING) + string->length;
  201. string_base.deletes++;
  202. string_base.entries--;
  203. string_base.memory -= (long)(mem_size + JUDYHS_INDEX_SIZE_ESTIMATE(string->length));
  204. freez(string);
  205. }
  206. netdata_rwlock_unlock(&string_base.rwlock);
  207. }
  208. STRING *string_strdupz(const char *str) {
  209. if(unlikely(!str || !*str)) return NULL;
  210. size_t length = strlen(str) + 1;
  211. STRING *string = string_index_search(str, length);
  212. while(!string) {
  213. // The search above did not find anything,
  214. // We loop here, because during insert we may find an entry that is being deleted by another thread.
  215. // So, we have to let it go and retry to insert it again.
  216. string = string_index_insert(str, length);
  217. }
  218. // statistics
  219. string_stats_atomic_increment(active_references);
  220. return string;
  221. }
  222. void string_freez(STRING *string) {
  223. if(unlikely(!string)) return;
  224. REFCOUNT refcount = string_entry_release(string);
  225. #ifdef NETDATA_INTERNAL_CHECKS
  226. if(unlikely(refcount < 0))
  227. fatal("STRING: tried to %s() a string that is already freed (it has %d references).", __FUNCTION__, string->refcount);
  228. #endif
  229. if(unlikely(refcount == 0))
  230. string_index_delete(string);
  231. // statistics
  232. string_stats_atomic_decrement(active_references);
  233. string_stats_atomic_increment(releases);
  234. }
  235. inline size_t string_strlen(STRING *string) {
  236. if(unlikely(!string)) return 0;
  237. return string->length - 1;
  238. }
  239. inline const char *string2str(STRING *string) {
  240. if(unlikely(!string)) return "";
  241. return string->str;
  242. }
  243. int string_strcmp(STRING *string, const char *s) {
  244. return strcmp(string2str(string), s);
  245. }
  246. STRING *string_2way_merge(STRING *a, STRING *b) {
  247. static STRING *X = NULL;
  248. if(unlikely(!X)) {
  249. X = string_strdupz("[x]");
  250. }
  251. if(unlikely(a == b)) return string_dup(a);
  252. if(unlikely(a == X)) return string_dup(a);
  253. if(unlikely(b == X)) return string_dup(b);
  254. if(unlikely(!a)) return string_dup(X);
  255. if(unlikely(!b)) return string_dup(X);
  256. size_t alen = string_strlen(a);
  257. size_t blen = string_strlen(b);
  258. size_t length = alen + blen + string_strlen(X) + 1;
  259. char buf1[length + 1], buf2[length + 1], *dst1;
  260. const char *s1, *s2;
  261. s1 = string2str(a);
  262. s2 = string2str(b);
  263. dst1 = buf1;
  264. for( ; *s1 && *s2 && *s1 == *s2 ;s1++, s2++)
  265. *dst1++ = *s1;
  266. *dst1 = '\0';
  267. if(*s1 != '\0' || *s2 != '\0') {
  268. *dst1++ = '[';
  269. *dst1++ = 'x';
  270. *dst1++ = ']';
  271. s1 = &(string2str(a))[alen - 1];
  272. s2 = &(string2str(b))[blen - 1];
  273. char *dst2 = &buf2[length];
  274. *dst2 = '\0';
  275. for (; *s1 && *s2 && *s1 == *s2; s1--, s2--)
  276. *(--dst2) = *s1;
  277. strcpy(dst1, dst2);
  278. }
  279. return string_strdupz(buf1);
  280. }
  281. // ----------------------------------------------------------------------------
  282. // STRING unit test
  283. struct thread_unittest {
  284. int join;
  285. int dups;
  286. };
  287. static void *string_thread(void *arg) {
  288. struct thread_unittest *tu = arg;
  289. for(; 1 ;) {
  290. if(__atomic_load_n(&tu->join, __ATOMIC_RELAXED))
  291. break;
  292. STRING *s = string_strdupz("string thread checking 1234567890");
  293. for(int i = 0; i < tu->dups ; i++)
  294. string_dup(s);
  295. for(int i = 0; i < tu->dups ; i++)
  296. string_freez(s);
  297. string_freez(s);
  298. }
  299. return arg;
  300. }
  301. static char **string_unittest_generate_names(size_t entries) {
  302. char **names = mallocz(sizeof(char *) * entries);
  303. for(size_t i = 0; i < entries ;i++) {
  304. char buf[25 + 1] = "";
  305. snprintfz(buf, 25, "name.%zu.0123456789.%zu \t !@#$%%^&*(),./[]{}\\|~`", i, entries / 2 + i);
  306. names[i] = strdupz(buf);
  307. }
  308. return names;
  309. }
  310. static void string_unittest_free_char_pp(char **pp, size_t entries) {
  311. for(size_t i = 0; i < entries ;i++)
  312. freez(pp[i]);
  313. freez(pp);
  314. }
  315. int string_unittest(size_t entries) {
  316. size_t errors = 0;
  317. fprintf(stderr, "Generating %zu names and values...\n", entries);
  318. char **names = string_unittest_generate_names(entries);
  319. // check string
  320. {
  321. long int string_entries_starting = string_base.entries;
  322. fprintf(stderr, "\nChecking strings...\n");
  323. STRING *s1 = string_strdupz("hello unittest");
  324. STRING *s2 = string_strdupz("hello unittest");
  325. if(s1 != s2) {
  326. errors++;
  327. fprintf(stderr, "ERROR: duplicating strings are not deduplicated\n");
  328. }
  329. else
  330. fprintf(stderr, "OK: duplicating string are deduplicated\n");
  331. STRING *s3 = string_dup(s1);
  332. if(s3 != s1) {
  333. errors++;
  334. fprintf(stderr, "ERROR: cloning strings are not deduplicated\n");
  335. }
  336. else
  337. fprintf(stderr, "OK: cloning string are deduplicated\n");
  338. if(s1->refcount != 3) {
  339. errors++;
  340. fprintf(stderr, "ERROR: string refcount is not 3\n");
  341. }
  342. else
  343. fprintf(stderr, "OK: string refcount is 3\n");
  344. STRING *s4 = string_strdupz("world unittest");
  345. if(s4 == s1) {
  346. errors++;
  347. fprintf(stderr, "ERROR: string is sharing pointers on different strings\n");
  348. }
  349. else
  350. fprintf(stderr, "OK: string is properly handling different strings\n");
  351. usec_t start_ut, end_ut;
  352. STRING **strings = mallocz(entries * sizeof(STRING *));
  353. start_ut = now_realtime_usec();
  354. for(size_t i = 0; i < entries ;i++) {
  355. strings[i] = string_strdupz(names[i]);
  356. }
  357. end_ut = now_realtime_usec();
  358. fprintf(stderr, "Created %zu strings in %llu usecs\n", entries, end_ut - start_ut);
  359. start_ut = now_realtime_usec();
  360. for(size_t i = 0; i < entries ;i++) {
  361. strings[i] = string_dup(strings[i]);
  362. }
  363. end_ut = now_realtime_usec();
  364. fprintf(stderr, "Cloned %zu strings in %llu usecs\n", entries, end_ut - start_ut);
  365. start_ut = now_realtime_usec();
  366. for(size_t i = 0; i < entries ;i++) {
  367. strings[i] = string_strdupz(string2str(strings[i]));
  368. }
  369. end_ut = now_realtime_usec();
  370. fprintf(stderr, "Found %zu existing strings in %llu usecs\n", entries, end_ut - start_ut);
  371. start_ut = now_realtime_usec();
  372. for(size_t i = 0; i < entries ;i++) {
  373. string_freez(strings[i]);
  374. }
  375. end_ut = now_realtime_usec();
  376. fprintf(stderr, "Released %zu referenced strings in %llu usecs\n", entries, end_ut - start_ut);
  377. start_ut = now_realtime_usec();
  378. for(size_t i = 0; i < entries ;i++) {
  379. string_freez(strings[i]);
  380. }
  381. end_ut = now_realtime_usec();
  382. fprintf(stderr, "Released (again) %zu referenced strings in %llu usecs\n", entries, end_ut - start_ut);
  383. start_ut = now_realtime_usec();
  384. for(size_t i = 0; i < entries ;i++) {
  385. string_freez(strings[i]);
  386. }
  387. end_ut = now_realtime_usec();
  388. fprintf(stderr, "Freed %zu strings in %llu usecs\n", entries, end_ut - start_ut);
  389. freez(strings);
  390. if(string_base.entries != string_entries_starting + 2) {
  391. errors++;
  392. fprintf(stderr, "ERROR: strings dictionary should have %ld items but it has %ld\n", string_entries_starting + 2, string_base.entries);
  393. }
  394. else
  395. fprintf(stderr, "OK: strings dictionary has 2 items\n");
  396. }
  397. // check 2-way merge
  398. {
  399. struct testcase {
  400. char *src1; char *src2; char *expected;
  401. } tests[] = {
  402. { "", "", ""},
  403. { "a", "", "[x]"},
  404. { "", "a", "[x]"},
  405. { "a", "a", "a"},
  406. { "abcd", "abcd", "abcd"},
  407. { "foo_cs", "bar_cs", "[x]_cs"},
  408. { "cp_UNIQUE_INFIX_cs", "cp_unique_infix_cs", "cp_[x]_cs"},
  409. { "cp_UNIQUE_INFIX_ci_unique_infix_cs", "cp_unique_infix_ci_UNIQUE_INFIX_cs", "cp_[x]_cs"},
  410. { "foo[1234]", "foo[4321]", "foo[[x]]"},
  411. { NULL, NULL, NULL },
  412. };
  413. for (struct testcase *tc = &tests[0]; tc->expected != NULL; tc++) {
  414. STRING *src1 = string_strdupz(tc->src1);
  415. STRING *src2 = string_strdupz(tc->src2);
  416. STRING *expected = string_strdupz(tc->expected);
  417. STRING *result = string_2way_merge(src1, src2);
  418. if (string_cmp(result, expected) != 0) {
  419. fprintf(stderr, "string_2way_merge(\"%s\", \"%s\") -> \"%s\" (expected=\"%s\")\n",
  420. string2str(src1),
  421. string2str(src2),
  422. string2str(result),
  423. string2str(expected));
  424. errors++;
  425. }
  426. string_freez(src1);
  427. string_freez(src2);
  428. string_freez(expected);
  429. string_freez(result);
  430. }
  431. }
  432. // threads testing of string
  433. {
  434. struct thread_unittest tu = {
  435. .dups = 1,
  436. .join = 0,
  437. };
  438. #ifdef NETDATA_INTERNAL_CHECKS
  439. size_t ofound_deleted_on_search = string_base.found_deleted_on_search,
  440. ofound_available_on_search = string_base.found_available_on_search,
  441. ofound_deleted_on_insert = string_base.found_deleted_on_insert,
  442. ofound_available_on_insert = string_base.found_available_on_insert,
  443. ospins = string_base.spins;
  444. #endif
  445. size_t oinserts, odeletes, osearches, oentries, oreferences, omemory, oduplications, oreleases;
  446. string_statistics(&oinserts, &odeletes, &osearches, &oentries, &oreferences, &omemory, &oduplications, &oreleases);
  447. time_t seconds_to_run = 5;
  448. int threads_to_create = 2;
  449. fprintf(
  450. stderr,
  451. "Checking string concurrency with %d threads for %lld seconds...\n",
  452. threads_to_create,
  453. (long long)seconds_to_run);
  454. // check string concurrency
  455. netdata_thread_t threads[threads_to_create];
  456. tu.join = 0;
  457. for (int i = 0; i < threads_to_create; i++) {
  458. char buf[100 + 1];
  459. snprintf(buf, 100, "string%d", i);
  460. netdata_thread_create(
  461. &threads[i], buf, NETDATA_THREAD_OPTION_DONT_LOG | NETDATA_THREAD_OPTION_JOINABLE, string_thread, &tu);
  462. }
  463. sleep_usec(seconds_to_run * USEC_PER_SEC);
  464. __atomic_store_n(&tu.join, 1, __ATOMIC_RELAXED);
  465. for (int i = 0; i < threads_to_create; i++) {
  466. void *retval;
  467. netdata_thread_join(threads[i], &retval);
  468. }
  469. size_t inserts, deletes, searches, sentries, references, memory, duplications, releases;
  470. string_statistics(&inserts, &deletes, &searches, &sentries, &references, &memory, &duplications, &releases);
  471. fprintf(stderr, "inserts %zu, deletes %zu, searches %zu, entries %zu, references %zu, memory %zu, duplications %zu, releases %zu\n",
  472. inserts - oinserts, deletes - odeletes, searches - osearches, sentries - oentries, references - oreferences, memory - omemory, duplications - oduplications, releases - oreleases);
  473. #ifdef NETDATA_INTERNAL_CHECKS
  474. size_t found_deleted_on_search = string_base.found_deleted_on_search,
  475. found_available_on_search = string_base.found_available_on_search,
  476. found_deleted_on_insert = string_base.found_deleted_on_insert,
  477. found_available_on_insert = string_base.found_available_on_insert,
  478. spins = string_base.spins;
  479. fprintf(stderr, "on insert: %zu ok + %zu deleted\non search: %zu ok + %zu deleted\nspins: %zu\n",
  480. found_available_on_insert - ofound_available_on_insert,
  481. found_deleted_on_insert - ofound_deleted_on_insert,
  482. found_available_on_search - ofound_available_on_search,
  483. found_deleted_on_search - ofound_deleted_on_search,
  484. spins - ospins
  485. );
  486. #endif
  487. }
  488. string_unittest_free_char_pp(names, entries);
  489. fprintf(stderr, "\n%zu errors found\n", errors);
  490. return errors ? 1 : 0;
  491. }