// SPDX-License-Identifier: GPL-3.0-or-later

#include "ebpf.h"
#include "ebpf_mdflush.h"

struct config mdflush_config = { .first_section = NULL,
    .last_section = NULL,
    .mutex = NETDATA_MUTEX_INITIALIZER,
    .index = { .avl_tree = { .root = NULL, .compar = appconfig_section_compare },
               .rwlock = AVL_LOCK_INITIALIZER } };
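
// Index of the flush-count hash table within `mdflush_maps` below.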
#define MDFLUSH_MAP_COUNT 0

static ebpf_local_maps_t mdflush_maps[] = {
    {
        .name = "tbl_mdflush",
        .internal_input = 1024,
        .user_input = 0,
        .type = NETDATA_EBPF_MAP_STATIC,
        .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED,
#ifdef LIBBPF_MAJOR_VERSION
        .map_type = BPF_MAP_TYPE_PERCPU_HASH
#endif
    },
    /* end */
    {
        .name = NULL,
        .internal_input = 0,
        .user_input = 0,
        .type = NETDATA_EBPF_MAP_CONTROLLER,
        .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED
    }
};

// Store for "published" data from the reader thread, which the collector
// thread writes to the netdata agent.
static avl_tree_lock mdflush_pub;

// Temporary store for mdflush values read from the per-CPU eBPF map.
static mdflush_ebpf_val_t *mdflush_ebpf_vals = NULL;

/**
 * MDflush exit
 *
 * Unload the eBPF program and mark the thread as stopped.
 *
 * @param ptr thread data, an `ebpf_module_t *`.
 */
static void mdflush_exit(void *ptr)
{
    ebpf_module_t *em = (ebpf_module_t *)ptr;

    if (em->objects)
        ebpf_unload_legacy_code(em->objects, em->probe_links);

    pthread_mutex_lock(&ebpf_exit_cleanup);
    em->enabled = NETDATA_THREAD_EBPF_STOPPED;
    pthread_mutex_unlock(&ebpf_exit_cleanup);
}

/**
 * Compare mdflush values.
 *
 * @param a `netdata_mdflush_t *`.
 * @param b `netdata_mdflush_t *`.
 *
 * @return 0 if a == b, 1 if a > b, -1 if a < b.
 */
static int mdflush_val_cmp(void *a, void *b)
{
    netdata_mdflush_t *ptr1 = a;
    netdata_mdflush_t *ptr2 = b;

    if (ptr1->unit > ptr2->unit)
        return 1;
    else if (ptr1->unit < ptr2->unit)
        return -1;
    else
        return 0;
}

/**
 * Read count map
 *
 * Read the hash table and store data in the allocated vectors.
 *
 * @param maps_per_core do I need to read all cores?
 */
static void mdflush_read_count_map(int maps_per_core)
{
    int mapfd = mdflush_maps[MDFLUSH_MAP_COUNT].map_fd;
    mdflush_ebpf_key_t curr_key = (uint32_t)-1;
    mdflush_ebpf_key_t key = (uint32_t)-1;
    netdata_mdflush_t search_v;
    netdata_mdflush_t *v = NULL;

    while (bpf_map_get_next_key(mapfd, &curr_key, &key) == 0) {
        curr_key = key;

        // get the value for this key.
        int test = bpf_map_lookup_elem(mapfd, &key, mdflush_ebpf_vals);
        if (unlikely(test < 0)) {
            continue;
        }

        // is this record saved yet?
        //
        // if not, make a new one, mark it as unsaved for now, and continue;
        // we will insert it at the end, after all of its values are correctly
        // set, so that we can safely publish it to the collector within a
        // single, short locked operation.
        //
        // otherwise simply continue; we will only update the flush count,
        // which can be republished safely without a lock.
        //
        // NOTE: a lock isn't strictly necessary for this initial search, as
        // only this thread does the writing, but the AVL uses a read-write
        // lock, so there is no contention.
        bool v_is_new = false;
        search_v.unit = key;
        v = (netdata_mdflush_t *)avl_search_lock(&mdflush_pub, (avl_t *)&search_v);
        if (unlikely(v == NULL)) {
            // the flush count can only be set reliably at a later time, so
            // we defer the AVL insert until then.
            v = callocz(1, sizeof(netdata_mdflush_t));
            v->unit = key;
            sprintf(v->disk_name, "md%u", key);
            v->dim_exists = false;
            v_is_new = true;
        }

        // we must add up the count value for this record across all CPUs.
        uint64_t total_cnt = 0;
        int i;
        int end = (!maps_per_core) ? 1 : ebpf_nprocs;
        for (i = 0; i < end; i++) {
            total_cnt += mdflush_ebpf_vals[i];
        }

        // can now safely publish the count for existing records.
        v->cnt = total_cnt;

        // can now safely publish the new record.
        if (v_is_new) {
            avl_t *check = avl_insert_lock(&mdflush_pub, (avl_t *)v);
            if (check != (avl_t *)v) {
                error("Internal error, cannot insert into the AVL tree.");
            }
        }
    }
}
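
/**
 * Create charts
 *
 * Create the MD flush chart on the netdata agent.
 *
 * @param update_every chart update frequency, in seconds.
 */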
static void mdflush_create_charts(int update_every)
{
    ebpf_create_chart(
        "mdstat",
        "mdstat_flush",
        "MD flushes",
        "flushes",
        "flush (eBPF)",
        "md.flush",
        NETDATA_EBPF_CHART_TYPE_STACKED,
        NETDATA_CHART_PRIO_MDSTAT_FLUSH,
        NULL, NULL, 0, update_every,
        NETDATA_EBPF_MODULE_NAME_MDFLUSH
    );

    fflush(stdout);
}
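
/**
 * Write dimensions
 *
 * Callback for the AVL tree traversal on `mdflush_pub`; writes one dimension
 * per discovered MD device.
 *
 * @param entry a `netdata_mdflush_t *`.
 * @param data  unused.
 *
 * @return 1 to continue the traversal.
 */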
static int mdflush_write_dims(void *entry, void *data)
{
    UNUSED(data);

    netdata_mdflush_t *v = entry;

    // records get dynamically added in, so create the dimension if we
    // haven't yet.
    if (!v->dim_exists) {
        ebpf_write_global_dimension(
            v->disk_name, v->disk_name,
            ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]
        );
        v->dim_exists = true;
    }

    write_chart_dimension(v->disk_name, v->cnt);

    return 1;
}

/**
 * Main loop for this collector.
 *
 * @param em the structure with thread information.
 */
static void mdflush_collector(ebpf_module_t *em)
{
    mdflush_ebpf_vals = callocz(ebpf_nprocs, sizeof(mdflush_ebpf_val_t));

    int update_every = em->update_every;
    avl_init_lock(&mdflush_pub, mdflush_val_cmp);

    // create the chart and static dimensions.
    pthread_mutex_lock(&lock);
    mdflush_create_charts(update_every);
    ebpf_update_stats(&plugin_statistics, em);
    ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps);
    pthread_mutex_unlock(&lock);

    // loop and read the published data until the eBPF plugin is closed.
    heartbeat_t hb;
    heartbeat_init(&hb);
    int counter = update_every - 1;
    int maps_per_core = em->maps_per_core;
    while (!ebpf_exit_plugin) {
        (void)heartbeat_next(&hb, USEC_PER_SEC);
        if (ebpf_exit_plugin || ++counter != update_every)
            continue;

        counter = 0;
        mdflush_read_count_map(maps_per_core);

        pthread_mutex_lock(&lock);
        // write dimensions now for all devices discovered so far.
        write_begin_chart("mdstat", "mdstat_flush");
        avl_traverse_lock(&mdflush_pub, mdflush_write_dims, NULL);
        write_end_chart();
        pthread_mutex_unlock(&lock);
    }
}

/**
 * mdflush thread.
 *
 * @param ptr an `ebpf_module_t *`.
 *
 * @return always NULL.
 */
void *ebpf_mdflush_thread(void *ptr)
{
    netdata_thread_cleanup_push(mdflush_exit, ptr);

    ebpf_module_t *em = (ebpf_module_t *)ptr;
    em->maps = mdflush_maps;

    char *md_flush_request = ebpf_find_symbol("md_flush_request");
    if (!md_flush_request) {
        error("Cannot monitor MD devices, because the md module is not loaded.");
        goto endmdflush;
    }

#ifdef LIBBPF_MAJOR_VERSION
    ebpf_define_map_type(em->maps, em->maps_per_core, running_on_kernel);
#endif
    em->probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &em->objects);
    if (!em->probe_links) {
        goto endmdflush;
    }

    mdflush_collector(em);

endmdflush:
    freez(md_flush_request);
    ebpf_update_disabled_plugin_stats(em);

    netdata_thread_cleanup_pop(1);

    return NULL;
}