ebpf_mdflush.c

// SPDX-License-Identifier: GPL-3.0-or-later

#include "ebpf.h"
#include "ebpf_mdflush.h"

struct config mdflush_config = { .first_section = NULL,
    .last_section = NULL,
    .mutex = NETDATA_MUTEX_INITIALIZER,
    .index = { .avl_tree = { .root = NULL, .compar = appconfig_section_compare },
               .rwlock = AVL_LOCK_INITIALIZER } };
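
// Index of the flush-count table (`tbl_mdflush`) within `mdflush_maps`.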
#define MDFLUSH_MAP_COUNT 0

static ebpf_local_maps_t mdflush_maps[] = {
    {
        .name = "tbl_mdflush",
        .internal_input = 1024,
        .user_input = 0,
        .type = NETDATA_EBPF_MAP_STATIC,
        .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED
    },
    /* end */
    {
        .name = NULL,
        .internal_input = 0,
        .user_input = 0,
        .type = NETDATA_EBPF_MAP_CONTROLLER,
        .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED
    }
};

// Store for "published" data from the reader thread, which the collector
// thread will write to the netdata agent.
static avl_tree_lock mdflush_pub;

// Temporary store for mdflush values we get from a per-CPU eBPF map.
static mdflush_ebpf_val_t *mdflush_ebpf_vals = NULL;

static struct bpf_link **probe_links = NULL;
static struct bpf_object *objects = NULL;

static int read_thread_closed = 1;

static struct netdata_static_thread mdflush_threads = {"MDFLUSH KERNEL",
                                                       NULL, NULL, 1, NULL,
                                                       NULL, NULL };

/**
 * Clean up the main thread.
 *
 * @param ptr thread data.
 */
static void mdflush_cleanup(void *ptr)
{
    ebpf_module_t *em = (ebpf_module_t *)ptr;
    if (!em->enabled) {
        return;
    }

    heartbeat_t hb;
    heartbeat_init(&hb);
    uint32_t tick = 1 * USEC_PER_MS;

    // Wait for the reader thread to finish before freeing its resources.
    while (!read_thread_closed) {
        usec_t dt = heartbeat_next(&hb, tick);
        UNUSED(dt);
    }

    freez(mdflush_ebpf_vals);
    freez(mdflush_threads.thread);

    if (probe_links) {
        struct bpf_program *prog;
        size_t i = 0;
        bpf_object__for_each_program(prog, objects) {
            bpf_link__destroy(probe_links[i]);
            i++;
        }
        bpf_object__close(objects);
    }
}

/**
 * Compare mdflush values.
 *
 * @param a `netdata_mdflush_t *`.
 * @param b `netdata_mdflush_t *`.
 *
 * @return 0 if a == b, 1 if a > b, -1 if a < b.
 */
static int mdflush_val_cmp(void *a, void *b)
{
    netdata_mdflush_t *ptr1 = a;
    netdata_mdflush_t *ptr2 = b;

    if (ptr1->unit > ptr2->unit) {
        return 1;
    }
    else if (ptr1->unit < ptr2->unit) {
        return -1;
    }
    else {
        return 0;
    }
}

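/**
 * Read the mdflush count map.
 *
 * Iterates every key in `tbl_mdflush`, sums the per-CPU flush counts,
 * and publishes the total into `mdflush_pub`, inserting a new record
 * the first time a device is seen.
 */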
static void mdflush_read_count_map(void)
{
    int mapfd = mdflush_maps[MDFLUSH_MAP_COUNT].map_fd;
    mdflush_ebpf_key_t curr_key = (uint32_t)-1;
    mdflush_ebpf_key_t key = (uint32_t)-1;
    netdata_mdflush_t search_v;
    netdata_mdflush_t *v = NULL;

    while (bpf_map_get_next_key(mapfd, &curr_key, &key) == 0) {
        curr_key = key;

        // Get the value for this key.
        int test = bpf_map_lookup_elem(mapfd, &key, mdflush_ebpf_vals);
        if (unlikely(test < 0)) {
            continue;
        }

        // Is this record saved yet?
        //
        // If not, make a new one, mark it as unsaved for now, and continue; we
        // will insert it at the end after all of its values are correctly set,
        // so that we can safely publish it to the collector within a single,
        // short locked operation.
        //
        // Otherwise simply continue; we will only update the flush count,
        // which can be republished safely without a lock.
        //
        // NOTE: the lock isn't strictly necessary for this initial search, as
        // only this thread does the writing, but the AVL uses a read-write
        // lock, so there is no contention.
        bool v_is_new = false;
        search_v.unit = key;
        v = (netdata_mdflush_t *)avl_search_lock(
            &mdflush_pub,
            (avl_t *)&search_v
        );
        if (unlikely(v == NULL)) {
            // The flush count can only be set reliably at a later time;
            // only once it has been set do we insert into the AVL.
            v = callocz(1, sizeof(netdata_mdflush_t));
            v->unit = key;
            snprintf(v->disk_name, sizeof(v->disk_name), "md%u", key);
            v->dim_exists = false;
            v_is_new = true;
        }

        // We must add up the count value for this record across all CPUs.
        uint64_t total_cnt = 0;
        int i;
        int end = (running_on_kernel < NETDATA_KERNEL_V4_15) ? 1 : ebpf_nprocs;
        for (i = 0; i < end; i++) {
            total_cnt += mdflush_ebpf_vals[i];
        }

        // Can now safely publish the count for existing records.
        v->cnt = total_cnt;

        // Can now safely publish the new record.
        if (v_is_new) {
            avl_t *check = avl_insert_lock(&mdflush_pub, (avl_t *)v);
            if (check != (avl_t *)v) {
                error("Internal error, cannot insert into the AVL tree.");
            }
        }
    }
}

/**
 * Read eBPF maps for mdflush.
 */
static void *mdflush_reader(void *ptr)
{
    read_thread_closed = 0;

    heartbeat_t hb;
    heartbeat_init(&hb);

    ebpf_module_t *em = (ebpf_module_t *)ptr;
    usec_t step = NETDATA_MDFLUSH_SLEEP_MS * em->update_every;
    while (!close_ebpf_plugin) {
        usec_t dt = heartbeat_next(&hb, step);
        UNUSED(dt);
        mdflush_read_count_map();
    }

    read_thread_closed = 1;
    return NULL;
}

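/**
 * Create the MD flush chart.
 *
 * @param update_every data collection interval, in seconds.
 */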
static void mdflush_create_charts(int update_every)
{
    ebpf_create_chart(
        "mdstat",
        "mdstat_flush",
        "MD flushes",
        "flushes",
        "flush (eBPF)",
        "md.flush",
        NETDATA_EBPF_CHART_TYPE_STACKED,
        NETDATA_CHART_PRIO_MDSTAT_FLUSH,
        NULL, NULL, 0, update_every,
        NETDATA_EBPF_MODULE_NAME_MDFLUSH
    );

    fflush(stdout);
}

/**
 * Write chart dimensions; callback for AVL tree traversal on `mdflush_pub`.
 *
 * @param entry a `netdata_mdflush_t *`.
 * @param data unused.
 *
 * @return always 1.
 */
static int mdflush_write_dims(void *entry, void *data)
{
    UNUSED(data);

    netdata_mdflush_t *v = entry;

    // Records are discovered dynamically, so add the dimension if we
    // haven't yet.
    if (!v->dim_exists) {
        ebpf_write_global_dimension(
            v->disk_name, v->disk_name,
            ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]
        );
        v->dim_exists = true;
    }

    write_chart_dimension(v->disk_name, v->cnt);
    return 1;
}

/**
 * Main loop for this collector.
 */
static void mdflush_collector(ebpf_module_t *em)
{
    mdflush_ebpf_vals = callocz(ebpf_nprocs, sizeof(mdflush_ebpf_val_t));

    avl_init_lock(&mdflush_pub, mdflush_val_cmp);

    // Create the reader thread.
    mdflush_threads.thread = mallocz(sizeof(netdata_thread_t));
    mdflush_threads.start_routine = mdflush_reader;
    netdata_thread_create(
        mdflush_threads.thread,
        mdflush_threads.name,
        NETDATA_THREAD_OPTION_JOINABLE,
        mdflush_reader,
        em
    );

    // Create the chart and its static dimensions.
    pthread_mutex_lock(&lock);
    mdflush_create_charts(em->update_every);
    ebpf_update_stats(&plugin_statistics, em);
    pthread_mutex_unlock(&lock);

    // Loop and read from the published data until the eBPF plugin is closed.
    int update_every = em->update_every;
    int counter = update_every - 1;
    while (!close_ebpf_plugin) {
        pthread_mutex_lock(&collect_data_mutex);
        pthread_cond_wait(&collect_data_cond_var, &collect_data_mutex);

        if (++counter == update_every) {
            counter = 0;
            pthread_mutex_lock(&lock);

            // Write dimensions now for all hitherto discovered devices.
            write_begin_chart("mdstat", "mdstat_flush");
            avl_traverse_lock(&mdflush_pub, mdflush_write_dims, NULL);
            write_end_chart();

            pthread_mutex_unlock(&lock);
        }

        pthread_mutex_unlock(&collect_data_mutex);
    }
}

/**
 * mdflush thread.
 *
 * @param ptr an `ebpf_module_t *`.
 *
 * @return always NULL.
 */
void *ebpf_mdflush_thread(void *ptr)
{
    netdata_thread_cleanup_push(mdflush_cleanup, ptr);

    ebpf_module_t *em = (ebpf_module_t *)ptr;
    em->maps = mdflush_maps;

    char *md_flush_request = ebpf_find_symbol("md_flush_request");
    if (!md_flush_request) {
        em->enabled = CONFIG_BOOLEAN_NO;
        error("Cannot monitor MD devices, because the md module is not loaded.");
    }
    freez(md_flush_request);

    if (!em->enabled) {
        goto endmdflush;
    }

    probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &objects);
    if (!probe_links) {
        em->enabled = CONFIG_BOOLEAN_NO;
        goto endmdflush;
    }

    mdflush_collector(em);

endmdflush:
    if (!em->enabled)
        ebpf_update_disabled_plugin_stats(em);

    netdata_thread_cleanup_pop(1);
    return NULL;
}