ebpf_mdflush.c

// SPDX-License-Identifier: GPL-3.0-or-later

#include "ebpf.h"
#include "ebpf_mdflush.h"

struct config mdflush_config = { .first_section = NULL,
    .last_section = NULL,
    .mutex = NETDATA_MUTEX_INITIALIZER,
    .index = { .avl_tree = { .root = NULL, .compar = appconfig_section_compare },
               .rwlock = AVL_LOCK_INITIALIZER } };

#define MDFLUSH_MAP_COUNT 0
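
// maps shared with the kernel-side eBPF program; MDFLUSH_MAP_COUNT above is
// the index of the per-CPU "tbl_mdflush" table in this array.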
static ebpf_local_maps_t mdflush_maps[] = {
    {
        .name = "tbl_mdflush",
        .internal_input = 1024,
        .user_input = 0,
        .type = NETDATA_EBPF_MAP_STATIC,
        .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED
    },
    /* end */
    {
        .name = NULL,
        .internal_input = 0,
        .user_input = 0,
        .type = NETDATA_EBPF_MAP_CONTROLLER,
        .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED
    }
};

// store for "published" data from the reader thread, which the collector
// thread will write to the netdata agent.
static avl_tree_lock mdflush_pub;

// tmp store for mdflush values we get from a per-CPU eBPF map.
static mdflush_ebpf_val_t *mdflush_ebpf_vals = NULL;

static struct netdata_static_thread mdflush_threads = {
    .name = "MDFLUSH KERNEL",
    .config_section = NULL,
    .config_name = NULL,
    .env_name = NULL,
    .enabled = 1,
    .thread = NULL,
    .init_routine = NULL,
    .start_routine = NULL
};
/**
 * MDflush Free
 *
 * Clean up variables after the child thread stops.
 *
 * @param em the module structure.
 */
static void ebpf_mdflush_free(ebpf_module_t *em)
{
    pthread_mutex_lock(&ebpf_exit_cleanup);
    if (em->thread->enabled == NETDATA_THREAD_EBPF_RUNNING) {
        em->thread->enabled = NETDATA_THREAD_EBPF_STOPPING;
        pthread_mutex_unlock(&ebpf_exit_cleanup);
        return;
    }
    pthread_mutex_unlock(&ebpf_exit_cleanup);

    freez(mdflush_ebpf_vals);
    freez(mdflush_threads.thread);

    em->thread->enabled = NETDATA_THREAD_EBPF_STOPPED;
}
/**
 * MDflush exit
 *
 * Cancel thread and exit.
 *
 * @param ptr thread data.
 */
static void mdflush_exit(void *ptr)
{
    ebpf_module_t *em = (ebpf_module_t *)ptr;

    ebpf_mdflush_free(em);
}
/**
 * Cleanup
 *
 * Clean allocated memory.
 *
 * @param ptr thread data.
 */
static void mdflush_cleanup(void *ptr)
{
    ebpf_module_t *em = (ebpf_module_t *)ptr;

    if (mdflush_threads.thread)
        netdata_thread_cancel(*mdflush_threads.thread);
    ebpf_mdflush_free(em);
}
/**
 * Compare mdflush values.
 *
 * @param a `netdata_mdflush_t *`.
 * @param b `netdata_mdflush_t *`.
 *
 * @return 0 if a == b, 1 if a > b, -1 if a < b.
 */
static int mdflush_val_cmp(void *a, void *b)
{
    netdata_mdflush_t *ptr1 = a;
    netdata_mdflush_t *ptr2 = b;

    if (ptr1->unit > ptr2->unit) {
        return 1;
    }
    else if (ptr1->unit < ptr2->unit) {
        return -1;
    }
    else {
        return 0;
    }
}
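
/**
 * Read the eBPF flush count map.
 *
 * Walk "tbl_mdflush", sum the per-CPU counters for each MD device and
 * publish the totals to the `mdflush_pub` AVL tree.
 */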
static void mdflush_read_count_map()
{
    int mapfd = mdflush_maps[MDFLUSH_MAP_COUNT].map_fd;
    mdflush_ebpf_key_t curr_key = (uint32_t)-1;
    mdflush_ebpf_key_t key = (uint32_t)-1;
    netdata_mdflush_t search_v;
    netdata_mdflush_t *v = NULL;

    while (bpf_map_get_next_key(mapfd, &curr_key, &key) == 0) {
        curr_key = key;

        // get val for this key.
        int test = bpf_map_lookup_elem(mapfd, &key, mdflush_ebpf_vals);
        if (unlikely(test < 0)) {
            continue;
        }

        // is this record saved yet?
        //
        // if not, make a new one, mark it as unsaved for now, and continue; we
        // will insert it at the end after all of its values are correctly set,
        // so that we can safely publish it to the collector within a single,
        // short locked operation.
        //
        // otherwise simply continue; we will only update the flush count,
        // which can be republished safely without a lock.
        //
        // NOTE: lock isn't strictly necessary for this initial search, as only
        // this thread does writing, but the AVL is using a read-write lock so
        // there is no congestion.
        bool v_is_new = false;
        search_v.unit = key;
        v = (netdata_mdflush_t *)avl_search_lock(
            &mdflush_pub,
            (avl_t *)&search_v
        );
        if (unlikely(v == NULL)) {
            // flush count can only be added reliably at a later time.
            // when they're added, only then will we AVL insert.
            v = callocz(1, sizeof(netdata_mdflush_t));
            v->unit = key;
            sprintf(v->disk_name, "md%u", key);
            v->dim_exists = false;

            v_is_new = true;
        }

        // we must add up count value for this record across all CPUs.
        uint64_t total_cnt = 0;
        int i;
        int end = (running_on_kernel < NETDATA_KERNEL_V4_15) ? 1 : ebpf_nprocs;
        for (i = 0; i < end; i++) {
            total_cnt += mdflush_ebpf_vals[i];
        }

        // can now safely publish count for existing records.
        v->cnt = total_cnt;

        // can now safely publish new record.
        if (v_is_new) {
            avl_t *check = avl_insert_lock(&mdflush_pub, (avl_t *)v);
            if (check != (avl_t *)v) {
                error("Internal error, cannot insert the AVL tree.");
            }
        }
    }
}
/**
 * Read eBPF maps for mdflush.
 *
 * @param ptr an `ebpf_module_t *`.
 *
 * @return always NULL.
 */
static void *mdflush_reader(void *ptr)
{
    netdata_thread_cleanup_push(mdflush_cleanup, ptr);

    heartbeat_t hb;
    heartbeat_init(&hb);

    ebpf_module_t *em = (ebpf_module_t *)ptr;

    usec_t step = NETDATA_MDFLUSH_SLEEP_MS * em->update_every;
    while (!ebpf_exit_plugin) {
        (void)heartbeat_next(&hb, step);
        mdflush_read_count_map();
    }

    netdata_thread_cleanup_pop(1);

    return NULL;
}
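
/**
 * Create the md.flush chart.
 *
 * Dimensions are added later, as MD devices are discovered.
 *
 * @param update_every data collection interval in seconds.
 */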
static void mdflush_create_charts(int update_every)
{
    ebpf_create_chart(
        "mdstat",
        "mdstat_flush",
        "MD flushes",
        "flushes",
        "flush (eBPF)",
        "md.flush",
        NETDATA_EBPF_CHART_TYPE_STACKED,
        NETDATA_CHART_PRIO_MDSTAT_FLUSH,
        NULL, NULL, 0, update_every,
        NETDATA_EBPF_MODULE_NAME_MDFLUSH
    );

    fflush(stdout);
}
// callback for avl tree traversal on `mdflush_pub`.
static int mdflush_write_dims(void *entry, void *data)
{
    UNUSED(data);

    netdata_mdflush_t *v = entry;

    // records get dynamically added in, so add the dim if we haven't yet.
    if (!v->dim_exists) {
        ebpf_write_global_dimension(
            v->disk_name, v->disk_name,
            ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]
        );
        v->dim_exists = true;
    }

    write_chart_dimension(v->disk_name, v->cnt);

    return 1;
}
/**
 * Main loop for this collector.
 */
static void mdflush_collector(ebpf_module_t *em)
{
    mdflush_ebpf_vals = callocz(ebpf_nprocs, sizeof(mdflush_ebpf_val_t));

    avl_init_lock(&mdflush_pub, mdflush_val_cmp);

    // create reader thread.
    mdflush_threads.thread = mallocz(sizeof(netdata_thread_t));
    mdflush_threads.start_routine = mdflush_reader;
    netdata_thread_create(
        mdflush_threads.thread,
        mdflush_threads.name,
        NETDATA_THREAD_OPTION_DEFAULT,
        mdflush_reader,
        em
    );

    // create chart and static dims.
    pthread_mutex_lock(&lock);
    mdflush_create_charts(em->update_every);
    ebpf_update_stats(&plugin_statistics, em);
    pthread_mutex_unlock(&lock);

    // loop and read from published data until ebpf plugin is closed.
    heartbeat_t hb;
    heartbeat_init(&hb);
    usec_t step = em->update_every * USEC_PER_SEC;
    while (!ebpf_exit_plugin) {
        (void)heartbeat_next(&hb, step);
        if (ebpf_exit_plugin)
            break;

        // write dims now for all hitherto discovered devices.
        pthread_mutex_lock(&lock);
        write_begin_chart("mdstat", "mdstat_flush");
        avl_traverse_lock(&mdflush_pub, mdflush_write_dims, NULL);
        write_end_chart();
        pthread_mutex_unlock(&lock);
    }
}
/**
 * mdflush thread.
 *
 * @param ptr an `ebpf_module_t *`.
 *
 * @return always NULL.
 */
void *ebpf_mdflush_thread(void *ptr)
{
    netdata_thread_cleanup_push(mdflush_exit, ptr);

    ebpf_module_t *em = (ebpf_module_t *)ptr;
    em->maps = mdflush_maps;

    char *md_flush_request = ebpf_find_symbol("md_flush_request");
    if (!md_flush_request) {
        em->thread->enabled = NETDATA_THREAD_EBPF_STOPPED;
        error("Cannot monitor MD devices, because md is not loaded.");
    }
    freez(md_flush_request);

    if (em->thread->enabled == NETDATA_THREAD_EBPF_STOPPED) {
        goto endmdflush;
    }

    em->probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &em->objects);
    if (!em->probe_links) {
        em->enabled = NETDATA_THREAD_EBPF_STOPPED;
        goto endmdflush;
    }

    mdflush_collector(em);

endmdflush:
    ebpf_update_disabled_plugin_stats(em);

    netdata_thread_cleanup_pop(1);

    return NULL;
}