ebpf_mdflush.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456
  1. // SPDX-License-Identifier: GPL-3.0-or-later
  2. #include "ebpf.h"
  3. #include "ebpf_mdflush.h"
// Thread-specific configuration (read from `mdflush.conf`): a linked list of
// sections indexed by an AVL tree guarded by a read-write lock.
struct config mdflush_config = { .first_section = NULL,
    .last_section = NULL,
    .mutex = NETDATA_MUTEX_INITIALIZER,
    .index = { .avl_tree = { .root = NULL, .compar = appconfig_section_compare },
               .rwlock = AVL_LOCK_INITIALIZER } };
// Index of the flush-count hash table inside `mdflush_maps`.
#define MDFLUSH_MAP_COUNT 0

// eBPF maps owned by this thread. The array is terminated by the
// NETDATA_EBPF_MAP_CONTROLLER entry with `.name = NULL`.
static ebpf_local_maps_t mdflush_maps[] = {
    {
        .name = "tbl_mdflush",
        .internal_input = 1024,
        .user_input = 0,
        .type = NETDATA_EBPF_MAP_STATIC,
        .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED,
#ifdef LIBBPF_MAJOR_VERSION
        // per-CPU hash: values must be summed across cores when read
        .map_type = BPF_MAP_TYPE_PERCPU_HASH
#endif
    },
    /* end */
    {
        .name = NULL,
        .internal_input = 0,
        .user_input = 0,
        .type = NETDATA_EBPF_MAP_CONTROLLER,
        .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED
    }
};
// Kernel functions this thread attaches to, with the default attach mode;
// NULL-name entry terminates the list.
netdata_ebpf_targets_t mdflush_targets[] = { {.name = "md_flush_request", .mode = EBPF_LOAD_TRAMPOLINE},
                                             {.name = NULL, .mode = EBPF_LOAD_TRAMPOLINE}};
// store for "published" data from the reader thread, which the collector
// thread will write to netdata agent.
static avl_tree_lock mdflush_pub;

// tmp store for mdflush values we get from a per-CPU eBPF map;
// allocated with `ebpf_nprocs` entries in mdflush_collector().
static mdflush_ebpf_val_t *mdflush_ebpf_vals = NULL;
  37. #ifdef LIBBPF_MAJOR_VERSION
/**
 * Disable probes
 *
 * Disable the kprobe program so that the trampoline (fentry) variant is used.
 *
 * @param obj the loaded object structure.
 */
static inline void ebpf_disable_probes(struct mdflush_bpf *obj)
{
    bpf_program__set_autoload(obj->progs.netdata_md_flush_request_kprobe, false);
}
/**
 * Disable trampolines
 *
 * Disable trampolines (fentry) so that kprobes are used instead.
 *
 * @param obj the loaded object structure.
 */
static inline void ebpf_disable_trampoline(struct mdflush_bpf *obj)
{
    bpf_program__set_autoload(obj->progs.netdata_md_flush_request_fentry, false);
}
/**
 * Set Trampoline
 *
 * Define the kernel function (`md_flush_request`) the fentry program
 * attaches to.
 *
 * @param obj the loaded object structure.
 */
static void ebpf_set_trampoline_target(struct mdflush_bpf *obj)
{
    bpf_program__set_attach_target(obj->progs.netdata_md_flush_request_fentry, 0,
                                   mdflush_targets[NETDATA_MD_FLUSH_REQUEST].name);
}
/**
 * Load probe
 *
 * Attach the kprobe program to the `md_flush_request` kernel function.
 *
 * @param obj the loaded object structure.
 *
 * @return 0 on success, a libbpf error code otherwise.
 */
static inline int ebpf_load_probes(struct mdflush_bpf *obj)
{
    obj->links.netdata_md_flush_request_kprobe = bpf_program__attach_kprobe(obj->progs.netdata_md_flush_request_kprobe,
                                                                            false,
                                                                            mdflush_targets[NETDATA_MD_FLUSH_REQUEST].name);
    return libbpf_get_error(obj->links.netdata_md_flush_request_kprobe);
}
  86. /**
  87. * Load and Attach
  88. *
  89. * Load and attach bpf codes according user selection.
  90. *
  91. * @param obj the loaded object structure.
  92. * @param em the structure with configuration
  93. */
  94. static inline int ebpf_mdflush_load_and_attach(struct mdflush_bpf *obj, ebpf_module_t *em)
  95. {
  96. int mode = em->targets[NETDATA_MD_FLUSH_REQUEST].mode;
  97. if (mode == EBPF_LOAD_TRAMPOLINE) { // trampoline
  98. ebpf_disable_probes(obj);
  99. ebpf_set_trampoline_target(obj);
  100. } else // kprobe
  101. ebpf_disable_trampoline(obj);
  102. int ret = mdflush_bpf__load(obj);
  103. if (ret) {
  104. fprintf(stderr, "failed to load BPF object: %d\n", ret);
  105. return -1;
  106. }
  107. if (mode == EBPF_LOAD_TRAMPOLINE)
  108. ret = mdflush_bpf__attach(obj);
  109. else
  110. ret = ebpf_load_probes(obj);
  111. return ret;
  112. }
  113. #endif
/**
 * Obsolete global
 *
 * Mark the global chart created by this thread as obsolete, so the agent
 * removes it when the thread stops.
 *
 * @param em a pointer to `struct ebpf_module`
 */
static void ebpf_obsolete_mdflush_global(ebpf_module_t *em)
{
    ebpf_write_chart_obsolete("mdstat",
                              "mdstat_flush",
                              "",
                              "MD flushes",
                              "flushes",
                              "flush (eBPF)",
                              NETDATA_EBPF_CHART_TYPE_STACKED,
                              NULL,
                              NETDATA_CHART_PRIO_MDSTAT_FLUSH,
                              em->update_every);
}
/**
 * MDflush exit
 *
 * Thread-cleanup handler: obsolete the chart, unload the eBPF program and
 * update plugin statistics before the thread stops.
 *
 * @param ptr thread data (an `ebpf_module_t *`).
 */
static void mdflush_exit(void *ptr)
{
    ebpf_module_t *em = (ebpf_module_t *)ptr;

    // only obsolete the chart if the collector actually got as far as
    // creating it (i.e. the thread reached the running state)
    if (em->enabled == NETDATA_THREAD_EBPF_FUNCTION_RUNNING) {
        pthread_mutex_lock(&lock);
        ebpf_obsolete_mdflush_global(em);
        pthread_mutex_unlock(&lock);
        fflush(stdout);
    }

    ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps, EBPF_ACTION_STAT_REMOVE);

    // release legacy probe links/objects if they were loaded
    if (em->objects) {
        ebpf_unload_legacy_code(em->objects, em->probe_links);
        em->objects = NULL;
        em->probe_links = NULL;
    }

    // flip module state under the exit-cleanup lock so other threads see a
    // consistent enabled/statistics pair
    pthread_mutex_lock(&ebpf_exit_cleanup);
    em->enabled = NETDATA_THREAD_EBPF_STOPPED;
    ebpf_update_stats(&plugin_statistics, em);
    pthread_mutex_unlock(&ebpf_exit_cleanup);
}
  161. /**
  162. * Compare mdflush values.
  163. *
  164. * @param a `netdata_mdflush_t *`.
  165. * @param b `netdata_mdflush_t *`.
  166. *
  167. * @return 0 if a==b, 1 if a>b, -1 if a<b.
  168. */
  169. static int mdflush_val_cmp(void *a, void *b)
  170. {
  171. netdata_mdflush_t *ptr1 = a;
  172. netdata_mdflush_t *ptr2 = b;
  173. if (ptr1->unit > ptr2->unit) {
  174. return 1;
  175. }
  176. else if (ptr1->unit < ptr2->unit) {
  177. return -1;
  178. }
  179. else {
  180. return 0;
  181. }
  182. }
/**
 * Read count map
 *
 * Walk the `tbl_mdflush` hash table, sum per-CPU flush counts for each md
 * unit and publish the totals into the `mdflush_pub` AVL tree.
 *
 * @param maps_per_core do I need to read all cores?
 */
static void mdflush_read_count_map(int maps_per_core)
{
    int mapfd = mdflush_maps[MDFLUSH_MAP_COUNT].map_fd;
    // (uint32_t)-1 is used as the "before first key" sentinel for
    // bpf_map_get_next_key() iteration
    mdflush_ebpf_key_t curr_key = (uint32_t)-1;
    mdflush_ebpf_key_t key = (uint32_t)-1;
    netdata_mdflush_t search_v;
    netdata_mdflush_t *v = NULL;

    while (bpf_map_get_next_key(mapfd, &curr_key, &key) == 0) {
        curr_key = key;

        // get val for this key.
        int test = bpf_map_lookup_elem(mapfd, &key, mdflush_ebpf_vals);
        if (unlikely(test < 0)) {
            continue;
        }

        // is this record saved yet?
        //
        // if not, make a new one, mark it as unsaved for now, and continue; we
        // will insert it at the end after all of its values are correctly set,
        // so that we can safely publish it to the collector within a single,
        // short locked operation.
        //
        // otherwise simply continue; we will only update the flush count,
        // which can be republished safely without a lock.
        //
        // NOTE: lock isn't strictly necessary for this initial search, as only
        // this thread does writing, but the AVL is using a read-write lock so
        // there is no congestion.
        bool v_is_new = false;
        search_v.unit = key;
        v = (netdata_mdflush_t *)avl_search_lock(
            &mdflush_pub,
            (avl_t *)&search_v
        );
        if (unlikely(v == NULL)) {
            // flush count can only be added reliably at a later time.
            // when they're added, only then will we AVL insert.
            v = callocz(1, sizeof(netdata_mdflush_t));
            v->unit = key;
            sprintf(v->disk_name, "md%u", key);
            v->dim_exists = false;
            v_is_new = true;
        }

        // we must add up count value for this record across all CPUs.
        uint64_t total_cnt = 0;
        int i;
        // with a per-CPU map the lookup filled one slot per core; otherwise
        // only slot 0 is meaningful
        int end = (!maps_per_core) ? 1 : ebpf_nprocs;
        for (i = 0; i < end; i++) {
            total_cnt += mdflush_ebpf_vals[i];
        }

        // can now safely publish count for existing records.
        v->cnt = total_cnt;

        // can now safely publish new record.
        if (v_is_new) {
            avl_t *check = avl_insert_lock(&mdflush_pub, (avl_t *)v);
            if (check != (avl_t *)v) {
                netdata_log_error("Internal error, cannot insert the AVL tree.");
            }
        }
    }
}
/**
 * Create charts
 *
 * Create the global `mdstat.mdstat_flush` chart; dimensions are added
 * dynamically later as md devices are discovered (see mdflush_write_dims()).
 *
 * @param update_every data collection frequency in seconds.
 */
static void mdflush_create_charts(int update_every)
{
    ebpf_create_chart(
        "mdstat",
        "mdstat_flush",
        "MD flushes",
        "flushes",
        "flush (eBPF)",
        "md.flush",
        NETDATA_EBPF_CHART_TYPE_STACKED,
        NETDATA_CHART_PRIO_MDSTAT_FLUSH,
        NULL, NULL, 0, update_every,
        NETDATA_EBPF_MODULE_NAME_MDFLUSH
    );

    fflush(stdout);
}
/**
 * Write dimensions
 *
 * Callback for avl tree traversal on `mdflush_pub`: create the dimension on
 * first sight of a device, then write its flush count.
 *
 * @param entry a `netdata_mdflush_t *` record.
 * @param data unused.
 *
 * @return always 1 (continue traversal).
 */
static int mdflush_write_dims(void *entry, void *data)
{
    UNUSED(data);

    netdata_mdflush_t *v = entry;

    // records get dynamically added in, so add the dim if we haven't yet.
    if (!v->dim_exists) {
        ebpf_write_global_dimension(
            v->disk_name, v->disk_name,
            ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]
        );
        v->dim_exists = true;
    }

    write_chart_dimension(v->disk_name, v->cnt);

    return 1;
}
/**
 * Main loop for this collector.
 *
 * Allocates the per-CPU read buffer, creates the chart, then periodically
 * reads the eBPF map and publishes dimensions until the plugin exits or the
 * module lifetime is reached.
 *
 * @param em a pointer to `struct ebpf_module`.
 */
static void mdflush_collector(ebpf_module_t *em)
{
    // one slot per core for per-CPU map lookups
    mdflush_ebpf_vals = callocz(ebpf_nprocs, sizeof(mdflush_ebpf_val_t));

    int update_every = em->update_every;
    avl_init_lock(&mdflush_pub, mdflush_val_cmp);

    // create chart and static dims.
    pthread_mutex_lock(&lock);
    mdflush_create_charts(update_every);
    ebpf_update_stats(&plugin_statistics, em);
    ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps, EBPF_ACTION_STAT_ADD);
    pthread_mutex_unlock(&lock);

    // loop and read from published data until ebpf plugin is closed.
    heartbeat_t hb;
    heartbeat_init(&hb);
    // start at update_every-1 so the first heartbeat tick triggers a read
    int counter = update_every - 1;
    int maps_per_core = em->maps_per_core;
    uint32_t running_time = 0;
    uint32_t lifetime = em->lifetime;
    while (!ebpf_plugin_exit && running_time < lifetime) {
        (void)heartbeat_next(&hb, USEC_PER_SEC);

        // only collect once every `update_every` seconds
        if (ebpf_plugin_exit || ++counter != update_every)
            continue;

        counter = 0;
        mdflush_read_count_map(maps_per_core);
        pthread_mutex_lock(&lock);
        // write dims now for all hitherto discovered devices.
        ebpf_write_begin_chart("mdstat", "mdstat_flush", "");
        avl_traverse_lock(&mdflush_pub, mdflush_write_dims, NULL);
        ebpf_write_end_chart();
        pthread_mutex_unlock(&lock);

        // account running time under the cleanup lock; reset to one interval
        // when the module's counter was externally cleared
        pthread_mutex_lock(&ebpf_exit_cleanup);
        if (running_time && !em->running_time)
            running_time = update_every;
        else
            running_time += update_every;

        em->running_time = running_time;
        pthread_mutex_unlock(&ebpf_exit_cleanup);
    }
}
/**
 * Load BPF
 *
 * Load BPF files: legacy probes, or (with libbpf >= 1) CO-RE code with a
 * trampoline-to-kprobe fallback when trampoline attachment fails.
 *
 * @param em the structure with configuration
 *
 * @return It returns 0 on success and -1 otherwise.
 */
static int ebpf_mdflush_load_bpf(ebpf_module_t *em)
{
    int ret = 0;

    if (em->load & EBPF_LOAD_LEGACY) {
        em->probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &em->objects);
        if (!em->probe_links) {
            ret = -1;
        }
    }
#ifdef LIBBPF_MAJOR_VERSION
    else {
        mdflush_bpf_obj = mdflush_bpf__open();
        if (!mdflush_bpf_obj)
            ret = -1;
        else {
            ret = ebpf_mdflush_load_and_attach(mdflush_bpf_obj, em);
            // trampoline failed: re-open the skeleton and retry in kprobe
            // mode (the loaded object cannot be re-configured in place)
            if (ret && em->targets[NETDATA_MD_FLUSH_REQUEST].mode == EBPF_LOAD_TRAMPOLINE) {
                mdflush_bpf__destroy(mdflush_bpf_obj);
                mdflush_bpf_obj = mdflush_bpf__open();
                if (!mdflush_bpf_obj)
                    ret = -1;
                else {
                    em->targets[NETDATA_MD_FLUSH_REQUEST].mode = EBPF_LOAD_PROBE;
                    ret = ebpf_mdflush_load_and_attach(mdflush_bpf_obj, em);
                }
            }
        }
    }
#endif

    return ret;
}
/**
 * mdflush thread.
 *
 * Thread entry point: verify the md module is loaded, load the eBPF code and
 * run the collector loop. Cleanup happens via mdflush_exit() on pop.
 *
 * @param ptr a `ebpf_module_t *`.
 * @return always NULL.
 */
void *ebpf_mdflush_thread(void *ptr)
{
    netdata_thread_cleanup_push(mdflush_exit, ptr);

    ebpf_module_t *em = (ebpf_module_t *)ptr;
    em->maps = mdflush_maps;

    // the kprobe/trampoline target only exists when the md module is loaded
    char *md_flush_request = ebpf_find_symbol("md_flush_request");
    if (!md_flush_request) {
        netdata_log_error("Cannot monitor MD devices, because md is not loaded.");
        goto endmdflush;
    }

#ifdef LIBBPF_MAJOR_VERSION
    ebpf_define_map_type(em->maps, em->maps_per_core, running_on_kernel);
    ebpf_adjust_thread_load(em, default_btf);
#endif
    if (ebpf_mdflush_load_bpf(em)) {
        netdata_log_error("Cannot load eBPF software.");
        goto endmdflush;
    }

    mdflush_collector(em);

endmdflush:
    // freez(NULL) is a no-op, so this is safe on the early-exit path
    freez(md_flush_request);
    ebpf_update_disabled_plugin_stats(em);

    netdata_thread_cleanup_pop(1);

    return NULL;
}