ebpf_oomkill.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418
  1. // SPDX-License-Identifier: GPL-3.0-or-later
  2. #include "ebpf.h"
  3. #include "ebpf_oomkill.h"
// Configuration tree for the oomkill collector; sections are loaded from the
// plugin configuration file and indexed by an AVL tree for fast lookup.
struct config oomkill_config = { .first_section = NULL,
                                 .last_section = NULL,
                                 .mutex = NETDATA_MUTEX_INITIALIZER,
                                 .index = { .avl_tree = { .root = NULL, .compar = appconfig_section_compare },
                                            .rwlock = AVL_LOCK_INITIALIZER } };
// Index of the kill-count map inside oomkill_maps[].
#define OOMKILL_MAP_KILLCNT 0
// eBPF maps used by this collector. The table is NULL-name terminated;
// "tbl_oomkill" holds the PIDs of processes killed by the OOM killer.
static ebpf_local_maps_t oomkill_maps[] = {
    {
        .name = "tbl_oomkill",
        .internal_input = NETDATA_OOMKILL_MAX_ENTRIES,
        .user_input = 0,
        .type = NETDATA_EBPF_MAP_STATIC,
        .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED
    },
    /* end */
    {
        .name = NULL,
        .internal_input = 0,
        .user_input = 0,
        .type = NETDATA_EBPF_MAP_CONTROLLER,
        .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED
    }
};
// Kernel tracepoints this collector attaches to (oom:mark_victim fires when a
// process is selected by the OOM killer). NULL class/event terminates the list.
static ebpf_tracepoint_t oomkill_tracepoints[] = {
    {.enabled = false, .class = "oom", .event = "mark_victim"},
    /* end */
    {.enabled = false, .class = NULL, .event = NULL}
};
// Links returned by ebpf_load_program(); destroyed in oomkill_cleanup().
static struct bpf_link **probe_links = NULL;
// The loaded BPF object, closed in oomkill_cleanup().
static struct bpf_object *objects = NULL;
// Single aggregated dimension used by the per-app/cgroup oomkill charts.
static netdata_publish_syscall_t oomkill_publish_aggregated = {.name = "oomkill", .dimension = "oomkill",
                                                               .algorithm = "absolute",
                                                               .next = NULL};
  37. /**
  38. * Clean up the main thread.
  39. *
  40. * @param ptr thread data.
  41. */
  42. static void oomkill_cleanup(void *ptr)
  43. {
  44. ebpf_module_t *em = (ebpf_module_t *)ptr;
  45. if (!em->enabled) {
  46. return;
  47. }
  48. if (probe_links) {
  49. struct bpf_program *prog;
  50. size_t i = 0 ;
  51. bpf_object__for_each_program(prog, objects) {
  52. bpf_link__destroy(probe_links[i]);
  53. i++;
  54. }
  55. if (objects)
  56. bpf_object__close(objects);
  57. }
  58. }
  59. static void oomkill_write_data(int32_t *keys, uint32_t total)
  60. {
  61. // for each app, see if it was OOM killed. record as 1 if so otherwise 0.
  62. struct target *w;
  63. for (w = apps_groups_root_target; w != NULL; w = w->next) {
  64. if (likely(w->exposed && w->processes)) {
  65. bool was_oomkilled = false;
  66. struct pid_on_target *pids = w->root_pid;
  67. while (pids) {
  68. uint32_t j;
  69. for (j = 0; j < total; j++) {
  70. if (pids->pid == keys[j]) {
  71. was_oomkilled = true;
  72. // set to 0 so we consider it "done".
  73. keys[j] = 0;
  74. goto write_dim;
  75. }
  76. }
  77. pids = pids->next;
  78. }
  79. write_dim:;
  80. write_chart_dimension(w->name, was_oomkilled);
  81. }
  82. }
  83. // for any remaining keys for which we couldn't find a group, this could be
  84. // for various reasons, but the primary one is that the PID has not yet
  85. // been picked up by the process thread when parsing the proc filesystem.
  86. // since it's been OOM killed, it will never be parsed in the future, so
  87. // we have no choice but to dump it into `other`.
  88. uint32_t j;
  89. uint32_t rem_count = 0;
  90. for (j = 0; j < total; j++) {
  91. int32_t key = keys[j];
  92. if (key != 0) {
  93. rem_count += 1;
  94. }
  95. }
  96. if (rem_count > 0) {
  97. write_chart_dimension("other", rem_count);
  98. }
  99. }
/**
 * Create specific OOMkill charts
 *
 * Create the per-cgroup OOM kills chart for the given chart type.
 *
 * @param type the chart type (cgroup name).
 * @param update_every value to overwrite the update frequency set by the server.
 */
static void ebpf_create_specific_oomkill_charts(char *type, int update_every)
{
    ebpf_create_chart(type, NETDATA_OOMKILL_CHART, "OOM kills. This chart is provided by eBPF plugin.",
                      EBPF_COMMON_DIMENSION_KILLS, NETDATA_EBPF_MEMORY_GROUP,
                      NETDATA_CGROUP_OOMKILLS_CONTEXT, NETDATA_EBPF_CHART_TYPE_LINE,
                      NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5600,
                      ebpf_create_global_dimension,
                      &oomkill_publish_aggregated, 1, update_every, NETDATA_EBPF_MODULE_NAME_OOMKILL);
}
/**
 * Create Systemd OOMkill Charts
 *
 * Create the aggregated OOM kills chart for systemd services.
 *
 * @param update_every value to overwrite the update frequency set by the server.
 **/
static void ebpf_create_systemd_oomkill_charts(int update_every)
{
    ebpf_create_charts_on_systemd(NETDATA_OOMKILL_CHART, "OOM kills. This chart is provided by eBPF plugin.",
                                  EBPF_COMMON_DIMENSION_KILLS, NETDATA_EBPF_MEMORY_GROUP,
                                  NETDATA_EBPF_CHART_TYPE_LINE, 20191,
                                  ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], NULL,
                                  NETDATA_EBPF_MODULE_NAME_OOMKILL, update_every);
}
  132. /**
  133. * Send Systemd charts
  134. *
  135. * Send collected data to Netdata.
  136. *
  137. * @return It returns the status for chart creation, if it is necessary to remove a specific dimension, zero is returned
  138. * otherwise function returns 1 to avoid chart recreation
  139. */
  140. static int ebpf_send_systemd_oomkill_charts()
  141. {
  142. int ret = 1;
  143. ebpf_cgroup_target_t *ect;
  144. write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_OOMKILL_CHART);
  145. for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
  146. if (unlikely(ect->systemd) && unlikely(ect->updated)) {
  147. write_chart_dimension(ect->name, (long long) ect->oomkill);
  148. ect->oomkill = 0;
  149. } else
  150. ret = 0;
  151. }
  152. write_end_chart();
  153. return ret;
  154. }
/*
 * Send Specific OOMkill data
 *
 * Send data for a specific cgroup chart.
 *
 * @param type chart type (cgroup name)
 * @param value number of OOM kills observed for this cgroup
 */
static void ebpf_send_specific_oomkill_data(char *type, int value)
{
    write_begin_chart(type, NETDATA_OOMKILL_CHART);
    write_chart_dimension(oomkill_publish_aggregated.name, (long long)value);
    write_end_chart();
}
/**
 * Obsolete specific OOMkill charts
 *
 * Mark the per-cgroup OOM kills chart as obsolete so the dashboard removes it.
 * (The previous header comment said "Create" — it was copied from the create
 * function; this function obsoletes the chart.)
 *
 * @param type the chart type (cgroup name).
 * @param update_every value to overwrite the update frequency set by the server.
 */
static void ebpf_obsolete_specific_oomkill_charts(char *type, int update_every)
{
    ebpf_write_chart_obsolete(type, NETDATA_OOMKILL_CHART, "OOM kills. This chart is provided by eBPF plugin.",
                              EBPF_COMMON_DIMENSION_KILLS, NETDATA_EBPF_MEMORY_GROUP,
                              NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_OOMKILLS_CONTEXT,
                              NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5600, update_every);
}
/**
 * Send data to Netdata calling auxiliary functions.
 *
 * Publishes OOM-kill data for all cgroups: the systemd services chart is
 * created once and (re)sent each cycle; non-systemd cgroups get individual
 * charts that are created, fed, or obsoleted depending on their state.
 * Takes mutex_cgroup_shm to protect the shared cgroup list.
 *
 * @param update_every value to overwrite the update frequency set by the server.
 */
void ebpf_oomkill_send_cgroup_data(int update_every)
{
    if (!ebpf_cgroup_pids)
        return;

    pthread_mutex_lock(&mutex_cgroup_shm);
    ebpf_cgroup_target_t *ect;

    int has_systemd = shm_ebpf_cgroup.header->systemd_enabled;
    if (has_systemd) {
        // create the systemd chart once; afterwards the send function's return
        // value (0 = needs recreation) feeds back into this flag.
        static int systemd_charts = 0;
        if (!systemd_charts) {
            ebpf_create_systemd_oomkill_charts(update_every);
            systemd_charts = 1;
        }

        systemd_charts = ebpf_send_systemd_oomkill_charts();
    }

    for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
        if (ect->systemd)
            continue;

        if (!(ect->flags & NETDATA_EBPF_CGROUP_HAS_OOMKILL_CHART) && ect->updated) {
            ebpf_create_specific_oomkill_charts(ect->name, update_every);
            ect->flags |= NETDATA_EBPF_CGROUP_HAS_OOMKILL_CHART;
        }

        if (ect->flags & NETDATA_EBPF_CGROUP_HAS_OOMKILL_CHART && ect->updated) {
            ebpf_send_specific_oomkill_data(ect->name, ect->oomkill);
        } else {
            // NOTE(review): this also emits an obsolete message for cgroups
            // whose chart was never created (flag unset, not updated) —
            // presumably harmless, but confirm this is intended.
            ebpf_obsolete_specific_oomkill_charts(ect->name, update_every);
            ect->flags &= ~NETDATA_EBPF_CGROUP_HAS_OOMKILL_CHART;
        }
    }
    pthread_mutex_unlock(&mutex_cgroup_shm);
}
  220. /**
  221. * Read data
  222. *
  223. * Read OOMKILL events from table.
  224. *
  225. * @param keys vector where data will be stored
  226. *
  227. * @return It returns the number of read elements
  228. */
  229. static uint32_t oomkill_read_data(int32_t *keys)
  230. {
  231. // the first `i` entries of `keys` will contain the currently active PIDs
  232. // in the eBPF map.
  233. uint32_t i = 0;
  234. uint32_t curr_key = 0;
  235. uint32_t key = 0;
  236. int mapfd = oomkill_maps[OOMKILL_MAP_KILLCNT].map_fd;
  237. while (bpf_map_get_next_key(mapfd, &curr_key, &key) == 0) {
  238. curr_key = key;
  239. keys[i] = (int32_t)key;
  240. i += 1;
  241. // delete this key now that we've recorded its existence. there's no
  242. // race here, as the same PID will only get OOM killed once.
  243. int test = bpf_map_delete_elem(mapfd, &key);
  244. if (unlikely(test < 0)) {
  245. // since there's only 1 thread doing these deletions, it should be
  246. // impossible to get this condition.
  247. error("key unexpectedly not available for deletion.");
  248. }
  249. }
  250. return i;
  251. }
  252. /**
  253. * Update cgroup
  254. *
  255. * Update cgroup data based in
  256. *
  257. * @param keys vector with pids that had oomkill event
  258. * @param total number of elements in keys vector.
  259. */
  260. static void ebpf_update_oomkill_cgroup(int32_t *keys, uint32_t total)
  261. {
  262. ebpf_cgroup_target_t *ect;
  263. for (ect = ebpf_cgroup_pids; ect; ect = ect->next) {
  264. ect->oomkill = 0;
  265. struct pid_on_target2 *pids;
  266. for (pids = ect->pids; pids; pids = pids->next) {
  267. uint32_t j;
  268. int32_t pid = pids->pid;
  269. for (j = 0; j < total; j++) {
  270. if (pid == keys[j]) {
  271. ect->oomkill = 1;
  272. break;
  273. }
  274. }
  275. }
  276. }
  277. }
/**
 * Main loop for this collector.
 *
 * Waits on the shared collect_data_cond_var each cycle; every `update_every`
 * wakeups it drains the kernel map, updates cgroup state, and writes the
 * apps (and optionally cgroup) charts under the global output lock.
 *
 * @param em module structure carrying update_every and cgroup settings.
 */
static void oomkill_collector(ebpf_module_t *em)
{
    int cgroups = em->cgroup_charts;
    int update_every = em->update_every;
    // start at update_every - 1 so the first wakeup triggers a collection.
    int counter = update_every - 1;
    int32_t keys[NETDATA_OOMKILL_MAX_ENTRIES];
    memset(keys, 0, sizeof(keys));
    // loop and read until ebpf plugin is closed.
    while (!close_ebpf_plugin) {
        pthread_mutex_lock(&collect_data_mutex);
        pthread_cond_wait(&collect_data_cond_var, &collect_data_mutex);
        if (++counter == update_every) {
            counter = 0;
            pthread_mutex_lock(&lock);
            uint32_t count = oomkill_read_data(keys);
            if (cgroups && count)
                ebpf_update_oomkill_cgroup(keys, count);
            // write everything from the ebpf map.
            if (cgroups)
                ebpf_oomkill_send_cgroup_data(update_every);
            write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_OOMKILL_CHART);
            oomkill_write_data(keys, count);
            write_end_chart();
            pthread_mutex_unlock(&lock);
        }
        pthread_mutex_unlock(&collect_data_mutex);
    }
}
/**
 * Create apps charts
 *
 * Call ebpf_create_charts_on_apps to create the charts on the apps submenu.
 *
 * @param em  a pointer to the structure with the default values.
 * @param ptr a pointer to the apps groups root target list.
 */
void ebpf_oomkill_create_apps_charts(struct ebpf_module *em, void *ptr)
{
    struct target *root = ptr;
    ebpf_create_charts_on_apps(NETDATA_OOMKILL_CHART,
                               "OOM kills",
                               EBPF_COMMON_DIMENSION_KILLS,
                               "mem",
                               NETDATA_EBPF_CHART_TYPE_STACKED,
                               20020,
                               ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX],
                               root, em->update_every, NETDATA_EBPF_MODULE_NAME_OOMKILL);
}
/**
 * OOM kill tracking thread.
 *
 * Entry point: requires apps integration (all_pids and apps_charts), enables
 * the tracepoints, loads the eBPF program, then runs the collector loop.
 * On any failure it jumps to the common exit path so disabled-plugin stats
 * are updated and the pushed cleanup handler runs.
 *
 * @param ptr a `ebpf_module_t *`.
 * @return always NULL.
 */
void *ebpf_oomkill_thread(void *ptr)
{
    netdata_thread_cleanup_push(oomkill_cleanup, ptr);
    ebpf_module_t *em = (ebpf_module_t *)ptr;
    em->maps = oomkill_maps;
    if (unlikely(!all_pids || !em->apps_charts)) {
        // When we are not running integration with apps, we won't fill necessary variables for this thread to run, so
        // we need to disable it.
        if (em->enabled)
            info("Disabling OOMKILL thread, because apps integration is completely disabled.");
        em->enabled = 0;
    }
    if (!em->enabled) {
        goto endoomkill;
    }
    // tracepoints must be enabled before loading the program that attaches to them.
    if (ebpf_enable_tracepoints(oomkill_tracepoints) == 0) {
        em->enabled = CONFIG_BOOLEAN_NO;
        goto endoomkill;
    }
    probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &objects);
    if (!probe_links) {
        em->enabled = CONFIG_BOOLEAN_NO;
        goto endoomkill;
    }
    // global lock guards the shared plugin statistics structure.
    pthread_mutex_lock(&lock);
    ebpf_update_stats(&plugin_statistics, em);
    pthread_mutex_unlock(&lock);
    oomkill_collector(em);
endoomkill:
    if (!em->enabled)
        ebpf_update_disabled_plugin_stats(em);
    netdata_thread_cleanup_pop(1);
    return NULL;
}