
eBPF and cgroup (process, file descriptor, VFS, directory cache and OOMkill) (#11611)

thiagoftsm 3 years ago
commit dbc8fefd54

+ 1 - 0
collectors/ebpf.plugin/ebpf.h

@@ -117,6 +117,7 @@ typedef struct ebpf_tracepoint {
 #define NETDATA_EBPF_MEMORY_GROUP "mem"
 #define NETDATA_EBPF_SYSTEM_GROUP "system"
 #define NETDATA_SYSTEM_SWAP_SUBMENU "swap"
+#define NETDATA_SYSTEM_CGROUP_SWAP_SUBMENU "swap (eBPF)"
 #define NETDATA_SYSTEM_IPC_SHM_SUBMENU "ipc shared memory"
 
 // Log file

+ 1 - 4
collectors/ebpf.plugin/ebpf_apps.h

@@ -12,6 +12,7 @@
 
 #define NETDATA_APPS_FAMILY "apps"
 #define NETDATA_APPS_FILE_GROUP "file_access"
+#define NETDATA_APPS_FILE_CGROUP_GROUP "file_access (eBPF)"
 #define NETDATA_APPS_PROCESS_GROUP "process (eBPF)"
 #define NETDATA_APPS_NET_GROUP "net"
 #define NETDATA_APPS_IPC_SHM_GROUP "ipc shared memory"
@@ -353,18 +354,14 @@ typedef struct ebpf_process_stat {
     uint32_t pid;
 
     //Counter
-    uint32_t open_call;
     uint32_t exit_call;
     uint32_t release_call;
     uint32_t fork_call;
     uint32_t clone_call;
-    uint32_t close_call;
 
     //Counter
-    uint32_t open_err;
     uint32_t fork_err;
     uint32_t clone_err;
-    uint32_t close_err;
 
     uint8_t removeme;
 } ebpf_process_stat_t;

+ 3 - 2
collectors/ebpf.plugin/ebpf_cgroup.c

@@ -330,13 +330,14 @@ void ebpf_parse_cgroup_shm_data()
  * @param charttype chart type
  * @param order  the chart order
  * @param algorithm the algorithm used by dimension
+ * @param context   the chart context
  * @param module    chart module name, this is the eBPF thread.
  */
 void ebpf_create_charts_on_systemd(char *id, char *title, char *units, char *family, char *charttype, int order,
-                                   char *algorithm, char *module)
+                                   char *algorithm, char *context, char *module)
 {
     ebpf_cgroup_target_t *w;
-    ebpf_write_chart_cmd(NETDATA_SERVICE_FAMILY, id, title, units, family, charttype, NULL, order, module);
+    ebpf_write_chart_cmd(NETDATA_SERVICE_FAMILY, id, title, units, family, charttype, context, order, module);
 
     for (w = ebpf_cgroup_pids; w; w = w->next) {
         if (unlikely(w->systemd) && unlikely(w->updated))

+ 10 - 1
collectors/ebpf.plugin/ebpf_cgroup.h

@@ -16,6 +16,10 @@ struct pid_on_target2 {
     int updated;
 
     netdata_publish_swap_t swap;
+    netdata_fd_stat_t fd;
+    netdata_publish_vfs_t vfs;
+    ebpf_process_stat_t ps;
+    netdata_dcstat_pid_t dc;
 
     struct pid_on_target2 *next;
 };
@@ -40,6 +44,11 @@ typedef struct ebpf_cgroup_target {
     uint32_t updated;
 
     netdata_publish_swap_t publish_systemd_swap;
+    netdata_fd_stat_t publish_systemd_fd;
+    netdata_publish_vfs_t publish_systemd_vfs;
+    ebpf_process_stat_t publish_systemd_ps;
+    netdata_publish_dcstat_t publish_dc;
+    int oomkill;
 
     struct pid_on_target2 *pids;
     struct ebpf_cgroup_target *next;
@@ -50,6 +59,6 @@ extern void ebpf_parse_cgroup_shm_data();
 extern void ebpf_close_cgroup_shm();
 extern void ebpf_clean_cgroup_pids();
 extern void ebpf_create_charts_on_systemd(char *id, char *title, char *units, char *family, char *charttype, int order,
-                                          char *algorithm, char *module);
+                                          char *algorithm, char *context, char *module);
 
 #endif /* NETDATA_EBPF_CGROUP_H */
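
The signature change above threads a per-chart context from each caller down to ebpf_write_chart_cmd() instead of hard-coding NULL. Below is a minimal, self-contained sketch of the idea; the toy function and the literal strings stand in for the Netdata helper and the NETDATA_* macros, so the output format is illustrative, not the plugin's real protocol.

    #include <stdio.h>

    /* Toy stand-in for the updated ebpf_create_charts_on_systemd(): the only
     * point is that the caller now chooses the chart context. */
    static void toy_create_charts_on_systemd(const char *id, const char *title, const char *units,
                                             const char *family, const char *charttype, int order,
                                             const char *algorithm, const char *context, const char *module)
    {
        /* Before this commit a NULL context was always forwarded; now the
         * caller-supplied value reaches the chart definition. */
        printf("chart id=%s context=%s family=%s type=%s order=%d units=%s module=%s algo=%s\n",
               id, context ? context : "(none)", family, charttype, order, units, module, algorithm);
        (void)title;
    }

    int main(void)
    {
        /* Mirrors the dcstat hit-ratio call added later in this diff (strings are illustrative). */
        toy_create_charts_on_systemd("dc_hit", "Percentage of files listed inside directory cache.",
                                     "percentage", "directory cache (eBPF)", "line", 21200,
                                     "absolute", "services.dc_ratio", "dcstat");
        return 0;
    }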

+ 355 - 0
collectors/ebpf.plugin/ebpf_dcstat.c

@@ -285,6 +285,43 @@ static void read_apps_table()
     }
 }
 
+/**
+ * Update cgroup
+ *
+ * Update cgroup data based on the directory cache hash table collected per PID.
+ */
+static void ebpf_update_dc_cgroup()
+{
+    netdata_dcstat_pid_t *cv = dcstat_vector;
+    int fd = dcstat_maps[NETDATA_DCSTAT_PID_STATS].map_fd;
+    size_t length = sizeof(netdata_dcstat_pid_t)*ebpf_nprocs;
+
+    ebpf_cgroup_target_t *ect;
+    pthread_mutex_lock(&mutex_cgroup_shm);
+    for (ect = ebpf_cgroup_pids; ect; ect = ect->next) {
+        struct pid_on_target2 *pids;
+        for (pids = ect->pids; pids; pids = pids->next) {
+            int pid = pids->pid;
+            netdata_dcstat_pid_t *out = &pids->dc;
+            if (dcstat_pid[pid]) {
+                netdata_publish_dcstat_t *in = dcstat_pid[pid];
+
+                memcpy(out, &in->curr, sizeof(netdata_dcstat_pid_t));
+            } else {
+                memset(cv, 0, length);
+                if (bpf_map_lookup_elem(fd, &pid, cv)) {
+                    continue;
+                }
+
+                dcstat_apps_accumulator(cv);
+
+                memcpy(out, cv, sizeof(netdata_dcstat_pid_t));
+            }
+        }
+    }
+    pthread_mutex_unlock(&mutex_cgroup_shm);
+}
+
 /**
  * Read global table
  *
@@ -474,6 +511,317 @@ static void dcstat_send_global(netdata_publish_dcstat_t *publish)
                       &dcstat_counter_publish_aggregated[NETDATA_DCSTAT_IDX_REFERENCE], 3);
 }
 
+/**
+ * Create specific directory cache charts
+ *
+ * Create charts for cgroup/application.
+ *
+ * @param type the chart type.
+ */
+static void ebpf_create_specific_dc_charts(char *type)
+{
+    ebpf_create_chart(type, NETDATA_DC_HIT_CHART, "Percentage of files listed inside directory cache.",
+                      EBPF_COMMON_DIMENSION_PERCENTAGE, NETDATA_DIRECTORY_CACHE_SUBMENU,
+                      NETDATA_CGROUP_DC_HIT_RATIO_CONTEXT, NETDATA_EBPF_CHART_TYPE_LINE,
+                      NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5700,
+                      ebpf_create_global_dimension,
+                      dcstat_counter_publish_aggregated, 1, NETDATA_EBPF_MODULE_NAME_DCSTAT);
+
+    ebpf_create_chart(type, NETDATA_DC_REFERENCE_CHART, "Count file access.",
+                      EBPF_COMMON_DIMENSION_FILES, NETDATA_DIRECTORY_CACHE_SUBMENU,
+                      NETDATA_CGROUP_DC_REFERENCE_CONTEXT, NETDATA_EBPF_CHART_TYPE_LINE,
+                      NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5701,
+                      ebpf_create_global_dimension,
+                      &dcstat_counter_publish_aggregated[NETDATA_DCSTAT_IDX_REFERENCE], 1,
+                      NETDATA_EBPF_MODULE_NAME_DCSTAT);
+
+    ebpf_create_chart(type, NETDATA_DC_REQUEST_NOT_CACHE_CHART,
+                      "Access to files that were not present inside directory cache.",
+                      EBPF_COMMON_DIMENSION_FILES, NETDATA_DIRECTORY_CACHE_SUBMENU,
+                      NETDATA_CGROUP_DC_NOT_CACHE_CONTEXT, NETDATA_EBPF_CHART_TYPE_LINE,
+                      NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5702,
+                      ebpf_create_global_dimension,
+                      &dcstat_counter_publish_aggregated[NETDATA_DCSTAT_IDX_SLOW], 1,
+                      NETDATA_EBPF_MODULE_NAME_DCSTAT);
+
+    ebpf_create_chart(type, NETDATA_DC_REQUEST_NOT_FOUND_CHART,
+                      "Number of requests for files that were not found on filesystem.",
+                      EBPF_COMMON_DIMENSION_FILES, NETDATA_DIRECTORY_CACHE_SUBMENU,
+                      NETDATA_CGROUP_DC_NOT_FOUND_CONTEXT, NETDATA_EBPF_CHART_TYPE_LINE,
+                      NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5703,
+                      ebpf_create_global_dimension,
+                      &dcstat_counter_publish_aggregated[NETDATA_DCSTAT_IDX_MISS], 1,
+                      NETDATA_EBPF_MODULE_NAME_DCSTAT);
+}
+
+/**
+ * Obsolete specific directory cache charts
+ *
+ * Obsolete charts for cgroup/application.
+ *
+ * @param type the chart type.
+ */
+static void ebpf_obsolete_specific_dc_charts(char *type)
+{
+    ebpf_write_chart_obsolete(type, NETDATA_DC_HIT_CHART,
+                              "Percentage of files listed inside directory cache.",
+                              EBPF_COMMON_DIMENSION_PERCENTAGE, NETDATA_DIRECTORY_CACHE_SUBMENU,
+                              NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_DC_HIT_RATIO_CONTEXT,
+                              NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5700);
+
+    ebpf_write_chart_obsolete(type, NETDATA_DC_REFERENCE_CHART,
+                              "Count file access.",
+                              EBPF_COMMON_DIMENSION_FILES, NETDATA_DIRECTORY_CACHE_SUBMENU,
+                              NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_DC_REFERENCE_CONTEXT,
+                              NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5701);
+
+    ebpf_write_chart_obsolete(type, NETDATA_DC_REQUEST_NOT_CACHE_CHART,
+                              "Access to files that were not present inside directory cache.",
+                              EBPF_COMMON_DIMENSION_FILES, NETDATA_DIRECTORY_CACHE_SUBMENU,
+                              NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_DC_NOT_CACHE_CONTEXT,
+                              NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5702);
+
+    ebpf_write_chart_obsolete(type, NETDATA_DC_REQUEST_NOT_FOUND_CHART,
+                              "Number of requests for files that were not found on filesystem.",
+                              EBPF_COMMON_DIMENSION_FILES, NETDATA_DIRECTORY_CACHE_SUBMENU,
+                              NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_DC_NOT_FOUND_CONTEXT,
+                              NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5703);
+}
+
+/**
+ * Directory Cache sum PIDs
+ *
+ * Sum values for all PIDs associated with a group
+ *
+ * @param publish  output structure.
+ * @param root     linked list with the PIDs of the group
+ */
+void ebpf_dc_sum_cgroup_pids(netdata_publish_dcstat_t *publish, struct pid_on_target2 *root)
+{
+    memset(&publish->curr, 0, sizeof(netdata_dcstat_pid_t));
+    netdata_dcstat_pid_t *dst = &publish->curr;
+    while (root) {
+        int32_t pid = root->pid;
+        netdata_publish_dcstat_t *w = dcstat_pid[pid];
+        if (w) {
+            netdata_dcstat_pid_t *src = &w->curr;
+            dst->cache_access += src->cache_access;
+            dst->file_system += src->file_system;
+            dst->not_found += src->not_found;
+        }
+
+        root = root->next;
+    }
+}
+
+/**
+ * Calc chart values
+ *
+ * Do necessary math to plot charts.
+ */
+void ebpf_dc_calc_chart_values()
+{
+    ebpf_cgroup_target_t *ect;
+    for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
+        ebpf_dc_sum_cgroup_pids(&ect->publish_dc, ect->pids);
+        uint64_t cache = ect->publish_dc.curr.cache_access;
+        uint64_t not_found = ect->publish_dc.curr.not_found;
+
+        dcstat_update_publish(&ect->publish_dc, cache, not_found);
+
+        ect->publish_dc.cache_access = (long long)ect->publish_dc.curr.cache_access -
+            (long long)ect->publish_dc.prev.cache_access;
+        ect->publish_dc.prev.cache_access = ect->publish_dc.curr.cache_access;
+
+        if (ect->publish_dc.curr.not_found < ect->publish_dc.prev.not_found) {
+            ect->publish_dc.prev.not_found = 0;
+        }
+    }
+}
+
+/**
+ *  Create Systemd directory cache Charts
+ *
+ *  Create charts when systemd is enabled
+ **/
+static void ebpf_create_systemd_dc_charts()
+{
+    ebpf_create_charts_on_systemd(NETDATA_DC_HIT_CHART,
+                                  "Percentage of files listed inside directory cache.",
+                                  EBPF_COMMON_DIMENSION_PERCENTAGE,
+                                  NETDATA_DIRECTORY_CACHE_SUBMENU,
+                                  NETDATA_EBPF_CHART_TYPE_LINE,
+                                  21200,
+                                  ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX],
+                                  NETDATA_SYSTEMD_DC_HIT_RATIO_CONTEXT, NETDATA_EBPF_MODULE_NAME_DCSTAT);
+
+    ebpf_create_charts_on_systemd(NETDATA_DC_REFERENCE_CHART,
+                                  "Count file access.",
+                                  EBPF_COMMON_DIMENSION_FILES,
+                                  NETDATA_DIRECTORY_CACHE_SUBMENU,
+                                  NETDATA_EBPF_CHART_TYPE_LINE,
+                                  21201,
+                                  ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX],
+                                  NETDATA_SYSTEMD_DC_REFERENCE_CONTEXT, NETDATA_EBPF_MODULE_NAME_DCSTAT);
+
+    ebpf_create_charts_on_systemd(NETDATA_DC_REQUEST_NOT_CACHE_CHART,
+                                  "Access to files that were not present inside directory cache.",
+                                  EBPF_COMMON_DIMENSION_FILES,
+                                  NETDATA_DIRECTORY_CACHE_SUBMENU,
+                                  NETDATA_EBPF_CHART_TYPE_LINE,
+                                  21202,
+                                  ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX],
+                                  NETDATA_SYSTEMD_DC_NOT_CACHE_CONTEXT, NETDATA_EBPF_MODULE_NAME_DCSTAT);
+
+    ebpf_create_charts_on_systemd(NETDATA_DC_REQUEST_NOT_FOUND_CHART,
+                                  "Number of requests for files that were not found on filesystem.",
+                                  EBPF_COMMON_DIMENSION_FILES,
+                                  NETDATA_DIRECTORY_CACHE_SUBMENU,
+                                  NETDATA_EBPF_CHART_TYPE_LINE,
+                                  21203,
+                                  ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX],
+                                  NETDATA_SYSTEMD_DC_NOT_FOUND_CONTEXT, NETDATA_EBPF_MODULE_NAME_DCSTAT);
+}
+
+/**
+ * Send Directory Cache charts
+ *
+ * Send collected data to Netdata.
+ *
+ * @return 0 when a specific dimension must be removed (so the charts are recreated),
+ *         1 otherwise, to avoid chart recreation
+ */
+static int ebpf_send_systemd_dc_charts()
+{
+    int ret = 1;
+    collected_number value;
+    ebpf_cgroup_target_t *ect;
+    write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_DC_HIT_CHART);
+    for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
+        if (unlikely(ect->systemd) && unlikely(ect->updated)) {
+            write_chart_dimension(ect->name, (long long) ect->publish_dc.ratio);
+        } else
+            ret = 0;
+    }
+    write_end_chart();
+
+    write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_DC_REFERENCE_CHART);
+    for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
+        if (unlikely(ect->systemd) && unlikely(ect->updated)) {
+            write_chart_dimension(ect->name, (long long) ect->publish_dc.cache_access);
+        }
+    }
+    write_end_chart();
+
+    write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_DC_REQUEST_NOT_CACHE_CHART);
+    for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
+        if (unlikely(ect->systemd) && unlikely(ect->updated)) {
+            value = (collected_number) (!ect->publish_dc.cache_access) ? 0 :
+                (long long )ect->publish_dc.curr.file_system - (long long)ect->publish_dc.prev.file_system;
+            ect->publish_dc.prev.file_system = ect->publish_dc.curr.file_system;
+
+            write_chart_dimension(ect->name, (long long) value);
+        }
+    }
+    write_end_chart();
+
+    write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_DC_REQUEST_NOT_FOUND_CHART);
+    for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
+        if (unlikely(ect->systemd) && unlikely(ect->updated)) {
+            value = (collected_number) (!ect->publish_dc.cache_access) ? 0 :
+                (long long)ect->publish_dc.curr.not_found - (long long)ect->publish_dc.prev.not_found;
+
+            ect->publish_dc.prev.not_found = ect->publish_dc.curr.not_found;
+
+            write_chart_dimension(ect->name, (long long) value);
+        }
+    }
+    write_end_chart();
+
+    return ret;
+}
+
+/**
+ * Send specific Directory Cache data
+ *
+ * Send collected data for a specific cgroup.
+ *
+ * @param type chart type
+ * @param pdc  structure with the values that will be sent to Netdata
+ */
+static void ebpf_send_specific_dc_data(char *type, netdata_publish_dcstat_t *pdc)
+{
+    collected_number value;
+    write_begin_chart(type, NETDATA_DC_HIT_CHART);
+    write_chart_dimension(dcstat_counter_publish_aggregated[NETDATA_DCSTAT_IDX_RATIO].name,
+                          (long long) pdc->ratio);
+    write_end_chart();
+
+    write_begin_chart(type, NETDATA_DC_REFERENCE_CHART);
+    write_chart_dimension(dcstat_counter_publish_aggregated[NETDATA_DCSTAT_IDX_REFERENCE].name,
+                          (long long) pdc->cache_access);
+    write_end_chart();
+
+    value = (collected_number) (!pdc->cache_access) ? 0 :
+        (long long )pdc->curr.file_system - (long long)pdc->prev.file_system;
+    pdc->prev.file_system = pdc->curr.file_system;
+
+    write_begin_chart(type, NETDATA_DC_REQUEST_NOT_CACHE_CHART);
+    write_chart_dimension(dcstat_counter_publish_aggregated[NETDATA_DCSTAT_IDX_SLOW].name, (long long) value);
+    write_end_chart();
+
+    value = (collected_number) (!pdc->cache_access) ? 0 :
+        (long long)pdc->curr.not_found - (long long)pdc->prev.not_found;
+    pdc->prev.not_found = pdc->curr.not_found;
+
+    write_begin_chart(type, NETDATA_DC_REQUEST_NOT_FOUND_CHART);
+    write_chart_dimension(dcstat_counter_publish_aggregated[NETDATA_DCSTAT_IDX_MISS].name, (long long) value);
+    write_end_chart();
+}
+
+/**
+ * Send data to Netdata calling auxiliary functions.
+ */
+void ebpf_dc_send_cgroup_data()
+{
+    if (!ebpf_cgroup_pids)
+        return;
+
+    pthread_mutex_lock(&mutex_cgroup_shm);
+    ebpf_cgroup_target_t *ect;
+    ebpf_dc_calc_chart_values();
+
+    int has_systemd = shm_ebpf_cgroup.header->systemd_enabled;
+    if (has_systemd) {
+        static int systemd_charts = 0;
+        if (!systemd_charts) {
+            ebpf_create_systemd_dc_charts();
+            systemd_charts = 1;
+        }
+
+        systemd_charts = ebpf_send_systemd_dc_charts();
+    }
+
+    for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
+        if (ect->systemd)
+            continue;
+
+        if (!(ect->flags & NETDATA_EBPF_CGROUP_HAS_DC_CHART) && ect->updated) {
+            ebpf_create_specific_dc_charts(ect->name);
+            ect->flags |= NETDATA_EBPF_CGROUP_HAS_DC_CHART;
+        }
+
+        if (ect->flags & NETDATA_EBPF_CGROUP_HAS_DC_CHART) {
+            if (ect->updated) {
+                ebpf_send_specific_dc_data(ect->name, &ect->publish_dc);
+            } else {
+                ebpf_obsolete_specific_dc_charts(ect->name);
+                ect->flags &= ~NETDATA_EBPF_CGROUP_HAS_DC_CHART;
+            }
+        }
+    }
+
+    pthread_mutex_unlock(&mutex_cgroup_shm);
+}
+
 /**
 * Main loop for this collector.
 */
@@ -488,6 +836,7 @@ static void dcstat_collector(ebpf_module_t *em)
     netdata_publish_dcstat_t publish;
     memset(&publish, 0, sizeof(publish));
     int apps = em->apps_charts;
+    int cgroups = em->cgroup_charts;
     while (!close_ebpf_plugin) {
         pthread_mutex_lock(&collect_data_mutex);
         pthread_cond_wait(&collect_data_cond_var, &collect_data_mutex);
@@ -495,6 +844,9 @@ static void dcstat_collector(ebpf_module_t *em)
         if (apps)
             read_apps_table();
 
+        if (cgroups)
+            ebpf_update_dc_cgroup();
+
         pthread_mutex_lock(&lock);
 
         dcstat_send_global(&publish);
@@ -502,6 +854,9 @@ static void dcstat_collector(ebpf_module_t *em)
         if (apps)
             ebpf_dcache_send_apps_data(apps_groups_root_target);
 
+        if (cgroups)
+            ebpf_dc_send_cgroup_data();
+
         pthread_mutex_unlock(&lock);
         pthread_mutex_unlock(&collect_data_mutex);
     }
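
ebpf_dc_send_cgroup_data() above uses a function-local static flag so the systemd chart definitions are written only once, but ebpf_send_systemd_dc_charts() can reset it: it returns 0 when it finds a service that is no longer updated, and because the definitions only list updated services, re-sending them drops the stale dimension. A self-contained sketch of that handshake follows; the service names and the printed lines are hypothetical.

    #include <stdio.h>

    struct toy_service { const char *name; int updated; };

    /* Mirrors ebpf_send_systemd_dc_charts(): returns 0 when any service was
     * not updated, which forces the chart definitions to be re-sent. */
    static int toy_send(struct toy_service *svc, int n)
    {
        int ret = 1;
        for (int i = 0; i < n; i++) {
            if (svc[i].updated)
                printf("SET %s\n", svc[i].name);
            else
                ret = 0;
        }
        return ret;
    }

    int main(void)
    {
        struct toy_service services[] = { {"nginx.service", 1}, {"redis.service", 1} };
        int systemd_charts = 0;   /* mirrors the function-local static in the diff */

        for (int iteration = 0; iteration < 3; iteration++) {
            if (!systemd_charts) {
                printf("CHART definitions sent (updated services only)\n");
                systemd_charts = 1;
            }
            if (iteration == 1)
                services[1].updated = 0;   /* a service stops being updated */
            systemd_charts = toy_send(services, 2);
        }
        return 0;
    }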

+ 11 - 0
collectors/ebpf.plugin/ebpf_dcstat.h

@@ -17,6 +17,17 @@
 // configuration file
 #define NETDATA_DIRECTORY_DCSTAT_CONFIG_FILE "dcstat.conf"
 
+// Contexts
+#define NETDATA_CGROUP_DC_HIT_RATIO_CONTEXT "cgroup.dc_ratio"
+#define NETDATA_CGROUP_DC_REFERENCE_CONTEXT "cgroup.dc_reference"
+#define NETDATA_CGROUP_DC_NOT_CACHE_CONTEXT "cgroup.dc_not_cache"
+#define NETDATA_CGROUP_DC_NOT_FOUND_CONTEXT "cgroup.dc_not_found"
+
+#define NETDATA_SYSTEMD_DC_HIT_RATIO_CONTEXT "services.dc_ratio"
+#define NETDATA_SYSTEMD_DC_REFERENCE_CONTEXT "services.dc_reference"
+#define NETDATA_SYSTEMD_DC_NOT_CACHE_CONTEXT "services.dc_not_cache"
+#define NETDATA_SYSTEMD_DC_NOT_FOUND_CONTEXT "services.dc_not_found"
+
 #define NETDATA_LATENCY_DCSTAT_SLEEP_MS 700000ULL
 
 enum directory_cache_indexes {

+ 328 - 0
collectors/ebpf.plugin/ebpf_fd.c

@@ -249,6 +249,41 @@ static void read_apps_table()
     }
 }
 
+/**
+ * Update cgroup
+ *
+ * Update cgroup data based on the file descriptor hash table collected per PID.
+ */
+static void ebpf_update_fd_cgroup()
+{
+    ebpf_cgroup_target_t *ect;
+    netdata_fd_stat_t *fv = fd_vector;
+    int fd = fd_maps[NETDATA_FD_PID_STATS].map_fd;
+    size_t length = sizeof(netdata_fd_stat_t) * ebpf_nprocs;
+
+    pthread_mutex_lock(&mutex_cgroup_shm);
+    for (ect = ebpf_cgroup_pids; ect; ect = ect->next) {
+        struct pid_on_target2 *pids;
+        for (pids = ect->pids; pids; pids = pids->next) {
+            int pid = pids->pid;
+            netdata_fd_stat_t *out = &pids->fd;
+            if (fd_pid[pid]) {
+                netdata_fd_stat_t *in = fd_pid[pid];
+
+                memcpy(out, in, sizeof(netdata_fd_stat_t));
+            } else {
+                memset(fv, 0, length);
+                if (!bpf_map_lookup_elem(fd, &pid, fv)) {
+                    fd_apps_accumulator(fv);
+
+                    memcpy(out, fv, sizeof(netdata_fd_stat_t));
+                }
+            }
+        }
+    }
+    pthread_mutex_unlock(&mutex_cgroup_shm);
+}
+
 /**
  * Sum PIDs
  *
@@ -336,6 +371,292 @@ void ebpf_fd_send_apps_data(ebpf_module_t *em, struct target *root)
     }
 }
 
+/**
+ * Sum PIDs
+ *
+ * Sum values for all PIDs associated with the target.
+ *
+ * @param fd  structure used to store data
+ * @param pids input data
+ */
+static void ebpf_fd_sum_cgroup_pids(netdata_fd_stat_t *fd, struct pid_on_target2 *pids)
+{
+    netdata_fd_stat_t accumulator;
+    memset(&accumulator, 0, sizeof(accumulator));
+
+    while (pids) {
+        netdata_fd_stat_t *w = &pids->fd;
+
+        accumulator.open_err += w->open_err;
+        accumulator.open_call += w->open_call;
+        accumulator.close_call += w->close_call;
+        accumulator.close_err += w->close_err;
+
+        pids = pids->next;
+    }
+
+    fd->open_call = (accumulator.open_call >= fd->open_call) ? accumulator.open_call : fd->open_call;
+    fd->open_err = (accumulator.open_err >= fd->open_err) ? accumulator.open_err : fd->open_err;
+    fd->close_call = (accumulator.close_call >= fd->close_call) ? accumulator.close_call : fd->close_call;
+    fd->close_err = (accumulator.close_err >= fd->close_err) ? accumulator.close_err : fd->close_err;
+}
+
+/**
+ * Create specific file descriptor charts
+ *
+ * Create charts for cgroup/application.
+ *
+ * @param type the chart type.
+ * @param em   the main thread structure.
+ */
+static void ebpf_create_specific_fd_charts(char *type, ebpf_module_t *em)
+{
+    ebpf_create_chart(type, NETDATA_SYSCALL_APPS_FILE_OPEN, "Number of open files",
+                      EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_FILE_CGROUP_GROUP,
+                      NETDATA_CGROUP_FD_OPEN_CONTEXT, NETDATA_EBPF_CHART_TYPE_LINE,
+                      NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5400,
+                      ebpf_create_global_dimension,
+                      &fd_publish_aggregated[NETDATA_FD_SYSCALL_OPEN],
+                      1, NETDATA_EBPF_MODULE_NAME_SWAP);
+
+    if (em->mode < MODE_ENTRY) {
+        ebpf_create_chart(type, NETDATA_SYSCALL_APPS_FILE_OPEN_ERROR, "Fails to open files",
+                          EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_FILE_CGROUP_GROUP,
+                          NETDATA_CGROUP_FD_OPEN_ERR_CONTEXT, NETDATA_EBPF_CHART_TYPE_LINE,
+                          NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5401,
+                          ebpf_create_global_dimension,
+                          &fd_publish_aggregated[NETDATA_FD_SYSCALL_OPEN],
+                          1,
+                          NETDATA_EBPF_MODULE_NAME_SWAP);
+    }
+
+    ebpf_create_chart(type, NETDATA_SYSCALL_APPS_FILE_CLOSED, "Files closed",
+                      EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_FILE_CGROUP_GROUP,
+                      NETDATA_CGROUP_FD_CLOSE_CONTEXT, NETDATA_EBPF_CHART_TYPE_LINE,
+                      NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5402,
+                      ebpf_create_global_dimension,
+                      &fd_publish_aggregated[NETDATA_FD_SYSCALL_CLOSE],
+                      1, NETDATA_EBPF_MODULE_NAME_SWAP);
+
+    if (em->mode < MODE_ENTRY) {
+        ebpf_create_chart(type, NETDATA_SYSCALL_APPS_FILE_CLOSE_ERROR, "Fails to close files",
+                          EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_FILE_CGROUP_GROUP,
+                          NETDATA_CGROUP_FD_CLOSE_ERR_CONTEXT, NETDATA_EBPF_CHART_TYPE_LINE,
+                          NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5403,
+                          ebpf_create_global_dimension,
+                          &fd_publish_aggregated[NETDATA_FD_SYSCALL_CLOSE],
+                          1,
+                          NETDATA_EBPF_MODULE_NAME_SWAP);
+    }
+}
+
+/**
+ * Obsolete specific file descriptor charts
+ *
+ * Obsolete charts for cgroup/application.
+ *
+ * @param type the chart type.
+ * @param em   the main thread structure.
+ */
+static void ebpf_obsolete_specific_fd_charts(char *type, ebpf_module_t *em)
+{
+    ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_FILE_OPEN, "Number of open files",
+                              EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_FILE_GROUP,
+                              NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_FD_OPEN_CONTEXT,
+                              NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5400);
+
+    if (em->mode < MODE_ENTRY) {
+        ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_FILE_OPEN_ERROR, "Fails to open files",
+                                  EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_FILE_GROUP,
+                                  NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_FD_OPEN_ERR_CONTEXT,
+                                  NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5401);
+    }
+
+    ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_FILE_CLOSED, "Files closed",
+                              EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_FILE_GROUP,
+                              NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_FD_CLOSE_CONTEXT,
+                              NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5402);
+
+    if (em->mode < MODE_ENTRY) {
+        ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_FILE_CLOSE_ERROR, "Fails to close files",
+                                  EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_FILE_GROUP,
+                                  NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_FD_CLOSE_ERR_CONTEXT,
+                                  NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5403);
+    }
+}
+
+/**
+ * Send specific file descriptor data
+ *
+ * Send data for specific cgroup/apps.
+ *
+ * @param type   chart type
+ * @param values structure with values that will be sent to Netdata
+ * @param em     the main collector structure
+ */
+static void ebpf_send_specific_fd_data(char *type, netdata_fd_stat_t *values, ebpf_module_t *em)
+{
+    write_begin_chart(type, NETDATA_SYSCALL_APPS_FILE_OPEN);
+    write_chart_dimension(fd_publish_aggregated[NETDATA_FD_SYSCALL_OPEN].name, (long long)values->open_call);
+    write_end_chart();
+
+    if (em->mode < MODE_ENTRY) {
+        write_begin_chart(type, NETDATA_SYSCALL_APPS_FILE_OPEN_ERROR);
+        write_chart_dimension(fd_publish_aggregated[NETDATA_FD_SYSCALL_OPEN].name, (long long)values->open_err);
+        write_end_chart();
+    }
+
+    write_begin_chart(type, NETDATA_SYSCALL_APPS_FILE_CLOSED);
+    write_chart_dimension(fd_publish_aggregated[NETDATA_FD_SYSCALL_CLOSE].name, (long long)values->close_call);
+    write_end_chart();
+
+    if (em->mode < MODE_ENTRY) {
+        write_begin_chart(type, NETDATA_SYSCALL_APPS_FILE_CLOSE_ERROR);
+        write_chart_dimension(fd_publish_aggregated[NETDATA_FD_SYSCALL_CLOSE].name, (long long)values->close_err);
+        write_end_chart();
+    }
+}
+
+/**
+ *  Create systemd file descriptor charts
+ *
+ *  Create charts when systemd is enabled
+ *
+ *  @param em the main collector structure
+ **/
+static void ebpf_create_systemd_fd_charts(ebpf_module_t *em)
+{
+    ebpf_create_charts_on_systemd(NETDATA_SYSCALL_APPS_FILE_OPEN, "Number of open files",
+                                  EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_FILE_CGROUP_GROUP,
+                                  NETDATA_EBPF_CHART_TYPE_STACKED, 20061,
+                                  ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], NETDATA_SYSTEMD_FD_OPEN_CONTEXT,
+                                  NETDATA_EBPF_MODULE_NAME_PROCESS);
+
+    if (em->mode < MODE_ENTRY) {
+        ebpf_create_charts_on_systemd(NETDATA_SYSCALL_APPS_FILE_OPEN_ERROR, "Fails to open files",
+                                      EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_FILE_CGROUP_GROUP,
+                                      NETDATA_EBPF_CHART_TYPE_STACKED, 20062,
+                                      ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], NETDATA_SYSTEMD_FD_OPEN_ERR_CONTEXT,
+                                      NETDATA_EBPF_MODULE_NAME_PROCESS);
+    }
+
+    ebpf_create_charts_on_systemd(NETDATA_SYSCALL_APPS_FILE_CLOSED, "Files closed",
+                                  EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_FILE_CGROUP_GROUP,
+                                  NETDATA_EBPF_CHART_TYPE_STACKED, 20063,
+                                  ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], NETDATA_SYSTEMD_FD_CLOSE_CONTEXT,
+                                  NETDATA_EBPF_MODULE_NAME_PROCESS);
+
+    if (em->mode < MODE_ENTRY) {
+        ebpf_create_charts_on_systemd(NETDATA_SYSCALL_APPS_FILE_CLOSE_ERROR, "Fails to close files",
+                                      EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_FILE_CGROUP_GROUP,
+                                      NETDATA_EBPF_CHART_TYPE_STACKED, 20064,
+                                      ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], NETDATA_SYSTEMD_FD_CLOSE_ERR_CONTEXT,
+                                      NETDATA_EBPF_MODULE_NAME_PROCESS);
+    }
+}
+
+/**
+ * Send Systemd charts
+ *
+ * Send collected data to Netdata.
+ *
+ *  @param em the main collector structure
+ *
+ *  @return 0 when a specific dimension must be removed (so the charts are recreated),
+ *          1 otherwise, to avoid chart recreation
+ */
+static int ebpf_send_systemd_fd_charts(ebpf_module_t *em)
+{
+    int ret = 1;
+    ebpf_cgroup_target_t *ect;
+    write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_FILE_OPEN);
+    for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
+        if (unlikely(ect->systemd) && unlikely(ect->updated)) {
+            write_chart_dimension(ect->name, ect->publish_systemd_fd.open_call);
+        } else
+            ret = 0;
+    }
+    write_end_chart();
+
+    if (em->mode < MODE_ENTRY) {
+        write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_FILE_OPEN_ERROR);
+        for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
+            if (unlikely(ect->systemd) && unlikely(ect->updated)) {
+                write_chart_dimension(ect->name, ect->publish_systemd_fd.open_err);
+            }
+        }
+        write_end_chart();
+    }
+
+    write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_FILE_CLOSED);
+    for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
+        if (unlikely(ect->systemd) && unlikely(ect->updated)) {
+            write_chart_dimension(ect->name, ect->publish_systemd_fd.close_call);
+        }
+    }
+    write_end_chart();
+
+    if (em->mode < MODE_ENTRY) {
+        write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_FILE_CLOSE_ERROR);
+        for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
+            if (unlikely(ect->systemd) && unlikely(ect->updated)) {
+                write_chart_dimension(ect->name, ect->publish_systemd_fd.close_err);
+            }
+        }
+        write_end_chart();
+    }
+
+    return ret;
+}
+
+/**
+ * Send data to Netdata calling auxiliary functions.
+ *
+ * @param em the main collector structure
+ */
+static void ebpf_fd_send_cgroup_data(ebpf_module_t *em)
+{
+    if (!ebpf_cgroup_pids)
+        return;
+
+    pthread_mutex_lock(&mutex_cgroup_shm);
+    ebpf_cgroup_target_t *ect;
+    for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
+        ebpf_fd_sum_cgroup_pids(&ect->publish_systemd_fd, ect->pids);
+    }
+
+    int has_systemd = shm_ebpf_cgroup.header->systemd_enabled;
+    if (has_systemd) {
+        static int systemd_charts = 0;
+        if (!systemd_charts) {
+            ebpf_create_systemd_fd_charts(em);
+            systemd_charts = 1;
+        }
+
+        systemd_charts = ebpf_send_systemd_fd_charts(em);
+    }
+
+    for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
+        if (ect->systemd)
+            continue;
+
+        if (!(ect->flags & NETDATA_EBPF_CGROUP_HAS_FD_CHART) && ect->updated) {
+            ebpf_create_specific_fd_charts(ect->name, em);
+            ect->flags |= NETDATA_EBPF_CGROUP_HAS_FD_CHART;
+        }
+
+        if (ect->flags & NETDATA_EBPF_CGROUP_HAS_FD_CHART) {
+            if (ect->updated) {
+                ebpf_send_specific_fd_data(ect->name, &ect->publish_systemd_fd, em);
+            } else {
+                ebpf_obsolete_specific_fd_charts(ect->name, em);
+                ect->flags &= ~NETDATA_EBPF_CGROUP_HAS_FD_CHART;
+            }
+        }
+    }
+
+    pthread_mutex_unlock(&mutex_cgroup_shm);
+}
+
 /**
 * Main loop for this collector.
 */
@@ -348,6 +669,7 @@ static void fd_collector(ebpf_module_t *em)
                           ebpf_fd_read_hash, em);
 
     int apps = em->apps_charts;
+    int cgroups = em->cgroup_charts;
     while (!close_ebpf_plugin) {
         pthread_mutex_lock(&collect_data_mutex);
         pthread_cond_wait(&collect_data_cond_var, &collect_data_mutex);
@@ -355,6 +677,9 @@ static void fd_collector(ebpf_module_t *em)
         if (apps)
             read_apps_table();
 
+        if (cgroups)
+            ebpf_update_fd_cgroup();
+
         pthread_mutex_lock(&lock);
 
         ebpf_fd_send_data(em);
@@ -362,6 +687,9 @@ static void fd_collector(ebpf_module_t *em)
         if (apps)
             ebpf_fd_send_apps_data(em, apps_groups_root_target);
 
+        if (cgroups)
+            ebpf_fd_send_cgroup_data(em);
+
         pthread_mutex_unlock(&lock);
         pthread_mutex_unlock(&collect_data_mutex);
     }
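
ebpf_fd_sum_cgroup_pids() adds up the per-PID counters for a cgroup and then keeps the larger of the new sum and the previously stored value, presumably so the published counter does not move backwards when a PID in the group exits between collections. A self-contained sketch of that clamp, using toy types rather than the Netdata structures:

    #include <stdint.h>
    #include <stdio.h>

    struct toy_pid_fd { uint32_t open_call; struct toy_pid_fd *next; };

    /* Sum the per-PID counters, then apply the same clamp as the diff:
     * keep the maximum of the new sum and the previous value. */
    static void toy_sum_cgroup(uint32_t *cgroup_open_call, struct toy_pid_fd *pids)
    {
        uint32_t acc = 0;
        for (; pids; pids = pids->next)
            acc += pids->open_call;

        if (acc >= *cgroup_open_call)
            *cgroup_open_call = acc;
    }

    int main(void)
    {
        struct toy_pid_fd b = { 40, NULL }, a = { 100, &b };
        uint32_t cgroup_open = 0;

        toy_sum_cgroup(&cgroup_open, &a);   /* sum is 140 */
        printf("after pass 1: %u\n", cgroup_open);

        a.next = NULL;                      /* PID b exits; its 40 calls disappear */
        toy_sum_cgroup(&cgroup_open, &a);   /* sum is 100, but 140 is kept */
        printf("after pass 2: %u\n", cgroup_open);
        return 0;
    }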

+ 11 - 0
collectors/ebpf.plugin/ebpf_fd.h

@@ -24,6 +24,17 @@
 // Process configuration name
 #define NETDATA_FD_CONFIG_FILE "fd.conf"
 
+// Contexts
+#define NETDATA_CGROUP_FD_OPEN_CONTEXT "cgroup.fd_open"
+#define NETDATA_CGROUP_FD_OPEN_ERR_CONTEXT "cgroup.fd_open_error"
+#define NETDATA_CGROUP_FD_CLOSE_CONTEXT "cgroup.fd_close"
+#define NETDATA_CGROUP_FD_CLOSE_ERR_CONTEXT "cgroup.fd_close_error"
+
+#define NETDATA_SYSTEMD_FD_OPEN_CONTEXT "services.fd_open"
+#define NETDATA_SYSTEMD_FD_OPEN_ERR_CONTEXT "services.fd_open_error"
+#define NETDATA_SYSTEMD_FD_CLOSE_CONTEXT "services.fd_close"
+#define NETDATA_SYSTEMD_FD_CLOSE_ERR_CONTEXT "services.fd_close_error"
+
 typedef struct netdata_fd_stat {
     uint64_t pid_tgid;                     // Unique identifier
     uint32_t pid;                          // Process ID

+ 213 - 31
collectors/ebpf.plugin/ebpf_oomkill.c

@@ -37,6 +37,10 @@ static ebpf_tracepoint_t oomkill_tracepoints[] = {
 static struct bpf_link **probe_links = NULL;
 static struct bpf_object *objects = NULL;
 
+static netdata_publish_syscall_t oomkill_publish_aggregated = {.name = "oomkill", .dimension = "oomkill",
+                                                               .algorithm = "absolute",
+                                                               .next = NULL};
+
 /**
  * Clean up the main thread.
  *
@@ -60,32 +64,8 @@ static void oomkill_cleanup(void *ptr)
     }
 }
 
-static void oomkill_write_data()
+static void oomkill_write_data(int32_t *keys, uint32_t total)
 {
-    // the first `i` entries of `keys` will contain the currently active PIDs
-    // in the eBPF map.
-    uint32_t i = 0;
-    int32_t keys[NETDATA_OOMKILL_MAX_ENTRIES] = {0};
-
-    uint32_t curr_key = 0;
-    uint32_t key = 0;
-    int mapfd = oomkill_maps[OOMKILL_MAP_KILLCNT].map_fd;
-    while (bpf_map_get_next_key(mapfd, &curr_key, &key) == 0) {
-        curr_key = key;
-
-        keys[i] = key;
-        i += 1;
-
-        // delete this key now that we've recorded its existence. there's no
-        // race here, as the same PID will only get OOM killed once.
-        int test = bpf_map_delete_elem(mapfd, &key);
-        if (unlikely(test < 0)) {
-            // since there's only 1 thread doing these deletions, it should be
-            // impossible to get this condition.
-            error("key unexpectedly not available for deletion.");
-        }
-    }
-
     // for each app, see if it was OOM killed. record as 1 if so otherwise 0.
     struct target *w;
     for (w = apps_groups_root_target; w != NULL; w = w->next) {
@@ -94,7 +74,7 @@ static void oomkill_write_data()
             struct pid_on_target *pids = w->root_pid;
             while (pids) {
                 uint32_t j;
-                for (j = 0; j < i; j++) {
+                for (j = 0; j < total; j++) {
                     if (pids->pid == keys[j]) {
                         was_oomkilled = true;
                         // set to 0 so we consider it "done".
@@ -117,7 +97,7 @@ static void oomkill_write_data()
     // we have no choice but to dump it into `other`.
     uint32_t j;
     uint32_t rem_count = 0;
-    for (j = 0; j < i; j++) {
+    for (j = 0; j < total; j++) {
         int32_t key = keys[j];
         if (key != 0) {
             rem_count += 1;
@@ -128,12 +108,207 @@ static void oomkill_write_data()
     }
 }
 
+/**
+ * Create specific OOMkill charts
+ *
+ * Create charts for cgroup/application.
+ *
+ * @param type the chart type.
+ */
+static void ebpf_create_specific_oomkill_charts(char *type)
+{
+    ebpf_create_chart(type, NETDATA_OOMKILL_CHART, "OOM kills. This chart is provided by eBPF plugin.",
+                      EBPF_COMMON_DIMENSION_KILLS, NETDATA_EBPF_MEMORY_GROUP,
+                      NULL, NETDATA_EBPF_CHART_TYPE_LINE,
+                      NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5600,
+                      ebpf_create_global_dimension,
+                      &oomkill_publish_aggregated, 1, NETDATA_EBPF_MODULE_NAME_OOMKILL);
+}
+
+/**
+ *  Create Systemd OOMkill Charts
+ *
+ *  Create charts when systemd is enabled
+ **/
+static void ebpf_create_systemd_oomkill_charts()
+{
+    ebpf_create_charts_on_systemd(NETDATA_OOMKILL_CHART, "OOM kills. This chart is provided by eBPF plugin.",
+                                  EBPF_COMMON_DIMENSION_KILLS, NETDATA_EBPF_MEMORY_GROUP,
+                                  NETDATA_EBPF_CHART_TYPE_LINE, 20191,
+                                  ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], NULL,
+                                  NETDATA_EBPF_MODULE_NAME_OOMKILL);
+}
+
+/**
+ * Send Systemd charts
+ *
+ * Send collected data to Netdata.
+ *
+ * @return 0 when a specific dimension must be removed (so the charts are recreated),
+ *         1 otherwise, to avoid chart recreation
+ */
+static int ebpf_send_systemd_oomkill_charts()
+{
+    int ret = 1;
+    ebpf_cgroup_target_t *ect;
+    write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_OOMKILL_CHART);
+    for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
+        if (unlikely(ect->systemd) && unlikely(ect->updated)) {
+            write_chart_dimension(ect->name, (long long) ect->oomkill);
+            ect->oomkill = 0;
+        } else
+            ret = 0;
+    }
+    write_end_chart();
+
+    return ret;
+}
+
+/**
+ * Send Specific OOMkill data
+ *
+ * Send data for specific cgroup/apps.
+ *
+ * @param type   chart type
+ * @param value  value for oomkill
+ */
+static void ebpf_send_specific_oomkill_data(char *type, int value)
+{
+    write_begin_chart(type, NETDATA_OOMKILL_CHART);
+    write_chart_dimension(oomkill_publish_aggregated.name, (long long)value);
+    write_end_chart();
+}
+
+/**
+ * Obsolete specific OOMkill charts
+ *
+ * Obsolete charts for cgroup/application.
+ *
+ * @param type the chart type.
+ */
+static void ebpf_obsolete_specific_oomkill_charts(char *type)
+{
+    ebpf_write_chart_obsolete(type, NETDATA_OOMKILL_CHART, "OOM kills. This chart is provided by eBPF plugin.",
+                              EBPF_COMMON_DIMENSION_KILLS, NETDATA_EBPF_MEMORY_GROUP,
+                              NETDATA_EBPF_CHART_TYPE_LINE, NULL,
+                              NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5600);
+}
+
+/**
+ * Send data to Netdata calling auxiliary functions.
+ */
+void ebpf_oomkill_send_cgroup_data()
+{
+    if (!ebpf_cgroup_pids)
+        return;
+
+    pthread_mutex_lock(&mutex_cgroup_shm);
+    ebpf_cgroup_target_t *ect;
+
+    int has_systemd = shm_ebpf_cgroup.header->systemd_enabled;
+    if (has_systemd) {
+        static int systemd_charts = 0;
+        if (!systemd_charts) {
+            ebpf_create_systemd_oomkill_charts();
+            systemd_charts = 1;
+        }
+        systemd_charts = ebpf_send_systemd_oomkill_charts();
+    }
+
+    for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
+        if (ect->systemd)
+            continue;
+
+        if (!(ect->flags & NETDATA_EBPF_CGROUP_HAS_OOMKILL_CHART) && ect->updated) {
+            ebpf_create_specific_oomkill_charts(ect->name);
+            ect->flags |= NETDATA_EBPF_CGROUP_HAS_OOMKILL_CHART;
+        }
+
+        if (ect->flags & NETDATA_EBPF_CGROUP_HAS_OOMKILL_CHART && ect->updated) {
+            ebpf_send_specific_oomkill_data(ect->name, ect->oomkill);
+        } else {
+            ebpf_obsolete_specific_oomkill_charts(ect->name);
+            ect->flags &= ~NETDATA_EBPF_CGROUP_HAS_OOMKILL_CHART;
+        }
+    }
+
+    pthread_mutex_unlock(&mutex_cgroup_shm);
+}
+
+/**
+ * Read data
+ *
+ * Read OOMKILL events from table.
+ *
+ * @param keys vector where data will be stored
+ *
+ * @return The number of elements read from the table
+ */
+static uint32_t oomkill_read_data(int32_t *keys)
+{
+    // the first `i` entries of `keys` will contain the currently active PIDs
+    // in the eBPF map.
+    uint32_t i = 0;
+
+    uint32_t curr_key = 0;
+    uint32_t key = 0;
+    int mapfd = oomkill_maps[OOMKILL_MAP_KILLCNT].map_fd;
+    while (bpf_map_get_next_key(mapfd, &curr_key, &key) == 0) {
+        curr_key = key;
+
+        keys[i] = (int32_t)key;
+        i += 1;
+
+        // delete this key now that we've recorded its existence. there's no
+        // race here, as the same PID will only get OOM killed once.
+        int test = bpf_map_delete_elem(mapfd, &key);
+        if (unlikely(test < 0)) {
+            // since there's only 1 thread doing these deletions, it should be
+            // impossible to get this condition.
+            error("key unexpectedly not available for deletion.");
+        }
+    }
+
+    return i;
+}
+
+/**
+ * Update cgroup
+ *
+ * Update cgroup data based on the PIDs that received an OOM kill event.
+ *
+ * @param keys  vector with the PIDs that had an OOM kill event
+ * @param total number of elements in the keys vector.
+ */
+static void ebpf_update_oomkill_cgroup(int32_t *keys, uint32_t total)
+{
+    ebpf_cgroup_target_t *ect;
+    for (ect = ebpf_cgroup_pids; ect; ect = ect->next) {
+        ect->oomkill = 0;
+        struct pid_on_target2 *pids;
+        for (pids = ect->pids; pids; pids = pids->next) {
+            uint32_t j;
+            int32_t pid = pids->pid;
+            for (j = 0; j < total; j++) {
+                if (pid == keys[j]) {
+                    ect->oomkill = 1;
+                    break;
+                }
+            }
+        }
+    }
+}
+
 /**
 * Main loop for this collector.
 */
 static void oomkill_collector(ebpf_module_t *em)
 {
-    UNUSED(em);
+    int cgroups = em->cgroup_charts;
+    int32_t keys[NETDATA_OOMKILL_MAX_ENTRIES];
+    memset(keys, 0, sizeof(keys));
 
     // loop and read until ebpf plugin is closed.
     while (!close_ebpf_plugin) {
@@ -141,9 +316,16 @@ static void oomkill_collector(ebpf_module_t *em)
         pthread_cond_wait(&collect_data_cond_var, &collect_data_mutex);
         pthread_mutex_lock(&lock);
 
+        uint32_t count = oomkill_read_data(keys);
+        if (cgroups && count)
+            ebpf_update_oomkill_cgroup(keys, count);
+
         // write everything from the ebpf map.
-        write_begin_chart(NETDATA_APPS_FAMILY, "oomkills");
-        oomkill_write_data();
+        if (cgroups)
+            ebpf_oomkill_send_cgroup_data();
+
+        write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_OOMKILL_CHART);
+        oomkill_write_data(keys, count);
         write_end_chart();
 
         pthread_mutex_unlock(&lock);
@@ -163,7 +345,7 @@ void ebpf_oomkill_create_apps_charts(struct ebpf_module *em, void *ptr)
     UNUSED(em);
 
     struct target *root = ptr;
-    ebpf_create_charts_on_apps("oomkills",
+    ebpf_create_charts_on_apps(NETDATA_OOMKILL_CHART,
                                "OOM kills",
                                EBPF_COMMON_DIMENSION_KILLS,
                                "mem",

+ 2 - 0
collectors/ebpf.plugin/ebpf_oomkill.h

@@ -20,6 +20,8 @@ typedef uint8_t oomkill_ebpf_val_t;
 #define NETDATA_OOMKILL_SLEEP_MS 650000ULL
 #define NETDATA_OOMKILL_CONFIG_FILE "oomkill.conf"
 
+#define NETDATA_OOMKILL_CHART "oomkills"
+
 extern struct config oomkill_config;
 extern void *ebpf_oomkill_thread(void *ptr);
 extern void ebpf_oomkill_create_apps_charts(struct ebpf_module *em, void *ptr);

Some files were not shown because too many files changed in this diff