Browse Source

eBPF memory (#18232)

thiagoftsm 6 months ago
parent
commit
3590d9fc27

+ 2 - 2
packaging/cmake/Modules/NetdataEBPFCORE.cmake

@@ -11,8 +11,8 @@ set(ebpf-co-re_SOURCE_DIR "${CMAKE_BINARY_DIR}/ebpf-co-re")
 function(netdata_fetch_ebpf_co_re)
     ExternalProject_Add(
         ebpf-co-re
-        URL https://github.com/netdata/ebpf-co-re/releases/download/v1.4.5/netdata-ebpf-co-re-glibc-v1.4.5.tar.xz
-        URL_HASH SHA256=6937a167f6f8c65a0b0528a297df9944d15a649c9af34a70a678d4eabbbf22d1
+        URL https://github.com/netdata/ebpf-co-re/releases/download/v1.4.5.1/netdata-ebpf-co-re-glibc-v1.4.5.1.tar.xz
+        URL_HASH SHA256=10d49602c873932a4e0a3717a4af2137434b480d0170c2fb000ec70ae02f6e30
         SOURCE_DIR "${ebpf-co-re_SOURCE_DIR}"
         CONFIGURE_COMMAND ""
         BUILD_COMMAND ""

+ 4 - 4
packaging/cmake/Modules/NetdataEBPFLegacy.cmake

@@ -20,19 +20,19 @@ function(netdata_fetch_legacy_ebpf_code)
     endif()
 
     if(need_static)
-        set(_hash 3f97034a595b5fd52ac4c5f43ce43085cc1391f39f2a281191efb15cc9666af4)
+        set(_hash 1c0c8f1177514e9e21a23c28841406595e57b7cfacd93746ff2d6b25987b94a6)
         set(_libc "static")
     elseif(_libc STREQUAL "glibc")
-        set(_hash 66094175e4d79b8a7222bc20d9e0d1bfbd37414891f88fc0113da53a97f8896a)
+        set(_hash e365a76a2bb25190f1d91e4dea2cfc5ff5db63b5238fbfbc89f72755cf85a12c)
     elseif(_libc STREQUAL "musl")
-        set(_hash 58daad4a82cf3c511372892dd21b2825fcb138aad22c1db6bc889b1965439f5e)
+        set(_hash ec14dcdfa29d4fba1cea6763740b9d37683515bde88a1a29b6e7c70ce01a604d)
     else()
         message(FATAL_ERROR "Could not determine libc implementation, unable to install eBPF legacy code.")
     endif()
 
     ExternalProject_Add(
         ebpf-code-legacy
-        URL https://github.com/netdata/kernel-collector/releases/download/v1.4.5/netdata-kernel-collector-${_libc}-v1.4.5.tar.xz
+        URL https://github.com/netdata/kernel-collector/releases/download/v1.4.5.1/netdata-kernel-collector-${_libc}-v1.4.5.1.tar.xz
         URL_HASH SHA256=${_hash}
         SOURCE_DIR "${ebpf-legacy_SOURCE_DIR}"
         CONFIGURE_COMMAND ""

+ 69 - 28
src/collectors/ebpf.plugin/ebpf.c

@@ -30,6 +30,7 @@ int ebpf_nprocs;
 int isrh = 0;
 int main_thread_id = 0;
 int process_pid_fd = -1;
+uint64_t collect_pids = 0;
 static size_t global_iterations_counter = 1;
 bool publish_internal_metrics = true;
 
@@ -996,7 +997,7 @@ static inline void ebpf_create_apps_for_module(ebpf_module_t *em, struct ebpf_ta
  */
 static void ebpf_create_apps_charts(struct ebpf_target *root)
 {
-    if (unlikely(!ebpf_all_pids))
+    if (unlikely(!ebpf_pids))
         return;
 
     struct ebpf_target *w;
@@ -1028,21 +1029,15 @@ static void ebpf_create_apps_charts(struct ebpf_target *root)
         }
     }
 
-    int i;
-    if (!newly_added) {
+    if (newly_added) {
+        int i;
         for (i = 0; i < EBPF_MODULE_FUNCTION_IDX ; i++) {
-            ebpf_module_t *current = &ebpf_modules[i];
-            if (current->apps_charts & NETDATA_EBPF_APPS_FLAG_CHART_CREATED)
+            if (!(collect_pids & (1<<i)))
                 continue;
 
+            ebpf_module_t *current = &ebpf_modules[i];
             ebpf_create_apps_for_module(current, root);
         }
-        return;
-    }
-
-    for (i = 0; i < EBPF_MODULE_FUNCTION_IDX ; i++) {
-        ebpf_module_t *current = &ebpf_modules[i];
-        ebpf_create_apps_for_module(current, root);
     }
 }
 
@@ -2680,7 +2675,7 @@ static void ebpf_allocate_common_vectors()
 {
     ebpf_judy_pid.pid_table = ebpf_allocate_pid_aral(NETDATA_EBPF_PID_SOCKET_ARAL_TABLE_NAME,
                                                      sizeof(netdata_ebpf_judy_pid_stats_t));
-    ebpf_all_pids = callocz((size_t)pid_max, sizeof(struct ebpf_pid_stat *));
+    ebpf_pids = callocz((size_t)pid_max, sizeof(ebpf_pid_data_t));
     ebpf_aral_init();
 }
 
@@ -3014,7 +3009,7 @@ static int ebpf_load_collector_config(char *path, int *disable_cgroups, int upda
 /**
  * Set global variables reading environment variables
  */
-void set_global_variables()
+static void ebpf_set_global_variables()
 {
     // Get environment variables
     ebpf_plugin_dir = getenv("NETDATA_PLUGINS_DIR");
@@ -3418,6 +3413,11 @@ void ebpf_send_statistic_data()
     }
     ebpf_write_end_chart();
 
+    ebpf_write_begin_chart(NETDATA_MONITORING_FAMILY, "monitoring_pid", "");
+    write_chart_dimension("user", ebpf_all_pids_count);
+    write_chart_dimension("kernel", ebpf_hash_table_pids_count);
+    ebpf_write_end_chart();
+
     ebpf_write_begin_chart(NETDATA_MONITORING_FAMILY, NETDATA_EBPF_LIFE_TIME, "");
     for (i = 0; i < EBPF_MODULE_FUNCTION_IDX ; i++) {
         ebpf_module_t *wem = &ebpf_modules[i];
@@ -3489,6 +3489,37 @@ static void update_internal_metric_variable()
         publish_internal_metrics = false;
 }
 
+/**
+ * Create PIDs Chart
+ *
+ * Declare the "monitoring_pid" chart (titled "Total number of monitored PIDs") under
+ * NETDATA_MONITORING_FAMILY, together with its two absolute dimensions, "user" and "kernel".
+ * This function only emits the chart and dimension definitions; the values for these
+ * dimensions are written by ebpf_send_statistic_data().
+ *
+ * @param order        order to display chart
+ * @param update_every time used to update charts
+ */
+static void ebpf_create_pids_chart(int order, int update_every)
+{
+    ebpf_write_chart_cmd(NETDATA_MONITORING_FAMILY,
+                         "monitoring_pid",
+                         "",
+                         "Total number of monitored PIDs",
+                         "pids",
+                         NETDATA_EBPF_FAMILY,
+                         NETDATA_EBPF_CHART_TYPE_LINE,
+                         "netdata.ebpf_pids",
+                         order,
+                         update_every,
+                         "main");
+
+    ebpf_write_global_dimension("user",
+                                "user",
+                                ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]);
+
+    ebpf_write_global_dimension("kernel",
+                                "kernel",
+                                ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]);
+}
+
 /**
  * Create Thread Chart
  *
@@ -3538,7 +3569,7 @@ static void ebpf_create_thread_chart(char *name,
                                     (char *)em->info.thread_name,
                                     ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]);
     }
-}
+                                     }
 
 /**
  * Create chart for Load Thread
@@ -3741,6 +3772,8 @@ static void ebpf_create_statistic_charts(int update_every)
                              update_every,
                              NULL);
 
+    ebpf_create_pids_chart(NETDATA_EBPF_ORDER_PIDS, update_every);
+
     ebpf_create_thread_chart(NETDATA_EBPF_LIFE_TIME,
                              "Time remaining for thread.",
                              "seconds",
@@ -3974,18 +4007,18 @@ int main(int argc, char **argv)
     clocks_init();
     nd_log_initialize_for_external_plugins(NETDATA_EBPF_PLUGIN_NAME);
 
-    main_thread_id = gettid_cached();
-
-    set_global_variables();
-    ebpf_parse_args(argc, argv);
-    ebpf_manage_pid(getpid());
-
+    ebpf_set_global_variables();
     if (ebpf_can_plugin_load_code(running_on_kernel, NETDATA_EBPF_PLUGIN_NAME))
         return 2;
 
     if (ebpf_adjust_memory_limit())
         return 3;
 
+    main_thread_id = gettid_cached();
+
+    ebpf_parse_args(argc, argv);
+    ebpf_manage_pid(getpid());
+
     signal(SIGINT, ebpf_stop_threads);
     signal(SIGQUIT, ebpf_stop_threads);
     signal(SIGTERM, ebpf_stop_threads);
@@ -4018,7 +4051,7 @@ int main(int argc, char **argv)
         ebpf_cgroup_integration,
         NULL);
 
-    int i;
+    uint32_t i;
     for (i = 0; ebpf_threads[i].name != NULL; i++) {
         struct netdata_static_thread *st = &ebpf_threads[i];
 
@@ -4028,6 +4061,10 @@ int main(int argc, char **argv)
         if (em->enabled != NETDATA_THREAD_EBPF_NOT_RUNNING) {
             em->enabled = NETDATA_THREAD_EBPF_RUNNING;
             em->lifetime = EBPF_NON_FUNCTION_LIFE_TIME;
+
+            if (em->functions.apps_routine && (em->apps_charts || em->cgroup_charts)) {
+                collect_pids |= 1<<i;
+            }
             st->thread = nd_thread_create(st->name, NETDATA_THREAD_OPTION_JOINABLE, st->start_routine, em);
         } else {
             em->lifetime = EBPF_DEFAULT_LIFETIME;
@@ -4038,7 +4075,7 @@ int main(int argc, char **argv)
     heartbeat_t hb;
     heartbeat_init(&hb);
     int update_apps_every = (int) EBPF_CFG_UPDATE_APPS_EVERY_DEFAULT;
-    int max_period = update_apps_every * EBPF_CLEANUP_FACTOR;
+    uint32_t max_period = EBPF_CLEANUP_FACTOR;
     int update_apps_list = update_apps_every - 1;
     int process_maps_per_core = ebpf_modules[EBPF_MODULE_PROCESS_IDX].maps_per_core;
     //Plugin will be killed when it receives a signal
@@ -4057,12 +4094,16 @@ int main(int argc, char **argv)
         if (++update_apps_list == update_apps_every) {
             update_apps_list = 0;
             pthread_mutex_lock(&lock);
-            pthread_mutex_lock(&collect_data_mutex);
-            ebpf_cleanup_exited_pids(max_period);
-            collect_data_for_all_processes(process_pid_fd, process_maps_per_core);
-
-            ebpf_create_apps_charts(apps_groups_root_target);
-            pthread_mutex_unlock(&collect_data_mutex);
+            if (collect_pids) {
+                pthread_mutex_lock(&collect_data_mutex);
+                ebpf_parse_proc_files();
+                if (collect_pids & (1<<EBPF_MODULE_PROCESS_IDX)) {
+                    collect_data_for_all_processes(process_pid_fd, process_maps_per_core, max_period);
+                }
+
+                ebpf_create_apps_charts(apps_groups_root_target);
+                pthread_mutex_unlock(&collect_data_mutex);
+            }
             pthread_mutex_unlock(&lock);
         }
     }

+ 1 - 1
src/collectors/ebpf.plugin/ebpf.d/cachestat.conf

@@ -37,6 +37,6 @@
 #    pid table size = 32768
     ebpf type format = auto
     ebpf co-re tracing = trampoline
-    collect pid = all
+    collect pid = real parent
 #    maps per core = yes
     lifetime = 300

+ 1 - 1
src/collectors/ebpf.plugin/ebpf.d/dcstat.conf

@@ -35,6 +35,6 @@
 #    pid table size = 32768
     ebpf type format = auto
     ebpf co-re tracing = trampoline
-    collect pid = all
+    collect pid = real parent
 #    maps per core = yes
     lifetime = 300

+ 1 - 0
src/collectors/ebpf.plugin/ebpf.d/fd.conf

@@ -23,5 +23,6 @@
 #    pid table size = 32768
     ebpf type format = auto
     ebpf co-re tracing = trampoline
+    collect pid = real parent
 #    maps per core = yes
     lifetime = 300

+ 12 - 0
src/collectors/ebpf.plugin/ebpf.d/oomkill.conf

@@ -3,9 +3,21 @@
 #  `return` : In the `return` mode, the eBPF collector monitors the same kernel functions as `entry`, but also creates
 #            new charts for the return of these functions, such as errors.
 #
+# The eBPF collector also creates charts for each running application through an integration with the `apps.plugin`
+# or `cgroups.plugin`.
+# If you want to disable the integration with `apps.plugin` or `cgroups.plugin` along with the above charts, change
+# the settings `apps` and `cgroups` to 'no'.
+#
+# The `pid table size` defines the maximum number of PIDs stored inside the hash table.
+#
+# The `maps per core` defines if hash tables will be per core or not. This option is ignored on kernels older than 4.6.
+#
 # The `lifetime` defines the time length a thread will run when it is enabled by a function.
 #
 [global]
 #    ebpf load mode = entry
 #    update every = 1
+    ebpf type format = auto
+    ebpf co-re tracing = trampoline
+    collect pid = real parent
     lifetime = 300

+ 1 - 1
src/collectors/ebpf.plugin/ebpf.d/process.conf

@@ -26,6 +26,6 @@
 #    cgroups = no
 #    update every = 10
 #    pid table size = 32768
-    collect pid = all
+    collect pid = real parent
 #    maps per core = yes
     lifetime = 300

+ 1 - 1
src/collectors/ebpf.plugin/ebpf.d/shm.conf

@@ -31,7 +31,7 @@
 #    pid table size = 32768
     ebpf type format = auto
     ebpf co-re tracing = trampoline
-    collect pid = all
+    collect pid = real parent
 #    maps per core = yes
     lifetime = 300
 

+ 1 - 1
src/collectors/ebpf.plugin/ebpf.d/swap.conf

@@ -30,6 +30,6 @@
 #    pid table size = 32768
     ebpf type format = auto
     ebpf co-re tracing = trampoline
-    collect pid = all
+    collect pid = real parent
 #    maps per core = yes
     lifetime = 300

Some files were not shown because too many files changed in this diff