Browse Source

eBPF process (collector improvements) (#11643)

thiagoftsm 3 years ago
parent
commit
075fd5c821

+ 3 - 4
collectors/ebpf.plugin/ebpf_apps.h

@@ -357,12 +357,11 @@ typedef struct ebpf_process_stat {
     //Counter
     uint32_t exit_call;
     uint32_t release_call;
-    uint32_t fork_call;
-    uint32_t clone_call;
+    uint32_t create_process;
+    uint32_t create_thread;
 
     //Counter
-    uint32_t fork_err;
-    uint32_t clone_err;
+    uint32_t task_err;
 
     uint8_t removeme;
 } ebpf_process_stat_t;

+ 247 - 50
collectors/ebpf.plugin/ebpf_process.c

@@ -30,6 +30,14 @@ static ebpf_local_maps_t process_maps[] = {{.name = "tbl_pid_stats", .internal_i
                                             .type = NETDATA_EBPF_MAP_CONTROLLER,
                                             .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}};
 
+char *tracepoint_sched_type = { "sched" } ;
+char *tracepoint_sched_process_exit = { "sched_process_exit" };
+char *tracepoint_sched_process_exec = { "sched_process_exec" };
+char *tracepoint_sched_process_fork = { "sched_process_fork" };
+// State of each tracepoint as found before this thread touched it.
+// ebpf_process_disable_tracepoints() only disables a tracepoint whose flag is 0,
+// so the flags start at 1: when ebpf_process_enable_tracepoints() returns early
+// (or never runs), the tracepoints it did not inspect are left untouched at cleanup
+// instead of being switched off on behalf of whoever else enabled them.
+static int was_sched_process_exit_enabled = 1;
+static int was_sched_process_exec_enabled = 1;
+static int was_sched_process_fork_enabled = 1;
+
+
 static netdata_idx_t *process_hash_values = NULL;
 static netdata_syscall_stat_t process_aggregated_data[NETDATA_KEY_PUBLISH_PROCESS_END];
 static netdata_publish_syscall_t process_publish_aggregated[NETDATA_KEY_PUBLISH_PROCESS_END];
@@ -115,14 +123,14 @@ static void ebpf_process_send_data(ebpf_module_t *em)
     netdata_publish_vfs_common_t pvc;
     ebpf_update_global_publish(process_publish_aggregated, &pvc, process_aggregated_data);
 
-    write_count_chart(NETDATA_EXIT_SYSCALL, NETDATA_EBPF_FAMILY,
+    write_count_chart(NETDATA_EXIT_SYSCALL, NETDATA_EBPF_SYSTEM_GROUP,
                       &process_publish_aggregated[NETDATA_KEY_PUBLISH_PROCESS_EXIT], 2);
-    write_count_chart(NETDATA_PROCESS_SYSCALL, NETDATA_EBPF_FAMILY,
+    write_count_chart(NETDATA_PROCESS_SYSCALL, NETDATA_EBPF_SYSTEM_GROUP,
                       &process_publish_aggregated[NETDATA_KEY_PUBLISH_PROCESS_FORK], 2);
 
-    write_status_chart(NETDATA_EBPF_FAMILY, &pvc);
+    write_status_chart(NETDATA_EBPF_SYSTEM_GROUP, &pvc);
     if (em->mode < MODE_ENTRY) {
-        write_err_chart(NETDATA_PROCESS_ERROR_NAME, NETDATA_EBPF_FAMILY,
+        write_err_chart(NETDATA_PROCESS_ERROR_NAME, NETDATA_EBPF_SYSTEM_GROUP,
                         &process_publish_aggregated[NETDATA_KEY_PUBLISH_PROCESS_FORK], 2);
     }
 }
@@ -181,7 +189,7 @@ void ebpf_process_remove_pids()
  *
  * @param root the target list.
  */
-void ebpf_process_send_apps_data(struct target *root)
+void ebpf_process_send_apps_data(struct target *root, ebpf_module_t *em)
 {
     struct target *w;
     collected_number value;
@@ -189,7 +197,7 @@ void ebpf_process_send_apps_data(struct target *root)
     write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_TASK_PROCESS);
     for (w = root; w; w = w->next) {
         if (unlikely(w->exposed && w->processes)) {
-            value = ebpf_process_sum_values_for_pids(w->root_pid, offsetof(ebpf_process_publish_apps_t, call_do_fork));
+            value = ebpf_process_sum_values_for_pids(w->root_pid, offsetof(ebpf_process_publish_apps_t, create_process));
             write_chart_dimension(w->name, value);
         }
     }
@@ -198,7 +206,17 @@ void ebpf_process_send_apps_data(struct target *root)
     write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_TASK_THREAD);
     for (w = root; w; w = w->next) {
         if (unlikely(w->exposed && w->processes)) {
-            value = ebpf_process_sum_values_for_pids(w->root_pid, offsetof(ebpf_process_publish_apps_t, call_sys_clone));
+            value = ebpf_process_sum_values_for_pids(w->root_pid, offsetof(ebpf_process_publish_apps_t, create_thread));
+            write_chart_dimension(w->name, value);
+        }
+    }
+    write_end_chart();
+
+    write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_TASK_EXIT);
+    for (w = root; w; w = w->next) {
+        if (unlikely(w->exposed && w->processes)) {
+            value = ebpf_process_sum_values_for_pids(w->root_pid, offsetof(ebpf_process_publish_apps_t,
+                                                                           call_do_exit));
             write_chart_dimension(w->name, value);
         }
     }
@@ -214,6 +232,18 @@ void ebpf_process_send_apps_data(struct target *root)
     }
     write_end_chart();
 
+    if (em->mode < MODE_ENTRY) {
+        write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_TASK_ERROR);
+        for (w = root; w; w = w->next) {
+            if (unlikely(w->exposed && w->processes)) {
+                value = ebpf_process_sum_values_for_pids(w->root_pid, offsetof(ebpf_process_publish_apps_t,
+                                                                               task_err));
+                write_chart_dimension(w->name, value);
+            }
+        }
+        write_end_chart();
+    }
+
     ebpf_process_remove_pids();
 }
 
@@ -279,11 +309,10 @@ static void ebpf_process_update_apps_data()
         //Read data
         cad->call_do_exit = ps->exit_call;
         cad->call_release_task = ps->release_call;
-        cad->call_do_fork = ps->fork_call;
-        cad->call_sys_clone = ps->clone_call;
+        cad->create_process = ps->create_process;
+        cad->create_thread = ps->create_thread;
 
-        cad->ecall_do_fork = ps->fork_err;
-        cad->ecall_sys_clone = ps->clone_err;
+        cad->task_err = ps->task_err;
 
         pids = pids->next;
     }
@@ -358,7 +387,7 @@ static void ebpf_process_status_chart(char *family, char *name, char *axis,
  */
 static void ebpf_create_global_charts(ebpf_module_t *em)
 {
-    ebpf_create_chart(NETDATA_EBPF_FAMILY,
+    ebpf_create_chart(NETDATA_EBPF_SYSTEM_GROUP,
                       NETDATA_PROCESS_SYSCALL,
                       "Start process",
                       EBPF_COMMON_DIMENSION_CALL,
@@ -370,7 +399,7 @@ static void ebpf_create_global_charts(ebpf_module_t *em)
                       &process_publish_aggregated[NETDATA_KEY_PUBLISH_PROCESS_FORK],
                       2, NETDATA_EBPF_MODULE_NAME_PROCESS);
 
-    ebpf_create_chart(NETDATA_EBPF_FAMILY,
+    ebpf_create_chart(NETDATA_EBPF_SYSTEM_GROUP,
                       NETDATA_EXIT_SYSCALL,
                       "Exit process",
                       EBPF_COMMON_DIMENSION_CALL,
@@ -382,7 +411,7 @@ static void ebpf_create_global_charts(ebpf_module_t *em)
                       &process_publish_aggregated[NETDATA_KEY_PUBLISH_PROCESS_EXIT],
                       2, NETDATA_EBPF_MODULE_NAME_PROCESS);
 
-    ebpf_process_status_chart(NETDATA_EBPF_FAMILY,
+    ebpf_process_status_chart(NETDATA_EBPF_SYSTEM_GROUP,
                               NETDATA_PROCESS_STATUS_NAME,
                               EBPF_COMMON_DIMENSION_DIFFERENCE,
                               NETDATA_PROCESS_GROUP,
@@ -390,7 +419,7 @@ static void ebpf_create_global_charts(ebpf_module_t *em)
                               21004);
 
     if (em->mode < MODE_ENTRY) {
-        ebpf_create_chart(NETDATA_EBPF_FAMILY,
+        ebpf_create_chart(NETDATA_EBPF_SYSTEM_GROUP,
                           NETDATA_PROCESS_ERROR_NAME,
                           "Fails to create process",
                           EBPF_COMMON_DIMENSION_CALL,
@@ -414,12 +443,11 @@ static void ebpf_create_global_charts(ebpf_module_t *em)
  */
 void ebpf_process_create_apps_charts(struct ebpf_module *em, void *ptr)
 {
-    UNUSED(em);
     struct target *root = ptr;
     ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_TASK_PROCESS,
                                "Process started",
                                EBPF_COMMON_DIMENSION_CALL,
-                               NETDATA_APPS_PROCESS_GROUP,
+                               NETDATA_PROCESS_GROUP,
                                NETDATA_EBPF_CHART_TYPE_STACKED,
                                20065,
                                ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX],
@@ -428,20 +456,41 @@ void ebpf_process_create_apps_charts(struct ebpf_module *em, void *ptr)
     ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_TASK_THREAD,
                                "Threads started",
                                EBPF_COMMON_DIMENSION_CALL,
-                               NETDATA_APPS_PROCESS_GROUP,
+                               NETDATA_PROCESS_GROUP,
                                NETDATA_EBPF_CHART_TYPE_STACKED,
                                20066,
                                ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX],
                                root, NETDATA_EBPF_MODULE_NAME_PROCESS);
 
+    ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_TASK_EXIT,
+                               "Tasks starts exit process.",
+                               EBPF_COMMON_DIMENSION_CALL,
+                               NETDATA_PROCESS_GROUP,
+                               NETDATA_EBPF_CHART_TYPE_STACKED,
+                               20067,
+                               ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX],
+                               root, NETDATA_EBPF_MODULE_NAME_PROCESS);
+
     ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_TASK_CLOSE,
                                "Tasks closed",
                                EBPF_COMMON_DIMENSION_CALL,
-                               NETDATA_APPS_PROCESS_GROUP,
+                               NETDATA_PROCESS_GROUP,
                                NETDATA_EBPF_CHART_TYPE_STACKED,
-                               20067,
+                               20068,
                                ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX],
                                root, NETDATA_EBPF_MODULE_NAME_PROCESS);
+
+    if (em->mode < MODE_ENTRY) {
+        ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_TASK_ERROR,
+                                   "Errors to create process or threads.",
+                                   EBPF_COMMON_DIMENSION_CALL,
+                                   NETDATA_PROCESS_GROUP,
+                                   NETDATA_EBPF_CHART_TYPE_STACKED,
+                                   20069,
+                                   ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX],
+                                   root,
+                                   NETDATA_EBPF_MODULE_NAME_PROCESS);
+    }
 }
 
 /**
@@ -547,22 +596,20 @@ static void ebpf_process_sum_cgroup_pids(ebpf_process_stat_t *ps, struct pid_on_
 
         accumulator.exit_call += ps->exit_call;
         accumulator.release_call += ps->release_call;
-        accumulator.fork_call += ps->fork_call;
-        accumulator.clone_call += ps->clone_call;
+        accumulator.create_process += ps->create_process;
+        accumulator.create_thread += ps->create_thread;
 
-        accumulator.fork_err += ps->fork_err;
-        accumulator.clone_err += ps->clone_err;
+        accumulator.task_err += ps->task_err;
 
         pids = pids->next;
     }
 
     ps->exit_call = (accumulator.exit_call >= ps->exit_call) ? accumulator.exit_call : ps->exit_call;
     ps->release_call = (accumulator.release_call >= ps->release_call) ? accumulator.release_call : ps->release_call;
-    ps->fork_call = (accumulator.fork_call >= ps->fork_call) ? accumulator.fork_call : ps->fork_call;
-    ps->clone_call = (accumulator.clone_call >= ps->clone_call) ? accumulator.clone_call : ps->clone_call;
+    ps->create_process = (accumulator.create_process >= ps->create_process) ? accumulator.create_process : ps->create_process;
+    ps->create_thread = (accumulator.create_thread >= ps->create_thread) ? accumulator.create_thread : ps->create_thread;
 
-    ps->fork_err = (accumulator.fork_err >= ps->fork_err) ? accumulator.fork_err : ps->fork_err;
-    ps->clone_err = (accumulator.clone_err >= ps->clone_err) ? accumulator.clone_err : ps->clone_err;
+    ps->task_err = (accumulator.task_err >= ps->task_err) ? accumulator.task_err : ps->task_err;
 }
 
 /*
@@ -572,23 +619,36 @@ static void ebpf_process_sum_cgroup_pids(ebpf_process_stat_t *ps, struct pid_on_
  *
  * @param type   chart type
  * @param values structure with values that will be sent to netdata
+ * @param em   the structure with thread information
  */
-static void ebpf_send_specific_process_data(char *type, ebpf_process_stat_t *values)
+static void ebpf_send_specific_process_data(char *type, ebpf_process_stat_t *values, ebpf_module_t *em)
 {
     write_begin_chart(type, NETDATA_SYSCALL_APPS_TASK_PROCESS);
     write_chart_dimension(process_publish_aggregated[NETDATA_KEY_PUBLISH_PROCESS_FORK].name,
-                          (long long) values->fork_call);
+                          (long long) values->create_process);
     write_end_chart();
 
     write_begin_chart(type, NETDATA_SYSCALL_APPS_TASK_THREAD);
     write_chart_dimension(process_publish_aggregated[NETDATA_KEY_PUBLISH_PROCESS_CLONE].name,
-                          (long long) values->clone_call);
+                          (long long) values->create_thread);
     write_end_chart();
 
-    write_begin_chart(type, NETDATA_SYSCALL_APPS_TASK_CLOSE);
+    // The exit chart publishes the exit counter; release_call belongs to the
+    // close chart below, otherwise both charts would show the same value.
+    write_begin_chart(type, NETDATA_SYSCALL_APPS_TASK_EXIT);
     write_chart_dimension(process_publish_aggregated[NETDATA_KEY_PUBLISH_PROCESS_EXIT].name,
-                          (long long) values->release_call);
+                          (long long) values->exit_call);
     write_end_chart();
+
+    write_begin_chart(type, NETDATA_SYSCALL_APPS_TASK_CLOSE);
+    write_chart_dimension(process_publish_aggregated[NETDATA_KEY_PUBLISH_PROCESS_RELEASE_TASK].name,
+                          (long long) values->release_call);
+    write_end_chart();
+
+    // The error chart is only written when the module mode is below MODE_ENTRY.
+    if (em->mode < MODE_ENTRY) {
+        write_begin_chart(type, NETDATA_SYSCALL_APPS_TASK_ERROR);
+        write_chart_dimension(process_publish_aggregated[NETDATA_KEY_PUBLISH_PROCESS_EXIT].name,
+                              (long long) values->task_err);
+        write_end_chart();
+    }
 }
 
 /**
@@ -597,8 +657,9 @@ static void ebpf_send_specific_process_data(char *type, ebpf_process_stat_t *val
  * Create charts for cgroup/application
  *
  * @param type the chart type.
+ * @param em   the structure with thread information
  */
-static void ebpf_create_specific_process_charts(char *type)
+static void ebpf_create_specific_process_charts(char *type, ebpf_module_t *em)
 {
     ebpf_create_chart(type, NETDATA_SYSCALL_APPS_TASK_PROCESS, "Process started",
                       EBPF_COMMON_DIMENSION_CALL, NETDATA_PROCESS_CGROUP_GROUP,
@@ -615,13 +676,31 @@ static void ebpf_create_specific_process_charts(char *type)
                       &process_publish_aggregated[NETDATA_KEY_PUBLISH_PROCESS_CLONE],
                       1, NETDATA_EBPF_MODULE_NAME_PROCESS);
 
-    ebpf_create_chart(type, NETDATA_SYSCALL_APPS_TASK_CLOSE, "Tasks closed",
+    ebpf_create_chart(type, NETDATA_SYSCALL_APPS_TASK_EXIT, "Tasks starts exit process.",
                       EBPF_COMMON_DIMENSION_CALL, NETDATA_PROCESS_CGROUP_GROUP,
-                      NETDATA_CGROUP_PROCESS_CLOSE_CONTEXT,
+                      NETDATA_CGROUP_PROCESS_EXIT_CONTEXT,
                       NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5002,
                       ebpf_create_global_dimension,
                       &process_publish_aggregated[NETDATA_KEY_PUBLISH_PROCESS_EXIT],
                       1, NETDATA_EBPF_MODULE_NAME_PROCESS);
+
+    ebpf_create_chart(type, NETDATA_SYSCALL_APPS_TASK_CLOSE, "Tasks closed",
+                      EBPF_COMMON_DIMENSION_CALL, NETDATA_PROCESS_CGROUP_GROUP,
+                      NETDATA_CGROUP_PROCESS_CLOSE_CONTEXT,
+                      NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5003,
+                      ebpf_create_global_dimension,
+                      &process_publish_aggregated[NETDATA_KEY_PUBLISH_PROCESS_RELEASE_TASK],
+                      1, NETDATA_EBPF_MODULE_NAME_PROCESS);
+
+    if (em->mode < MODE_ENTRY) {
+        ebpf_create_chart(type, NETDATA_SYSCALL_APPS_TASK_ERROR, "Errors to create process or threads.",
+                          EBPF_COMMON_DIMENSION_CALL, NETDATA_PROCESS_CGROUP_GROUP,
+                          NETDATA_CGROUP_PROCESS_ERROR_CONTEXT,
+                          NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5004,
+                          ebpf_create_global_dimension,
+                          &process_publish_aggregated[NETDATA_KEY_PUBLISH_PROCESS_EXIT],
+                          1, NETDATA_EBPF_MODULE_NAME_PROCESS);
+    }
 }
 
 /**
@@ -630,8 +709,9 @@ static void ebpf_create_specific_process_charts(char *type)
  * Obsolete charts for cgroup/application
  *
  * @param type the chart type.
+ * @param em   the structure with thread information
  */
-static void ebpf_obsolete_specific_process_charts(char *type)
+static void ebpf_obsolete_specific_process_charts(char *type, ebpf_module_t *em)
 {
     ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_TASK_PROCESS, "Process started",
                               EBPF_COMMON_DIMENSION_CALL, NETDATA_PROCESS_GROUP, NETDATA_EBPF_CHART_TYPE_LINE,
@@ -641,17 +721,29 @@ static void ebpf_obsolete_specific_process_charts(char *type)
                               EBPF_COMMON_DIMENSION_CALL, NETDATA_PROCESS_GROUP, NETDATA_EBPF_CHART_TYPE_LINE,
                               NETDATA_CGROUP_THREAD_CREATE_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5001);
 
+    ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_TASK_EXIT,"Tasks starts exit process.",
+                              EBPF_COMMON_DIMENSION_CALL, NETDATA_PROCESS_GROUP, NETDATA_EBPF_CHART_TYPE_LINE,
+                              NETDATA_CGROUP_PROCESS_EXIT_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5003);
+
     ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_TASK_CLOSE,"Tasks closed",
                               EBPF_COMMON_DIMENSION_CALL, NETDATA_PROCESS_GROUP, NETDATA_EBPF_CHART_TYPE_LINE,
-                              NETDATA_CGROUP_PROCESS_CLOSE_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5002);
+                              NETDATA_CGROUP_PROCESS_CLOSE_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5003);
+
+    if (em->mode < MODE_ENTRY) {
+        ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_TASK_ERROR,"Errors to create process or threads.",
+                                  EBPF_COMMON_DIMENSION_CALL, NETDATA_PROCESS_GROUP, NETDATA_EBPF_CHART_TYPE_LINE,
+                                  NETDATA_CGROUP_PROCESS_ERROR_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5004);
+    }
 }
 
 /**
  *  Create Systemd process Charts
  *
  *  Create charts when systemd is enabled
+ *
+ *  @param em   the structure with thread information
  **/
-static void ebpf_create_systemd_process_charts()
+static void ebpf_create_systemd_process_charts(ebpf_module_t *em)
 {
     ebpf_create_charts_on_systemd(NETDATA_SYSCALL_APPS_TASK_PROCESS, "Process started",
                                   EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_PROCESS_GROUP,
@@ -665,11 +757,25 @@ static void ebpf_create_systemd_process_charts()
                                   ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], NETDATA_SYSTEMD_THREAD_CREATE_CONTEXT,
                                   NETDATA_EBPF_MODULE_NAME_PROCESS);
 
-    ebpf_create_charts_on_systemd(NETDATA_SYSCALL_APPS_TASK_CLOSE, "Tasks closed",
+    ebpf_create_charts_on_systemd(NETDATA_SYSCALL_APPS_TASK_CLOSE, "Tasks starts exit process.",
                                   EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_PROCESS_GROUP,
                                   NETDATA_EBPF_CHART_TYPE_STACKED, 20067,
+                                  ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], NETDATA_SYSTEMD_PROCESS_EXIT_CONTEXT,
+                                  NETDATA_EBPF_MODULE_NAME_PROCESS);
+
+    ebpf_create_charts_on_systemd(NETDATA_SYSCALL_APPS_TASK_EXIT, "Tasks closed",
+                                  EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_PROCESS_GROUP,
+                                  NETDATA_EBPF_CHART_TYPE_STACKED, 20068,
                                   ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], NETDATA_SYSTEMD_PROCESS_CLOSE_CONTEXT,
                                   NETDATA_EBPF_MODULE_NAME_PROCESS);
+
+    if (em->mode < MODE_ENTRY) {
+        ebpf_create_charts_on_systemd(NETDATA_SYSCALL_APPS_TASK_ERROR, "Errors to create process or threads.",
+                                      EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_PROCESS_GROUP,
+                                      NETDATA_EBPF_CHART_TYPE_STACKED, 20069,
+                                      ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], NETDATA_SYSTEMD_PROCESS_ERROR_CONTEXT,
+                                      NETDATA_EBPF_MODULE_NAME_PROCESS);
+    }
 }
 
 /**
@@ -677,17 +783,19 @@ static void ebpf_create_systemd_process_charts()
  *
  * Send collected data to Netdata.
  *
+ *  @param em   the structure with thread information
+ *
  * @return It returns the status for chart creation, if it is necessary to remove a specific dimension, zero is returned
  *         otherwise function returns 1 to avoid chart recreation
  */
-static int ebpf_send_systemd_process_charts()
+static int ebpf_send_systemd_process_charts(ebpf_module_t *em)
 {
     int ret = 1;
     ebpf_cgroup_target_t *ect;
     write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_TASK_PROCESS);
     for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
         if (unlikely(ect->systemd) && unlikely(ect->updated)) {
-            write_chart_dimension(ect->name, ect->publish_systemd_ps.fork_call);
+            write_chart_dimension(ect->name, ect->publish_systemd_ps.create_process);
         } else
             ret = 0;
     }
@@ -696,7 +804,15 @@ static int ebpf_send_systemd_process_charts()
     write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_TASK_THREAD);
     for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
         if (unlikely(ect->systemd) && unlikely(ect->updated)) {
-            write_chart_dimension(ect->name, ect->publish_systemd_ps.clone_call);
+            write_chart_dimension(ect->name, ect->publish_systemd_ps.create_thread);
+        }
+    }
+    write_end_chart();
+
+    write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_TASK_EXIT);
+    for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
+        if (unlikely(ect->systemd) && unlikely(ect->updated)) {
+            write_chart_dimension(ect->name, ect->publish_systemd_ps.exit_call);
         }
     }
     write_end_chart();
@@ -709,13 +825,25 @@ static int ebpf_send_systemd_process_charts()
     }
     write_end_chart();
 
+    if (em->mode < MODE_ENTRY) {
+        write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_TASK_ERROR);
+        for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
+            if (unlikely(ect->systemd) && unlikely(ect->updated)) {
+                write_chart_dimension(ect->name, ect->publish_systemd_ps.task_err);
+            }
+        }
+        write_end_chart();
+    }
+
     return ret;
 }
 
 /**
  * Send data to Netdata calling auxiliar functions.
+ *
+ * @param em   the structure with thread information
 */
-static void ebpf_process_send_cgroup_data()
+static void ebpf_process_send_cgroup_data(ebpf_module_t *em)
 {
     if (!ebpf_cgroup_pids)
         return;
@@ -731,11 +859,11 @@ static void ebpf_process_send_cgroup_data()
     if (has_systemd) {
         static int systemd_chart = 0;
         if (!systemd_chart) {
-            ebpf_create_systemd_process_charts();
+            ebpf_create_systemd_process_charts(em);
             systemd_chart = 1;
         }
 
-        systemd_chart = ebpf_send_systemd_process_charts();
+        systemd_chart = ebpf_send_systemd_process_charts(em);
     }
 
     for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
@@ -743,15 +871,15 @@ static void ebpf_process_send_cgroup_data()
             continue;
 
         if (!(ect->flags & NETDATA_EBPF_CGROUP_HAS_PROCESS_CHART) && ect->updated) {
-            ebpf_create_specific_process_charts(ect->name);
+            ebpf_create_specific_process_charts(ect->name, em);
             ect->flags |= NETDATA_EBPF_CGROUP_HAS_PROCESS_CHART;
         }
 
         if (ect->flags & NETDATA_EBPF_CGROUP_HAS_PROCESS_CHART) {
             if (ect->updated) {
-                ebpf_send_specific_process_data(ect->name, &ect->publish_systemd_ps);
+                ebpf_send_specific_process_data(ect->name, &ect->publish_systemd_ps, em);
             } else {
-                ebpf_obsolete_specific_process_charts(ect->name);
+                ebpf_obsolete_specific_process_charts(ect->name, em);
                 ect->flags &= ~NETDATA_EBPF_CGROUP_HAS_PROCESS_CHART;
             }
         }
@@ -831,11 +959,11 @@ static void process_collector(usec_t step, ebpf_module_t *em)
         }
 
         if (publish_apps) {
-            ebpf_process_send_apps_data(apps_groups_root_target);
+            ebpf_process_send_apps_data(apps_groups_root_target, em);
         }
 
         if (cgroups) {
-            ebpf_process_send_cgroup_data();
+            ebpf_process_send_cgroup_data(em);
         }
         pthread_mutex_unlock(&lock);
 
@@ -863,6 +991,30 @@ void clean_global_memory() {
     }
 }
 
+/**
+ * Process disable tracepoints
+ *
+ * Walk the three sched tracepoints used by this thread and call
+ * ebpf_disable_tracing_values() for each one whose saved "was enabled" flag is zero.
+ * A non-zero result from that call is logged and the walk continues.
+ */
+static void ebpf_process_disable_tracepoints()
+{
+    char *default_message = { "Cannot disable the tracepoint" };
+    struct {
+        int was_enabled;
+        char *name;
+    } tracepoints[] = {
+        { was_sched_process_exit_enabled, tracepoint_sched_process_exit },
+        { was_sched_process_exec_enabled, tracepoint_sched_process_exec },
+        { was_sched_process_fork_enabled, tracepoint_sched_process_fork }
+    };
+    size_t i;
+
+    for (i = 0; i < sizeof(tracepoints) / sizeof(tracepoints[0]); i++) {
+        // A non-zero flag means the tracepoint is not ours to switch off.
+        if (tracepoints[i].was_enabled)
+            continue;
+
+        if (ebpf_disable_tracing_values(tracepoint_sched_type, tracepoints[i].name))
+            error("%s %s/%s.", default_message, tracepoint_sched_type, tracepoints[i].name);
+    }
+}
+
+
 /**
  * Clean up the main thread.
  *
@@ -887,6 +1039,8 @@ static void ebpf_process_cleanup(void *ptr)
     freez(global_process_stats);
     freez(current_apps_data);
 
+    ebpf_process_disable_tracepoints();
+
     if (probe_links) {
         struct bpf_program *prog;
         size_t i = 0 ;
@@ -973,6 +1127,45 @@ static void wait_for_all_threads_die()
     }
 }
 
+/**
+ * Enable one tracepoint
+ *
+ * Ask ebpf_is_tracepoint_enabled() about a tracepoint of the sched group; when the
+ * answer is 0, call ebpf_enable_tracing_values() for it. The answer is stored in
+ * was_enabled only when no step failed.
+ *
+ * @param event        the tracepoint name.
+ * @param was_enabled  where the value returned by ebpf_is_tracepoint_enabled() is stored.
+ *
+ * @return  It returns 0 on success and -1 otherwise
+ */
+static int ebpf_process_enable_one_tracepoint(char *event, int *was_enabled)
+{
+    int state = ebpf_is_tracepoint_enabled(tracepoint_sched_type, event);
+    if (state == -1)
+        return -1;
+
+    if (!state && ebpf_enable_tracing_values(tracepoint_sched_type, event))
+        return -1;
+
+    *was_enabled = state;
+    return 0;
+}
+
+/**
+ * Enable tracepoints
+ *
+ * Process the exit, exec and fork sched tracepoints in that order, stopping at
+ * the first failure; tracepoints after the failing one are not inspected.
+ *
+ * @return  It returns 0 on success and -1 otherwise
+ */
+static int ebpf_process_enable_tracepoints()
+{
+    if (ebpf_process_enable_one_tracepoint(tracepoint_sched_process_exit, &was_sched_process_exit_enabled))
+        return -1;
+
+    if (ebpf_process_enable_one_tracepoint(tracepoint_sched_process_exec, &was_sched_process_exec_enabled))
+        return -1;
+
+    if (ebpf_process_enable_one_tracepoint(tracepoint_sched_process_fork, &was_sched_process_fork_enabled))
+        return -1;
+
+    return 0;
+}
+
+
 /**
  * Process thread
  *
@@ -988,6 +1181,10 @@ void *ebpf_process_thread(void *ptr)
 
     ebpf_module_t *em = (ebpf_module_t *)ptr;
     em->maps = process_maps;
+
+    if (ebpf_process_enable_tracepoints()) {
+        em->enabled = em->global_charts = em->apps_charts = em->cgroup_charts =  CONFIG_BOOLEAN_NO;
+    }
     process_enabled = em->enabled;
 
     pthread_mutex_lock(&lock);

+ 11 - 6
collectors/ebpf.plugin/ebpf_process.h

@@ -7,8 +7,8 @@
 #define NETDATA_EBPF_MODULE_NAME_PROCESS "process"
 
 // Groups used on Dashboard
-#define NETDATA_PROCESS_GROUP "process"
-#define NETDATA_PROCESS_CGROUP_GROUP "process (eBPF)"
+#define NETDATA_PROCESS_GROUP "processes"
+#define NETDATA_PROCESS_CGROUP_GROUP "processes (eBPF)"
 
 // Global chart name
 #define NETDATA_EXIT_SYSCALL "exit"
@@ -19,7 +19,9 @@
 // Charts created on Apps submenu
 #define NETDATA_SYSCALL_APPS_TASK_PROCESS "process_create"
 #define NETDATA_SYSCALL_APPS_TASK_THREAD "thread_create"
+#define NETDATA_SYSCALL_APPS_TASK_EXIT "task_exit"
 #define NETDATA_SYSCALL_APPS_TASK_CLOSE "task_close"
+#define NETDATA_SYSCALL_APPS_TASK_ERROR "task_error"
 
 // Process configuration name
 #define NETDATA_PROCESS_CONFIG_FILE "process.conf"
@@ -28,10 +30,14 @@
 #define NETDATA_CGROUP_PROCESS_CREATE_CONTEXT "cgroup.process_create"
 #define NETDATA_CGROUP_THREAD_CREATE_CONTEXT "cgroup.thread_create"
 #define NETDATA_CGROUP_PROCESS_CLOSE_CONTEXT "cgroup.task_close"
+#define NETDATA_CGROUP_PROCESS_EXIT_CONTEXT "cgroup.task_exit"
+#define NETDATA_CGROUP_PROCESS_ERROR_CONTEXT "cgroup.task_error"
 
 #define NETDATA_SYSTEMD_PROCESS_CREATE_CONTEXT "services.process_create"
 #define NETDATA_SYSTEMD_THREAD_CREATE_CONTEXT "services.thread_create"
 #define NETDATA_SYSTEMD_PROCESS_CLOSE_CONTEXT "services.task_close"
+#define NETDATA_SYSTEMD_PROCESS_EXIT_CONTEXT "services.task_exit"
+#define NETDATA_SYSTEMD_PROCESS_ERROR_CONTEXT "services.task_error"
 
 // Index from kernel
 typedef enum ebpf_process_index {
@@ -66,12 +72,11 @@ typedef struct ebpf_process_publish_apps {
     // Number of calls during the last read
     uint64_t call_do_exit;
     uint64_t call_release_task;
-    uint64_t call_do_fork;
-    uint64_t call_sys_clone;
+    uint64_t create_process;
+    uint64_t create_thread;
 
     // Number of errors during the last read
-    uint64_t ecall_do_fork;
-    uint64_t ecall_sys_clone;
+    uint64_t task_err;
 } ebpf_process_publish_apps_t;
 
 enum ebpf_process_tables {

+ 3 - 3
packaging/ebpf.checksums

@@ -1,3 +1,3 @@
-f8e89d3a37b49f2b14d66dace463545e19bbcb5f486f0ef525d8e57b4acf809b  netdata-kernel-collector-glibc-v0.8.1.tar.xz
-094d4a9b05463031feb432cdaf599570e77df226ea743ee16b11178de00ca930  netdata-kernel-collector-musl-v0.8.1.tar.xz
-2994127a98ac86f0028c8242ba41a0ccc69786c076cc236d27b187ea2d38876a  netdata-kernel-collector-static-v0.8.1.tar.xz
+0e37657f98a8a287bd9798284ab4d6ed45fecd8b45e1e92e0c3bf04ba1e8c1fd  netdata-kernel-collector-glibc-v0.8.2.tar.xz
+fbaea92c63293a220083febc2223e605cb43b6698469b13b44628194973babae  netdata-kernel-collector-musl-v0.8.2.tar.xz
+8a7ff8a5d0d62c082be606dd4296e0b28563682ae2fbc89faaf730117e2ef016  netdata-kernel-collector-static-v0.8.2.tar.xz

+ 1 - 1
packaging/ebpf.version

@@ -1 +1 @@
-v0.8.1
+v0.8.2

+ 50 - 25
web/gui/dashboard_info.js

@@ -1310,6 +1310,26 @@ netdataDashboard.context = {
         info: 'The amount of time the system has been running, including time spent in suspend.'
     },
 
+    'system.process_thread': {
+        title : 'Task creation',
+        info: 'Number of times that either <a href="https://www.ece.uic.edu/~yshi1/linux/lkse/node4.html#SECTION00421000000000000000" target="_blank">do_fork</a>, or <code>kernel_clone</code> if you are running kernel newer than 5.9.16, is called to create a new task, which is the common name used to define processes and threads inside the kernel. Netdata identifies threads by monitoring the tracepoint <code>sched_process_fork</code>. This chart is provided by eBPF plugin.'
+    },
+
+    'system.exit': {
+        title : 'Exit monitoring',
+        info: 'Calls for the functions responsible for closing (<a href="https://www.informit.com/articles/article.aspx?p=370047&seqNum=4" target="_blank">do_exit</a>) and releasing (<a href="https://www.informit.com/articles/article.aspx?p=370047&seqNum=4" target="_blank">release_task</a>) tasks. This chart is provided by eBPF plugin.'
+    },
+
+    'system.task_error': {
+        title : 'Task error',
+        info: 'Number of errors when creating a new process or thread. This chart is provided by eBPF plugin.'
+    },
+
+    'system.process_status': {
+        title : 'Task status',
+        info: 'Difference between the number of processes created and the number of threads created per period (<code>process</code> dimension). It also shows the number of possible zombie processes running on the system. This chart is provided by eBPF plugin.'
+    },
+
     // ------------------------------------------------------------------------
     // CPU charts
 
@@ -2543,15 +2563,23 @@ netdataDashboard.context = {
     },
 
     'apps.process_create': {
-        info: 'Calls to either <a href="https://programming.vip/docs/the-execution-procedure-of-do_fork-function-in-linux.html" target="_blank">do_fork</a>, or <code>kernel_clone</code> if you are running kernel newer than 5.9.16, to create a new task, which is the common name used to define process and tasks inside the kernel. Netdata identifies the process by counting the number of calls to <a href="https://linux.die.net/man/2/clone" target="_blank">sys_clone</a> that do not have the flag <code>CLONE_THREAD</code> set.'
+        info: 'Calls to either <a href="https://programming.vip/docs/the-execution-procedure-of-do_fork-function-in-linux.html" target="_blank">do_fork</a>, or <code>kernel_clone</code> if you are running kernel newer than 5.9.16, to create a new task, which is the common name used to define processes and threads inside the kernel. This chart is provided by eBPF plugin.'
     },
 
     'apps.thread_create': {
-        info: 'Calls to either <a href="https://programming.vip/docs/the-execution-procedure-of-do_fork-function-in-linux.html" target="_blank">do_fork</a>, or <code>kernel_clone</code> if you are running kernel newer than 5.9.16, to create a new task, which is the common name used to define process and tasks inside the kernel. Netdata identifies the threads by counting the number of calls to <a  href="https://linux.die.net/man/2/clone" target="_blank">sys_clone</a> that have the flag <code>CLONE_THREAD</code> set.'
+        info: 'Calls to either <a href="https://programming.vip/docs/the-execution-procedure-of-do_fork-function-in-linux.html" target="_blank">do_fork</a>, or <code>kernel_clone</code> if you are running kernel newer than 5.9.16, to create a new task, which is the common name used to define processes and threads inside the kernel. Netdata identifies threads by monitoring the tracepoint <code>sched_process_fork</code>. This chart is provided by eBPF plugin.'
+    },
+
+    'apps.task_exit': {
+        info: 'Calls to the function responsible for closing (<a href="https://www.informit.com/articles/article.aspx?p=370047&seqNum=4" target="_blank">do_exit</a>) tasks. This chart is provided by eBPF plugin.'
     },
 
     'apps.task_close': {
-        info: 'Calls to the functions responsible for closing (<a href="https://www.informit.com/articles/article.aspx?p=370047&seqNum=4" target="_blank">do_exit</a>) and releasing (<a  href="https://www.informit.com/articles/article.aspx?p=370047&seqNum=4" target="_blank">release_task</a>) tasks.'
+        info: 'Calls to the function responsible for releasing (<a  href="https://www.informit.com/articles/article.aspx?p=370047&seqNum=4" target="_blank">release_task</a>) tasks. This chart is provided by eBPF plugin.'
+    },
+
+    'apps.task_error': {
+        info: 'Number of errors when creating a new process or thread. This chart is provided by eBPF plugin.'
     },
 
     'apps.total_bandwidth_sent': {
@@ -3890,10 +3918,19 @@ netdataDashboard.context = {
         info: 'Calls to either <a href="https://programming.vip/docs/the-execution-procedure-of-do_fork-function-in-linux.html" target="_blank">do_fork</a>, or <code>kernel_clone</code> if you are running kernel newer than 5.9.16, to create a new task, which is the common name used to define process and tasks inside the kernel. Netdata identifies the threads by counting the number of calls to <a  href="https://linux.die.net/man/2/clone" target="_blank">sys_clone</a> that have the flag <code>CLONE_THREAD</code> set.'
     },
 
+    'cgroup.task_exit': {
+        info: 'Calls to the function responsible for closing (<a href="https://www.informit.com/articles/article.aspx?p=370047&seqNum=4" target="_blank">do_exit</a>) tasks.'
+    },
+
     'cgroup.task_close': {
-        info: 'Calls to the functions responsible for closing (<a href="https://www.informit.com/articles/article.aspx?p=370047&seqNum=4" target="_blank">do_exit</a>) and releasing (<a  href="https://www.informit.com/articles/article.aspx?p=370047&seqNum=4" target="_blank">release_task</a>) tasks.'
+        info: 'Calls to the function responsible for releasing (<a  href="https://www.informit.com/articles/article.aspx?p=370047&seqNum=4" target="_blank">release_task</a>) tasks.'
     },
 
+    'cgroup.task_error': {
+        info: 'Number of errors when creating a new process or thread. This chart is provided by eBPF plugin.'
+    },
+
+
     'cgroup.dc_ratio': {
         info: 'Percentage of file accesses that were present in the directory cache. 100% means that every file that was accessed was present in the directory cache. If files are not present in the directory cache 1) they are not present in the file system, 2) the files were not accessed before. Read more about <a href="https://www.kernel.org/doc/htmldocs/filesystems/the_directory_cache.html" target="_blank">directory cache</a>. Netdata also gives a summary for these charts in <a href="#menu_filesystem_submenu_directory_cache__eBPF_">Filesystem submenu</a>.'
     },
@@ -4151,8 +4188,16 @@ netdataDashboard.context = {
         info: 'Calls to either <a href="https://programming.vip/docs/the-execution-procedure-of-do_fork-function-in-linux.html" target="_blank">do_fork</a>, or <code>kernel_clone</code> if you are running kernel newer than 5.9.16, to create a new task, which is the common name used to define process and tasks inside the kernel. Netdata identifies the threads by counting the number of calls to <a  href="https://linux.die.net/man/2/clone" target="_blank">sys_clone</a> that have the flag <code>CLONE_THREAD</code> set.'
     },
 
+    'services.task_exit': {
+        info: 'Calls to the function responsible for closing (<a href="https://www.informit.com/articles/article.aspx?p=370047&seqNum=4" target="_blank">do_exit</a>) tasks.'
+    },
+
     'services.task_close': {
-        info: 'Calls to the functions responsible for closing (<a href="https://www.informit.com/articles/article.aspx?p=370047&seqNum=4" target="_blank">do_exit</a>) and releasing (<a  href="https://www.informit.com/articles/article.aspx?p=370047&seqNum=4" target="_blank">release_task</a>) tasks.'
+        info: 'Calls to the function responsible for releasing (<a  href="https://www.informit.com/articles/article.aspx?p=370047&seqNum=4" target="_blank">release_task</a>) tasks.'
+    },
+
+    'services.task_error': {
+        info: 'Number of errors when creating a new process or thread. This chart is provided by eBPF plugin.'
     },
 
     'services.dc_ratio': {
@@ -5795,26 +5840,6 @@ netdataDashboard.context = {
     // ------------------------------------------------------------------------
     // eBPF
 
-    'ebpf.process_thread': {
-        title : 'Task creation',
-        info: 'Number of times that either <a href="https://www.ece.uic.edu/~yshi1/linux/lkse/node4.html#SECTION00421000000000000000" target="_blank">do_fork</a>, or <code>kernel_clone</code> if you are running kernel newer than 5.9.16, is called to create a new task, which is the common name used to define process and tasks inside the kernel. Netdata identifies the threads by counting the number of calls for <a href="https://linux.die.net/man/2/clone" target="_blank">sys_clone</a> that has the flag <code>CLONE_THREAD</code> set.'
-    },
-
-    'ebpf.exit': {
-        title : 'Exit monitoring',
-        info: 'Calls for the functions responsible for closing (<a href="https://www.informit.com/articles/article.aspx?p=370047&seqNum=4" target="_blank">do_exit</a>) and releasing (<a href="https://www.informit.com/articles/article.aspx?p=370047&seqNum=4" target="_blank">release_task</a>) tasks.'
-    },
-
-    'ebpf.task_error': {
-        title : 'Task error',
-        info: 'Number of errors to create a new process or thread.'
-    },
-
-    'ebpf.process_status': {
-        title : 'Task status',
-        info: 'Difference between the number of process created and the number of threads created per period(<code>process</code> dimension), it also shows the number of possible zombie process running on system.'
-    },
-
     'apps.swap_read_call': {
         info: 'The function <code>swap_readpage</code> is called when the kernel reads a page from swap memory. Netdata also gives a summary for these charts in <a href="#menu_system_submenu_swap">System overview</a>.'
     },