Browse Source

Fix eBPF load on RH 8.x family and improve code. (#14090)

thiagoftsm 2 years ago
parent
commit
d127c108eb

+ 3 - 0
collectors/ebpf.plugin/ebpf.h

@@ -123,6 +123,9 @@ enum ebpf_threads_status {
 #endif
 #endif
 
+// Messages
+#define NETDATA_EBPF_DEFAULT_FNT_NOT_FOUND "Cannot find the necessary functions to monitor"
+
 // Chart definitions
 #define NETDATA_EBPF_FAMILY "ebpf"
 #define NETDATA_EBPF_IP_FAMILY "ip"

+ 45 - 18
collectors/ebpf.plugin/ebpf_cachestat.c

@@ -51,6 +51,9 @@ netdata_ebpf_targets_t cachestat_targets[] = { {.name = "add_to_page_cache_lru",
                                                {.name = "mark_buffer_dirty", .mode = EBPF_LOAD_TRAMPOLINE},
                                                {.name = NULL, .mode = EBPF_LOAD_TRAMPOLINE}};
 
+static char *account_page[NETDATA_CACHESTAT_ACCOUNT_DIRTY_END] ={ "account_page_dirtied",
+                                                                  "__set_page_dirty", "__folio_mark_dirty"  };
+
 #ifdef LIBBPF_MAJOR_VERSION
 #include "includes/cachestat.skel.h" // BTF code
 
@@ -83,10 +86,12 @@ static void ebpf_cachestat_disable_probe(struct cachestat_bpf *obj)
  */
 static void ebpf_cachestat_disable_specific_probe(struct cachestat_bpf *obj)
 {
-    if (running_on_kernel >= NETDATA_EBPF_KERNEL_5_16) {
+    if (!strcmp(cachestat_targets[NETDATA_KEY_CALLS_ACCOUNT_PAGE_DIRTIED].name,
+                account_page[NETDATA_CACHESTAT_FOLIO_DIRTY])) {
         bpf_program__set_autoload(obj->progs.netdata_account_page_dirtied_kprobe, false);
         bpf_program__set_autoload(obj->progs.netdata_set_page_dirty_kprobe, false);
-    } else if (running_on_kernel >= NETDATA_EBPF_KERNEL_5_15) {
+    } else if (!strcmp(cachestat_targets[NETDATA_KEY_CALLS_ACCOUNT_PAGE_DIRTIED].name,
+                       account_page[NETDATA_CACHESTAT_SET_PAGE_DIRTY])) {
         bpf_program__set_autoload(obj->progs.netdata_folio_mark_dirty_kprobe, false);
         bpf_program__set_autoload(obj->progs.netdata_account_page_dirtied_kprobe, false);
     } else {
@@ -122,10 +127,12 @@ static void ebpf_cachestat_disable_trampoline(struct cachestat_bpf *obj)
  */
 static void ebpf_cachestat_disable_specific_trampoline(struct cachestat_bpf *obj)
 {
-    if (running_on_kernel >= NETDATA_EBPF_KERNEL_5_16) {
+    if (!strcmp(cachestat_targets[NETDATA_KEY_CALLS_ACCOUNT_PAGE_DIRTIED].name,
+                account_page[NETDATA_CACHESTAT_FOLIO_DIRTY])) {
         bpf_program__set_autoload(obj->progs.netdata_account_page_dirtied_fentry, false);
         bpf_program__set_autoload(obj->progs.netdata_set_page_dirty_fentry, false);
-    } else if (running_on_kernel >= NETDATA_EBPF_KERNEL_5_15) {
+    } else if (!strcmp(cachestat_targets[NETDATA_KEY_CALLS_ACCOUNT_PAGE_DIRTIED].name,
+                       account_page[NETDATA_CACHESTAT_SET_PAGE_DIRTY])) {
         bpf_program__set_autoload(obj->progs.netdata_folio_mark_dirty_fentry, false);
         bpf_program__set_autoload(obj->progs.netdata_account_page_dirtied_fentry, false);
     } else {
@@ -149,10 +156,12 @@ static inline void netdata_set_trampoline_target(struct cachestat_bpf *obj)
     bpf_program__set_attach_target(obj->progs.netdata_mark_page_accessed_fentry, 0,
                                    cachestat_targets[NETDATA_KEY_CALLS_MARK_PAGE_ACCESSED].name);
 
-    if (running_on_kernel >= NETDATA_EBPF_KERNEL_5_16) {
+    if (!strcmp(cachestat_targets[NETDATA_KEY_CALLS_ACCOUNT_PAGE_DIRTIED].name,
+                account_page[NETDATA_CACHESTAT_FOLIO_DIRTY])) {
         bpf_program__set_attach_target(obj->progs.netdata_folio_mark_dirty_fentry, 0,
                                        cachestat_targets[NETDATA_KEY_CALLS_ACCOUNT_PAGE_DIRTIED].name);
-    } else if (running_on_kernel >= NETDATA_EBPF_KERNEL_5_15) {
+    } else if (!strcmp(cachestat_targets[NETDATA_KEY_CALLS_ACCOUNT_PAGE_DIRTIED].name,
+                       account_page[NETDATA_CACHESTAT_SET_PAGE_DIRTY])) {
         bpf_program__set_attach_target(obj->progs.netdata_set_page_dirty_fentry, 0,
                                        cachestat_targets[NETDATA_KEY_CALLS_ACCOUNT_PAGE_DIRTIED].name);
     } else {
@@ -192,12 +201,14 @@ static int ebpf_cachestat_attach_probe(struct cachestat_bpf *obj)
     if (ret)
         return -1;
 
-    if (running_on_kernel >= NETDATA_EBPF_KERNEL_5_16) {
+    if (!strcmp(cachestat_targets[NETDATA_KEY_CALLS_ACCOUNT_PAGE_DIRTIED].name,
+                account_page[NETDATA_CACHESTAT_FOLIO_DIRTY])) {
         obj->links.netdata_folio_mark_dirty_kprobe = bpf_program__attach_kprobe(obj->progs.netdata_folio_mark_dirty_kprobe,
                                                                                 false,
                                                                                 cachestat_targets[NETDATA_KEY_CALLS_ACCOUNT_PAGE_DIRTIED].name);
         ret = libbpf_get_error(obj->links.netdata_folio_mark_dirty_kprobe);
-    } else if (running_on_kernel >= NETDATA_EBPF_KERNEL_5_15) {
+    } else if (!strcmp(cachestat_targets[NETDATA_KEY_CALLS_ACCOUNT_PAGE_DIRTIED].name,
+                       account_page[NETDATA_CACHESTAT_SET_PAGE_DIRTY])) {
         obj->links.netdata_set_page_dirty_kprobe = bpf_program__attach_kprobe(obj->progs.netdata_set_page_dirty_kprobe,
                                                                               false,
                                                                               cachestat_targets[NETDATA_KEY_CALLS_ACCOUNT_PAGE_DIRTIED].name);
@@ -363,7 +374,8 @@ static void ebpf_cachestat_free(ebpf_module_t *em)
 static void ebpf_cachestat_exit(void *ptr)
 {
     ebpf_module_t *em = (ebpf_module_t *)ptr;
-    netdata_thread_cancel(*cachestat_threads.thread);
+    if (cachestat_threads.thread)
+        netdata_thread_cancel(*cachestat_threads.thread);
     ebpf_cachestat_free(em);
 }
 
@@ -1237,16 +1249,28 @@ static void ebpf_cachestat_allocate_global_vectors(int apps)
  * Update Internal value
  *
  * Update values used during runtime.
+ *
+ * @return It returns 0 when one of the functions is present and -1 otherwise.
  */
-static void ebpf_cachestat_set_internal_value()
+static int ebpf_cachestat_set_internal_value()
 {
-    static char *account_page[] = { "account_page_dirtied", "__set_page_dirty", "__folio_mark_dirty"  };
-    if (running_on_kernel >= NETDATA_EBPF_KERNEL_5_16)
-        cachestat_targets[NETDATA_KEY_CALLS_ACCOUNT_PAGE_DIRTIED].name = account_page[NETDATA_CACHESTAT_FOLIO_DIRTY];
-    else if (running_on_kernel >= NETDATA_EBPF_KERNEL_5_15)
-        cachestat_targets[NETDATA_KEY_CALLS_ACCOUNT_PAGE_DIRTIED].name = account_page[NETDATA_CACHESTAT_SET_PAGE_DIRTY];
-    else
-        cachestat_targets[NETDATA_KEY_CALLS_ACCOUNT_PAGE_DIRTIED].name = account_page[NETDATA_CACHESTAT_ACCOUNT_PAGE_DIRTY];
+    ebpf_addresses_t address = {.function = NULL, .hash = 0, .addr = 0};
+    int i;
+    for (i = 0; i < NETDATA_CACHESTAT_ACCOUNT_DIRTY_END ; i++) {
+        address.function = account_page[i];
+        ebpf_load_addresses(&address, -1);
+        if (address.addr)
+            break;
+    }
+
+    if (!address.addr) {
+        error("%s cachestat.", NETDATA_EBPF_DEFAULT_FNT_NOT_FOUND);
+        return -1;
+    }
+
+    cachestat_targets[NETDATA_KEY_CALLS_ACCOUNT_PAGE_DIRTIED].name = address.function;
+
+    return 0;
 }
 
 /*
@@ -1300,7 +1324,10 @@ void *ebpf_cachestat_thread(void *ptr)
 
     ebpf_update_pid_table(&cachestat_maps[NETDATA_CACHESTAT_PID_STATS], em);
 
-    ebpf_cachestat_set_internal_value();
+    if (ebpf_cachestat_set_internal_value()) {
+        em->thread->enabled = NETDATA_THREAD_EBPF_STOPPED;
+        goto endcachestat;
+    }
 
 #ifdef LIBBPF_MAJOR_VERSION
     ebpf_adjust_thread_load(em, default_btf);

+ 3 - 1
collectors/ebpf.plugin/ebpf_cachestat.h

@@ -48,7 +48,9 @@ enum cachestat_counters {
 enum cachestat_account_dirty_pages {
     NETDATA_CACHESTAT_ACCOUNT_PAGE_DIRTY,
     NETDATA_CACHESTAT_SET_PAGE_DIRTY,
-    NETDATA_CACHESTAT_FOLIO_DIRTY
+    NETDATA_CACHESTAT_FOLIO_DIRTY,
+
+    NETDATA_CACHESTAT_ACCOUNT_DIRTY_END
 };
 
 enum cachestat_indexes {

+ 2 - 1
collectors/ebpf.plugin/ebpf_dcstat.c

@@ -338,7 +338,8 @@ static void ebpf_dcstat_free(ebpf_module_t *em )
 static void ebpf_dcstat_exit(void *ptr)
 {
     ebpf_module_t *em = (ebpf_module_t *)ptr;
-    netdata_thread_cancel(*dcstat_threads.thread);
+    if (dcstat_threads.thread)
+        netdata_thread_cancel(*dcstat_threads.thread);
     ebpf_dcstat_free(em);
 }
 

+ 2 - 1
collectors/ebpf.plugin/ebpf_disk.c

@@ -473,7 +473,8 @@ static void ebpf_disk_free(ebpf_module_t *em)
 static void ebpf_disk_exit(void *ptr)
 {
     ebpf_module_t *em = (ebpf_module_t *)ptr;
-    netdata_thread_cancel(*disk_threads.thread);
+    if (disk_threads.thread)
+        netdata_thread_cancel(*disk_threads.thread);
     ebpf_disk_free(em);
 }
 

+ 51 - 17
collectors/ebpf.plugin/ebpf_fd.c

@@ -6,6 +6,9 @@
 static char *fd_dimension_names[NETDATA_FD_SYSCALL_END] = { "open", "close" };
 static char *fd_id_names[NETDATA_FD_SYSCALL_END] = { "do_sys_open",  "__close_fd" };
 
+static char *close_targets[NETDATA_EBPF_MAX_FD_TARGETS] = {"close_fd", "__close_fd"};
+static char *open_targets[NETDATA_EBPF_MAX_FD_TARGETS] = {"do_sys_openat2", "do_sys_open"};
+
 static netdata_syscall_stat_t fd_aggregated_data[NETDATA_FD_SYSCALL_END];
 static netdata_publish_syscall_t fd_publish_aggregated[NETDATA_FD_SYSCALL_END];
 
@@ -65,7 +68,7 @@ static inline void ebpf_fd_disable_probes(struct fd_bpf *obj)
     bpf_program__set_autoload(obj->progs.netdata_sys_open_kprobe, false);
     bpf_program__set_autoload(obj->progs.netdata_sys_open_kretprobe, false);
     bpf_program__set_autoload(obj->progs.netdata_release_task_fd_kprobe, false);
-    if (running_on_kernel >= NETDATA_EBPF_KERNEL_5_11) {
+    if (!strcmp(fd_targets[NETDATA_FD_SYSCALL_CLOSE].name, close_targets[NETDATA_FD_CLOSE_FD])) {
         bpf_program__set_autoload(obj->progs.netdata___close_fd_kretprobe, false);
         bpf_program__set_autoload(obj->progs.netdata___close_fd_kprobe, false);
         bpf_program__set_autoload(obj->progs.netdata_close_fd_kprobe, false);
@@ -85,7 +88,7 @@ static inline void ebpf_fd_disable_probes(struct fd_bpf *obj)
  */
 static inline void ebpf_disable_specific_probes(struct fd_bpf *obj)
 {
-    if (running_on_kernel >= NETDATA_EBPF_KERNEL_5_11) {
+    if (!strcmp(fd_targets[NETDATA_FD_SYSCALL_CLOSE].name, close_targets[NETDATA_FD_CLOSE_FD])) {
         bpf_program__set_autoload(obj->progs.netdata___close_fd_kretprobe, false);
         bpf_program__set_autoload(obj->progs.netdata___close_fd_kprobe, false);
     } else {
@@ -121,7 +124,7 @@ static inline void ebpf_disable_trampoline(struct fd_bpf *obj)
  */
 static inline void ebpf_disable_specific_trampoline(struct fd_bpf *obj)
 {
-    if (running_on_kernel >= NETDATA_EBPF_KERNEL_5_11) {
+    if (!strcmp(fd_targets[NETDATA_FD_SYSCALL_CLOSE].name, close_targets[NETDATA_FD_CLOSE_FD])) {
         bpf_program__set_autoload(obj->progs.netdata___close_fd_fentry, false);
         bpf_program__set_autoload(obj->progs.netdata___close_fd_fexit, false);
     } else {
@@ -143,7 +146,7 @@ static void ebpf_set_trampoline_target(struct fd_bpf *obj)
     bpf_program__set_attach_target(obj->progs.netdata_sys_open_fexit, 0, fd_targets[NETDATA_FD_SYSCALL_OPEN].name);
     bpf_program__set_attach_target(obj->progs.netdata_release_task_fd_fentry, 0, EBPF_COMMON_FNCT_CLEAN_UP);
 
-    if (running_on_kernel >= NETDATA_EBPF_KERNEL_5_11) {
+    if (!strcmp(fd_targets[NETDATA_FD_SYSCALL_CLOSE].name, close_targets[NETDATA_FD_CLOSE_FD])) {
         bpf_program__set_attach_target(
             obj->progs.netdata_close_fd_fentry, 0, fd_targets[NETDATA_FD_SYSCALL_CLOSE].name);
         bpf_program__set_attach_target(obj->progs.netdata_close_fd_fexit, 0, fd_targets[NETDATA_FD_SYSCALL_CLOSE].name);
@@ -185,7 +188,7 @@ static int ebpf_fd_attach_probe(struct fd_bpf *obj)
     if (ret)
         return -1;
 
-    if (running_on_kernel >= NETDATA_EBPF_KERNEL_5_11) {
+    if (!strcmp(fd_targets[NETDATA_FD_SYSCALL_CLOSE].name, close_targets[NETDATA_FD_CLOSE_FD])) {
         obj->links.netdata_close_fd_kretprobe = bpf_program__attach_kprobe(obj->progs.netdata_close_fd_kretprobe, true,
                                                                            fd_targets[NETDATA_FD_SYSCALL_CLOSE].name);
         ret = libbpf_get_error(obj->links.netdata_close_fd_kretprobe);
@@ -216,22 +219,48 @@ static int ebpf_fd_attach_probe(struct fd_bpf *obj)
     return 0;
 }
 
+/**
+ * FD Fill Address
+ *
+ * Fill address value used to load probes/trampoline.
+ */
+static inline void ebpf_fd_fill_address(ebpf_addresses_t *address, char **targets)
+{
+    int i;
+    for (i = 0; i < NETDATA_EBPF_MAX_FD_TARGETS; i++) {
+        address->function = targets[i];
+        ebpf_load_addresses(address, -1);
+        if (address->addr)
+            break;
+    }
+}
+
 /**
  * Set target values
  *
  * Set pointers used to load data.
+ *
+ * @return It returns 0 on success and -1 otherwise.
  */
-static void ebpf_fd_set_target_values()
+static int ebpf_fd_set_target_values()
 {
-    static char *close_targets[] = {"close_fd", "__close_fd"};
-    static char *open_targets[] = {"do_sys_openat2", "do_sys_open"};
-    if (running_on_kernel >= NETDATA_EBPF_KERNEL_5_11) {
-        fd_targets[NETDATA_FD_SYSCALL_OPEN].name = open_targets[0];
-        fd_targets[NETDATA_FD_SYSCALL_CLOSE].name = close_targets[0];
-    } else {
-        fd_targets[NETDATA_FD_SYSCALL_OPEN].name = open_targets[1];
-        fd_targets[NETDATA_FD_SYSCALL_CLOSE].name = close_targets[1];
-    }
+    ebpf_addresses_t address = {.function = NULL, .hash = 0, .addr = 0};
+    ebpf_fd_fill_address(&address, close_targets);
+
+    if (!address.addr)
+        return -1;
+
+    fd_targets[NETDATA_FD_SYSCALL_CLOSE].name = address.function;
+
+    address.addr = 0;
+    ebpf_fd_fill_address(&address, open_targets);
+
+    if (!address.addr)
+        return -1;
+
+    fd_targets[NETDATA_FD_SYSCALL_OPEN].name = address.function;
+
+    return 0;
 }
 
 /**
@@ -290,7 +319,11 @@ static inline int ebpf_fd_load_and_attach(struct fd_bpf *obj, ebpf_module_t *em)
     netdata_ebpf_targets_t *mt = em->targets;
     netdata_ebpf_program_loaded_t test = mt[NETDATA_FD_SYSCALL_OPEN].mode;
 
-    ebpf_fd_set_target_values();
+    if (ebpf_fd_set_target_values()) {
+        error("%s file descriptor.", NETDATA_EBPF_DEFAULT_FNT_NOT_FOUND);
+        return -1;
+    }
+
     if (test == EBPF_LOAD_TRAMPOLINE) {
         ebpf_fd_disable_probes(obj);
         ebpf_disable_specific_trampoline(obj);
@@ -372,7 +405,8 @@ static void ebpf_fd_free(ebpf_module_t *em)
 static void ebpf_fd_exit(void *ptr)
 {
     ebpf_module_t *em = (ebpf_module_t *)ptr;
-    netdata_thread_cancel(*fd_thread.thread);
+    if (fd_thread.thread)
+        netdata_thread_cancel(*fd_thread.thread);
     ebpf_fd_free(em);
 }
 

+ 8 - 0
collectors/ebpf.plugin/ebpf_fd.h

@@ -74,6 +74,14 @@ enum fd_syscalls {
     NETDATA_FD_SYSCALL_END
 };
 
+enum fd_close_syscall {
+    NETDATA_FD_CLOSE_FD,
+    NETDATA_FD___CLOSE_FD,
+
+    NETDATA_FD_CLOSE_END
+};
+
+#define NETDATA_EBPF_MAX_FD_TARGETS 2
 
 void *ebpf_fd_thread(void *ptr);
 void ebpf_fd_create_apps_charts(struct ebpf_module *em, void *ptr);

+ 2 - 1
collectors/ebpf.plugin/ebpf_filesystem.c

@@ -367,7 +367,8 @@ static void ebpf_filesystem_free(ebpf_module_t *em)
 static void ebpf_filesystem_exit(void *ptr)
 {
     ebpf_module_t *em = (ebpf_module_t *)ptr;
-    netdata_thread_cancel(*filesystem_threads.thread);
+    if (filesystem_threads.thread)
+        netdata_thread_cancel(*filesystem_threads.thread);
     ebpf_filesystem_free(em);
 }
 

+ 2 - 1
collectors/ebpf.plugin/ebpf_hardirq.c

@@ -183,7 +183,8 @@ static void ebpf_hardirq_free(ebpf_module_t *em)
 static void hardirq_exit(void *ptr)
 {
     ebpf_module_t *em = (ebpf_module_t *)ptr;
-    netdata_thread_cancel(*hardirq_threads.thread);
+    if (hardirq_threads.thread)
+        netdata_thread_cancel(*hardirq_threads.thread);
     ebpf_hardirq_free(em);
 }
 

+ 2 - 1
collectors/ebpf.plugin/ebpf_mdflush.c

@@ -92,7 +92,8 @@ static void mdflush_exit(void *ptr)
 static void mdflush_cleanup(void *ptr)
 {
     ebpf_module_t *em = (ebpf_module_t *)ptr;
-    netdata_thread_cancel(*mdflush_threads.thread);
+    if (mdflush_threads.thread)
+        netdata_thread_cancel(*mdflush_threads.thread);
     ebpf_mdflush_free(em);
 }
 

Some files were not shown because too many files changed in this diff