Ebpf extend sync (#10814)

Extend synchronization thread.
thiagoftsm 4 years ago
parent commit a714066bcc

+ 18 - 2
collectors/ebpf.plugin/README.md

@@ -198,7 +198,7 @@ The eBPF collector enables and runs the following eBPF programs by default:
     When in `return` mode, it also creates charts showing errors when these operations are executed.
 -   `network viewer`: This eBPF program creates charts with information about `TCP` and `UDP` functions, including the
     bandwidth consumed by each.
--   `sync`: Montitor calls for syscall sync(2).
+-   `sync`: Monitor calls for the syscalls sync(2), fsync(2), fdatasync(2), syncfs(2), msync(2), and sync_file_range(2).
 
 ## Thread configuration
 
@@ -218,6 +218,7 @@ The following configuration files are available:
 - `process.conf`: Configuration for the `process` thread.
 - `network.conf`: Configuration for the `network viewer` thread. This config file overwrites the global options and
   also lets you specify which network the eBPF collector monitors.
+- `sync.conf`: Configuration for the `sync` thread.
 
 ### Network configuration
 
@@ -261,7 +262,7 @@ The dimensions for the traffic charts are created using the destination IPs of t
changed by setting `resolve hostname ips = yes` and restarting Netdata. After this, Netdata will create dimensions using
the `hostnames` whenever it is possible to resolve IPs to their hostnames.
 
-### `[service name]`
+#### `[service name]`
 
 Netdata uses the list of services in `/etc/services` to plot network connection charts. If this file does not contain the
 name for a particular service you use in your infrastructure, you will need to add it to the `[service name]` section.
@@ -274,6 +275,21 @@ service in network connection charts, and thus see the name of the service inste
     19999 = Netdata
 ```
 
+### Sync configuration
+
+The `sync` configuration has specific options to disable monitoring for each of these syscalls; by default, all of them are
+monitored. Set an option to `no` to stop monitoring the corresponding syscall.
+
+```conf
+[syscalls]
+    sync = yes
+    msync = yes
+    fsync = yes
+    fdatasync = yes
+    syncfs = yes
+    sync_file_range = yes
+```
+
 ## Troubleshooting
 
 If the eBPF collector does not work, you can troubleshoot it by running the `ebpf.plugin` command and investigating its

+ 9 - 0
collectors/ebpf.plugin/ebpf.d/sync.conf

@@ -12,3 +12,12 @@
     ebpf load mode = entry
     apps = yes
     update every = 2
+
+# List of monitored syscalls 
+[syscalls]
+    sync = yes
+    msync = yes
+    fsync = yes
+    fdatasync = yes
+    syncfs = yes
+    sync_file_range = yes
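
For orientation, a minimal, self-contained sketch (not part of this change) of how the options above map onto the new charts; the chart ids are taken from `ebpf_sync.h` and the `mem.*` contexts in `dashboard_info.js` further down in this commit, while the program itself is only illustrative:

```c
#include <stdio.h>

/* Illustrative mapping only: mirrors the grouping done by sync_send_data()
 * in ebpf_sync.c. The table is NULL-terminated, like local_syscalls[]. */
static const struct {
    const char *syscall;
    const char *chart;
} sync_chart_map[] = {
    { "sync",            "mem.sync"         },
    { "syncfs",          "mem.sync"         },
    { "msync",           "mem.memory_map"   },
    { "fsync",           "mem.file_sync"    },
    { "fdatasync",       "mem.file_sync"    },
    { "sync_file_range", "mem.file_segment" },
    { NULL,              NULL               }
};

int main(void)
{
    // Disabling a syscall in sync.conf removes its dimension from the chart
    // (and the whole chart when every syscall feeding it is disabled).
    for (int i = 0; sync_chart_map[i].syscall; i++)
        printf("%-16s -> %s\n", sync_chart_map[i].syscall, sync_chart_map[i].chart);
    return 0;
}
```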

+ 212 - 43
collectors/ebpf.plugin/ebpf_sync.c

@@ -5,17 +5,14 @@
 
 static ebpf_data_t sync_data;
 
-static struct bpf_link **probe_links = NULL;
-static struct bpf_object *objects = NULL;
-
-static char *sync_counter_dimension_name[NETDATA_SYNC_END] = { "sync" };
-static netdata_syscall_stat_t sync_counter_aggregated_data;
-static netdata_publish_syscall_t sync_counter_publish_aggregated;
+static char *sync_counter_dimension_name[NETDATA_SYNC_IDX_END] = { "sync", "syncfs",  "msync", "fsync", "fdatasync",
+                                                                   "sync_file_range" };
+static netdata_syscall_stat_t sync_counter_aggregated_data[NETDATA_SYNC_IDX_END];
+static netdata_publish_syscall_t sync_counter_publish_aggregated[NETDATA_SYNC_IDX_END];
 
 static int read_thread_closed = 1;
 
-static int *map_fd = NULL;
-static netdata_idx_t sync_hash_values = 0;
+static netdata_idx_t sync_hash_values[NETDATA_SYNC_IDX_END];
 
 struct netdata_static_thread sync_threads = {"SYNC KERNEL", NULL, NULL, 1,
                                               NULL, NULL,  NULL};
@@ -26,6 +23,60 @@ struct config sync_config = { .first_section = NULL,
     .index = { .avl_tree = { .root = NULL, .compar = appconfig_section_compare },
         .rwlock = AVL_LOCK_INITIALIZER } };
 
+ebpf_sync_syscalls_t local_syscalls[] = {
+    {.syscall = "sync", .enabled = CONFIG_BOOLEAN_YES, .objects = NULL, .probe_links = NULL},
+    {.syscall = "syncfs", .enabled = CONFIG_BOOLEAN_YES, .objects = NULL, .probe_links = NULL},
+    {.syscall = "msync", .enabled = CONFIG_BOOLEAN_YES, .objects = NULL, .probe_links = NULL},
+    {.syscall = "fsync", .enabled = CONFIG_BOOLEAN_YES, .objects = NULL, .probe_links = NULL},
+    {.syscall = "fdatasync", .enabled = CONFIG_BOOLEAN_YES, .objects = NULL, .probe_links = NULL},
+    {.syscall = "sync_file_range", .enabled = CONFIG_BOOLEAN_YES, .objects = NULL, .probe_links = NULL},
+    {.syscall = NULL, .enabled = CONFIG_BOOLEAN_NO, .objects = NULL, .probe_links = NULL}
+};
+
+/*****************************************************************
+ *
+ *  INITIALIZE THREAD
+ *
+ *****************************************************************/
+
+/**
+ * Initialize Syscalls
+ *
+ * Load the eBPF programs that monitor the syscalls enabled in the configuration.
+ *
+ * @param em  the structure with information about this thread.
+ * @return 0 on success and -1 otherwise.
+ */
+static int ebpf_sync_initialize_syscall(ebpf_module_t *em)
+{
+    int i;
+    const char *saved_name = em->thread_name;
+    for (i = 0; local_syscalls[i].syscall; i++) {
+        ebpf_sync_syscalls_t *w = &local_syscalls[i];
+        if (!w->probe_links && w->enabled) {
+            fill_ebpf_data(&w->kernel_info);
+            if (ebpf_update_kernel(&w->kernel_info)) {
+                em->thread_name = saved_name;
+                error("Cannot update the kernel for eBPF module %s", w->syscall);
+                return -1;
+            }
+
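+            // The thread name is switched to the syscall being loaded and restored
+            // from saved_name afterwards (and on the error paths).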
+            em->thread_name = w->syscall;
+            w->probe_links = ebpf_load_program(ebpf_plugin_dir, em, kernel_string, &w->objects, w->kernel_info.map_fd);
+            if (!w->probe_links) {
+                em->thread_name = saved_name;
+                return -1;
+            }
+        }
+    }
+    em->thread_name = saved_name;
+
+    memset(sync_counter_aggregated_data, 0, NETDATA_SYNC_IDX_END * sizeof(netdata_syscall_stat_t));
+    memset(sync_counter_publish_aggregated, 0, NETDATA_SYNC_IDX_END * sizeof(netdata_publish_syscall_t));
+    memset(sync_hash_values, 0, NETDATA_SYNC_IDX_END * sizeof(netdata_idx_t));
+
+    return 0;
+}
+
 /*****************************************************************
  *
  *  DATA THREAD
@@ -39,12 +90,16 @@ struct config sync_config = { .first_section = NULL,
  */
 static void read_global_table()
 {
-    uint32_t idx = NETDATA_SYNC_CALL;
     netdata_idx_t stored;
-    int fd = map_fd[NETDATA_SYNC_GLOBLAL_TABLE];
-
-    if (!bpf_map_lookup_elem(fd, &idx, &stored)) {
-        sync_hash_values = stored;
+    uint32_t idx = NETDATA_SYNC_CALL;
+    int i;
+    for (i = 0; local_syscalls[i].syscall; i++) {
+        if (local_syscalls[i].enabled) {
+            int fd = local_syscalls[i].kernel_info.map_fd[NETDATA_SYNC_GLOBLAL_TABLE];
+            if (!bpf_map_lookup_elem(fd, &idx, &stored)) {
+                sync_hash_values[i] = stored;
+            }
+        }
     }
 }
 
@@ -78,14 +133,57 @@ void *ebpf_sync_read_hash(void *ptr)
 }
 
 /**
- * Send global
+ * Send Sync chart
+ *
+ * Write the dimension values for a chart that groups several sync syscalls, according to user input.
+ *
+ * @param id        chart id
+ * @param idx       the first index with data.
+ * @param end       the last index with data.
+ */
+static void ebpf_send_sync_chart(char *id,
+                                   int idx,
+                                   int end)
+{
+    write_begin_chart(NETDATA_EBPF_MEMORY_GROUP, id);
+
+    netdata_publish_syscall_t *move = &sync_counter_publish_aggregated[idx];
+
+    while (move && idx <= end) {
+        if (local_syscalls[idx].enabled)
+            write_chart_dimension(move->name, sync_hash_values[idx]);
+
+        move = move->next;
+        idx++;
+    }
+
+    write_end_chart();
+}
+
+/**
+ * Send data
  *
  * Send global charts to Netdata
  */
-static void sync_send_global()
+static void sync_send_data()
 {
-    ebpf_one_dimension_write_charts(NETDATA_EBPF_MEMORY_GROUP, NETDATA_EBPF_SYNC_CHART,
-                                    sync_counter_publish_aggregated.dimension, sync_hash_values);
+    if (local_syscalls[NETDATA_SYNC_FSYNC_IDX].enabled || local_syscalls[NETDATA_SYNC_FDATASYNC_IDX].enabled) {
+        ebpf_send_sync_chart(NETDATA_EBPF_FILE_SYNC_CHART, NETDATA_SYNC_FSYNC_IDX, NETDATA_SYNC_FDATASYNC_IDX);
+    }
+
+    if (local_syscalls[NETDATA_SYNC_MSYNC_IDX].enabled)
+        ebpf_one_dimension_write_charts(NETDATA_EBPF_MEMORY_GROUP, NETDATA_EBPF_MSYNC_CHART,
+                                        sync_counter_publish_aggregated[NETDATA_SYNC_MSYNC_IDX].dimension,
+                                        sync_hash_values[NETDATA_SYNC_MSYNC_IDX]);
+
+    if (local_syscalls[NETDATA_SYNC_SYNC_IDX].enabled || local_syscalls[NETDATA_SYNC_SYNCFS_IDX].enabled) {
+        ebpf_send_sync_chart(NETDATA_EBPF_SYNC_CHART, NETDATA_SYNC_SYNC_IDX, NETDATA_SYNC_SYNCFS_IDX);
+    }
+
+    if (local_syscalls[NETDATA_SYNC_SYNC_FILE_RANGE_IDX].enabled)
+        ebpf_one_dimension_write_charts(NETDATA_EBPF_MEMORY_GROUP, NETDATA_EBPF_FILE_SEGMENT_CHART,
+                                        sync_counter_publish_aggregated[NETDATA_SYNC_SYNC_FILE_RANGE_IDX].dimension,
+                                        sync_hash_values[NETDATA_SYNC_SYNC_FILE_RANGE_IDX]);
 }
 
 /**
@@ -96,8 +194,6 @@ static void sync_collector(ebpf_module_t *em)
     sync_threads.thread = mallocz(sizeof(netdata_thread_t));
     sync_threads.start_routine = ebpf_sync_read_hash;
 
-    map_fd = sync_data.map_fd;
-
     netdata_thread_create(sync_threads.thread, sync_threads.name, NETDATA_THREAD_OPTION_JOINABLE,
                           ebpf_sync_read_hash, em);
 
@@ -107,7 +203,7 @@ static void sync_collector(ebpf_module_t *em)
 
         pthread_mutex_lock(&lock);
 
-        sync_send_global();
+        sync_send_data();
 
         pthread_mutex_unlock(&lock);
         pthread_mutex_unlock(&collect_data_mutex);
@@ -121,6 +217,30 @@ static void sync_collector(ebpf_module_t *em)
  *
  *****************************************************************/
 
+/**
+ * Cleanup Objects
+ *
+ * Clean up the eBPF objects loaded when the thread was initialized.
+ */
+void ebpf_sync_cleanup_objects()
+{
+    int i;
+    for (i = 0; local_syscalls[i].syscall; i++) {
+        ebpf_sync_syscalls_t *w = &local_syscalls[i];
+        if (w->probe_links) {
+            freez(w->kernel_info.map_fd);
+
+            struct bpf_program *prog;
+            size_t j = 0;
+            bpf_object__for_each_program(prog, w->objects) {
+                bpf_link__destroy(w->probe_links[j]);
+                j++;
+            }
+            bpf_object__close(w->objects);
+        }
+    }
+}
+
 /**
  * Clean up the main thread.
  *
@@ -140,15 +260,8 @@ static void ebpf_sync_cleanup(void *ptr)
         UNUSED(dt);
     }
 
+    ebpf_sync_cleanup_objects();
     freez(sync_threads.thread);
-
-    struct bpf_program *prog;
-    size_t i = 0 ;
-    bpf_object__for_each_program(prog, objects) {
-        bpf_link__destroy(probe_links[i]);
-        i++;
-    }
-    bpf_object__close(objects);
 }
 
 /*****************************************************************
@@ -157,6 +270,37 @@ static void ebpf_sync_cleanup(void *ptr)
  *
  *****************************************************************/
 
+/**
+ * Create Sync chart
+ *
+ * Create a chart and its dimensions according to user input.
+ *
+ * @param id        chart id
+ * @param title     chart title
+ * @param order     order number of the specified chart
+ * @param idx       the first index with data.
+ * @param end       the last index with data.
+ */
+static void ebpf_create_sync_chart(char *id,
+                                   char *title,
+                                   int order,
+                                   int idx,
+                                   int end)
+{
+    ebpf_write_chart_cmd(NETDATA_EBPF_MEMORY_GROUP, id, title, EBPF_COMMON_DIMENSION_CALL,
+                         NETDATA_EBPF_SYNC_SUBMENU, NETDATA_EBPF_CHART_TYPE_LINE, NULL, order);
+
+    netdata_publish_syscall_t *move = &sync_counter_publish_aggregated[idx];
+
+    while (move && idx <= end) {
+        if (local_syscalls[idx].enabled)
+            ebpf_write_global_dimension(move->name, move->dimension, move->algorithm);
+
+        move = move->next;
+        idx++;
+    }
+}
+
 /**
  * Create global charts
  *
@@ -164,11 +308,39 @@ static void ebpf_sync_cleanup(void *ptr)
  */
 static void ebpf_create_sync_charts()
 {
-    ebpf_create_chart(NETDATA_EBPF_MEMORY_GROUP, NETDATA_EBPF_SYNC_CHART,
-                      "Monitor calls for <a href=\"https://linux.die.net/man/2/sync\">sync(2)</a> syscall.",
-                      EBPF_COMMON_DIMENSION_CALL, NETDATA_EBPF_SYNC_SUBMENU, NULL,
-                      NETDATA_EBPF_CHART_TYPE_LINE, 21300,
-                      ebpf_create_global_dimension, &sync_counter_publish_aggregated, 1);
+    if (local_syscalls[NETDATA_SYNC_FSYNC_IDX].enabled || local_syscalls[NETDATA_SYNC_FDATASYNC_IDX].enabled)
+        ebpf_create_sync_chart(NETDATA_EBPF_FILE_SYNC_CHART,
+                               "Monitor calls for <code>fsync(2)</code> and <code>fdatasync(2)</code>.", 21300,
+                               NETDATA_SYNC_FSYNC_IDX, NETDATA_SYNC_FDATASYNC_IDX);
+
+    if (local_syscalls[NETDATA_SYNC_MSYNC_IDX].enabled)
+        ebpf_create_sync_chart(NETDATA_EBPF_MSYNC_CHART,
+                               "Monitor calls for <code>msync(2)</code>.", 21301,
+                               NETDATA_SYNC_MSYNC_IDX, NETDATA_SYNC_MSYNC_IDX);
+
+    if (local_syscalls[NETDATA_SYNC_SYNC_IDX].enabled || local_syscalls[NETDATA_SYNC_SYNCFS_IDX].enabled)
+        ebpf_create_sync_chart(NETDATA_EBPF_SYNC_CHART,
+                               "Monitor calls for <code>sync(2)</code> and <code>syncfs(2)</code>.", 21302,
+                               NETDATA_SYNC_SYNC_IDX, NETDATA_SYNC_SYNCFS_IDX);
+
+    if (local_syscalls[NETDATA_SYNC_SYNC_FILE_RANGE_IDX].enabled)
+        ebpf_create_sync_chart(NETDATA_EBPF_FILE_SEGMENT_CHART,
+                               "Monitor calls for <code>sync_file_range(2)</code>.", 21303,
+                               NETDATA_SYNC_SYNC_FILE_RANGE_IDX, NETDATA_SYNC_SYNC_FILE_RANGE_IDX);
+}
+
+/**
+ * Parse Syscalls
+ *
+ * Parse syscall options available inside ebpf.d/sync.conf
+ */
+static void ebpf_sync_parse_syscalls()
+{
+    int i;
+    for (i = 0; local_syscalls[i].syscall; i++) {
+        local_syscalls[i].enabled = appconfig_get_boolean(&sync_config, NETDATA_SYNC_CONFIG_NAME,
+                                                          local_syscalls[i].syscall, CONFIG_BOOLEAN_YES);
+    }
 }
 
 /**
@@ -188,25 +360,22 @@ void *ebpf_sync_thread(void *ptr)
     fill_ebpf_data(&sync_data);
 
     ebpf_update_module(em, &sync_config, NETDATA_SYNC_CONFIG_FILE);
+    ebpf_sync_parse_syscalls();
 
     if (!em->enabled)
         goto endsync;
 
-    if (ebpf_update_kernel(&sync_data)) {
-        pthread_mutex_unlock(&lock);
-        goto endsync;
-    }
-
-    probe_links = ebpf_load_program(ebpf_plugin_dir, em, kernel_string, &objects, sync_data.map_fd);
-    if (!probe_links) {
+    if (ebpf_sync_initialize_syscall(em)) {
         pthread_mutex_unlock(&lock);
         goto endsync;
     }
 
-    int algorithm =  NETDATA_EBPF_INCREMENTAL_IDX;
-    ebpf_global_labels(&sync_counter_aggregated_data, &sync_counter_publish_aggregated,
+    int algorithms[NETDATA_SYNC_IDX_END] = { NETDATA_EBPF_INCREMENTAL_IDX, NETDATA_EBPF_INCREMENTAL_IDX,
+                                             NETDATA_EBPF_INCREMENTAL_IDX, NETDATA_EBPF_INCREMENTAL_IDX,
+                                             NETDATA_EBPF_INCREMENTAL_IDX, NETDATA_EBPF_INCREMENTAL_IDX };
+    ebpf_global_labels(sync_counter_aggregated_data, sync_counter_publish_aggregated,
                        sync_counter_dimension_name, sync_counter_dimension_name,
-                       &algorithm, NETDATA_SYNC_END);
+                       algorithms, NETDATA_SYNC_IDX_END);
 
     pthread_mutex_lock(&lock);
     ebpf_create_sync_charts();
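
As a rough illustration of what `sync_send_data()` emits on each iteration, assuming the standard Netdata external-plugin `BEGIN`/`SET`/`END` protocol used by `write_begin_chart()` and `write_chart_dimension()` (chart and dimension names come from this commit, the values are invented):

```c
#include <stdio.h>

// Illustrative only: prints the kind of text the plugin writes to stdout for
// the grouped file_sync and sync charts. Disabled syscalls simply get no SET
// line, because ebpf_send_sync_chart() skips them.
int main(void)
{
    printf("BEGIN mem.file_sync\n");
    printf("SET fsync = 12\n");
    printf("SET fdatasync = 3\n");
    printf("END\n");

    printf("BEGIN mem.sync\n");
    printf("SET sync = 1\n");
    printf("SET syncfs = 0\n");
    printf("END\n");
    return 0;
}
```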

+ 26 - 0
collectors/ebpf.plugin/ebpf_sync.h

@@ -5,12 +5,38 @@
 
 // charts
 #define NETDATA_EBPF_SYNC_CHART "sync"
+#define NETDATA_EBPF_MSYNC_CHART "memory_map"
+#define NETDATA_EBPF_FILE_SYNC_CHART "file_sync"
+#define NETDATA_EBPF_FILE_SEGMENT_CHART "file_segment"
 #define NETDATA_EBPF_SYNC_SUBMENU "synchronization (eBPF)"
 
 #define NETDATA_EBPF_SYNC_SLEEP_MS 800000ULL
 
 // configuration file
 #define NETDATA_SYNC_CONFIG_FILE "sync.conf"
+#define NETDATA_SYNC_CONFIG_NAME "syscalls"
+
+enum sync_syscalls_index {
+    NETDATA_SYNC_SYNC_IDX,
+    NETDATA_SYNC_SYNCFS_IDX,
+    NETDATA_SYNC_MSYNC_IDX,
+    NETDATA_SYNC_FSYNC_IDX,
+    NETDATA_SYNC_FDATASYNC_IDX,
+    NETDATA_SYNC_SYNC_FILE_RANGE_IDX,
+
+    NETDATA_SYNC_IDX_END
+};
+
+typedef struct ebpf_sync_syscalls {
+    char *syscall;
+    int enabled;
+    uint32_t flags;
+
+    struct bpf_object *objects;
+    struct bpf_link **probe_links;
+
+    ebpf_data_t kernel_info;
+} ebpf_sync_syscalls_t;
 
 enum netdata_sync_charts {
     NETDATA_SYNC_CALL,
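
One subtlety in the new header: `sync_counter_dimension_name[]`, `algorithms[]`, and `local_syscalls[]` in `ebpf_sync.c` are all indexed by `enum sync_syscalls_index`, and the grouped charts written by `ebpf_send_sync_chart()` assume that `fsync`/`fdatasync` and `sync`/`syncfs` occupy adjacent indexes. A hypothetical, self-contained compile-time guard for that assumption (not part of this commit; the enum is restated locally so the snippet builds on its own):

```c
#include <assert.h>

/* Mirrors enum sync_syscalls_index from ebpf_sync.h. */
enum sync_syscalls_index_sketch {
    NETDATA_SYNC_SYNC_IDX,
    NETDATA_SYNC_SYNCFS_IDX,
    NETDATA_SYNC_MSYNC_IDX,
    NETDATA_SYNC_FSYNC_IDX,
    NETDATA_SYNC_FDATASYNC_IDX,
    NETDATA_SYNC_SYNC_FILE_RANGE_IDX,

    NETDATA_SYNC_IDX_END
};

// Fails the build (C11) if the adjacency assumptions ever break.
static_assert(NETDATA_SYNC_FDATASYNC_IDX == NETDATA_SYNC_FSYNC_IDX + 1,
              "fsync/fdatasync feed the same chart and must stay adjacent");
static_assert(NETDATA_SYNC_SYNCFS_IDX == NETDATA_SYNC_SYNC_IDX + 1,
              "sync/syncfs feed the same chart and must stay adjacent");

int main(void) { return 0; }
```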

+ 3 - 3
packaging/ebpf.checksums

@@ -1,3 +1,3 @@
-fd93c6cda92c1eda6bedb384dd79095bb1ba7e87493153ec455efa9cb6365b48  netdata-kernel-collector-glibc-v0.6.0.tar.xz
-a19e776eb714f2ed08eb4e43bfd9798a06fad8f1b15421e18b22a36583f41a6f  netdata-kernel-collector-musl-v0.6.0.tar.xz
-582b2c5dec077266c5f993f3a735cc46f09448cd6b24e6397daafde97616b113  netdata-kernel-collector-static-v0.6.0.tar.xz
+380e31fe143e7b53bcebaaf03a04d143ae82e13318b264461ebb5d3ac9026ae5  netdata-kernel-collector-glibc-v0.6.1.tar.xz
+5a196ab8a00d307a4f6a5c213178bd62e5720173f433afc6e77dfa911fb6ca56  netdata-kernel-collector-musl-v0.6.1.tar.xz
+683e6676c1eee0cd4a7da5be953e94052e780de1ca375146a488d62593220c46  netdata-kernel-collector-static-v0.6.1.tar.xz

+ 1 - 1
packaging/ebpf.version

@@ -1 +1 @@
-v0.6.0
+v0.6.1

+ 13 - 1
web/gui/dashboard_info.js

@@ -1035,7 +1035,19 @@ netdataDashboard.context = {
     },
 
     'mem.sync': {
-        info: 'System calls for <code>sync()</code> which flushes file system buffers to storage devices. These calls can cause performance perturbations, and it can be useful to know if they are happening and how frequently. Based on the eBPF <a href="https://github.com/iovisor/bcc/blob/master/tools/syncsnoop.py" target="_blank">syncsnoop</a> from BCC tools.'
+        info: 'System calls for <a href="https://man7.org/linux/man-pages/man2/sync.2.html" target="_blank">sync() and syncfs()</a>, which flush file system buffers to storage devices. These calls can cause performance perturbations, and it is useful to know if they are happening and how frequently. Monitoring of <code>sync()</code> calls is based on the eBPF <a href="https://github.com/iovisor/bcc/blob/master/tools/syncsnoop.py" target="_blank">syncsnoop</a> tool from BCC.'
+    },
+
+    'mem.file_sync': {
+        info: 'System calls for <a href="https://man7.org/linux/man-pages/man2/fsync.2.html" target="_blank">fsync() and fdatasync()</a>, which transfer all modified page cache pages of a file to the disk device. These calls block until the device reports that the transfer has been completed.'
+    },
+
+    'mem.memory_map': {
+        info: 'System calls for <a href="https://man7.org/linux/man-pages/man2/msync.2.html" target="_blank">msync()</a>, which flushes changes made to the in-core copy of a file that was mapped into memory with <code>mmap(2)</code>.'
+    },
+
+    'mem.file_segment': {
+        info: 'System calls for <a href="https://man7.org/linux/man-pages/man2/sync_file_range.2.html" target="_blank">sync_file_range()</a>, which permits fine control when synchronizing the open file referred to by the file descriptor <code>fd</code> with disk. This system call is extremely dangerous and should not be used in portable programs.'
     },
 
     // ------------------------------------------------------------------------