Просмотр исходного кода

add randomness per thread to heartbeat (#18929)

* add randomness per thread to heartbeat

* tune randomness

* use the thread tag name in the randomness hash

* use os_gettid()

* fix xenstat

* fix all occurrences of heartbeat_init and heartbeat_next

* initialize clocks with a constructor; on Windows, enable the high-resolution timer globally

* better distribution of heartbeats; prevent heartbeats from being aligned with the system HZ

* move randomness away from scheduler ticks

* make sure randomness is not the same across processes

* make randomness spread even across processes
Costa Tsaousis 4 месяцев назад
Родитель
Commit
4b25987ed4

+ 0 - 1
src/cli/cli.c

@@ -155,7 +155,6 @@ static void connect_cb(uv_connect_t* req, int status)
 
 int main(int argc, char **argv)
 {
-    clocks_init();
     nd_log_initialize_for_external_plugins("netdatacli");
 
     int ret, i;

+ 2 - 5
src/collectors/apps.plugin/apps_plugin.c

@@ -665,7 +665,6 @@ netdata_mutex_t apps_and_stdout_mutex = NETDATA_MUTEX_INITIALIZER;
 static bool apps_plugin_exit = false;
 
 int main(int argc, char **argv) {
-    clocks_init();
     nd_log_initialize_for_external_plugins("apps.plugin");
 
     pagesize = (size_t)sysconf(_SC_PAGESIZE);
@@ -709,7 +708,6 @@ int main(int argc, char **argv) {
 #endif /* NETDATA_INTERNAL_CHECKS */
 
     procfile_set_adaptive_allocation(true, 0, 0, 0);
-    os_get_system_HZ();
     os_get_system_cpus_uncached();
     apps_managers_and_aggregators_init(); // before parsing args!
     parse_args(argc, argv);
@@ -763,10 +761,9 @@ int main(int argc, char **argv) {
     netdata_mutex_lock(&apps_and_stdout_mutex);
     APPS_PLUGIN_GLOBAL_FUNCTIONS();
 
-    usec_t step = update_every * USEC_PER_SEC;
     global_iterations_counter = 1;
     heartbeat_t hb;
-    heartbeat_init(&hb);
+    heartbeat_init(&hb, update_every * USEC_PER_SEC);
     for(; !apps_plugin_exit ; global_iterations_counter++) {
         netdata_mutex_unlock(&apps_and_stdout_mutex);
 
@@ -778,7 +775,7 @@ int main(int argc, char **argv) {
             dt = update_every * USEC_PER_SEC;
         }
         else
-            dt = heartbeat_next(&hb, step);
+            dt = heartbeat_next(&hb);
 
         netdata_mutex_lock(&apps_and_stdout_mutex);
 

+ 0 - 2
src/collectors/cgroups.plugin/cgroup-network.c

@@ -717,8 +717,6 @@ void usage(void) {
 int main(int argc, const char **argv) {
     pid_t pid = 0;
 
-    clocks_init();
-
     if (setresuid(0, 0, 0) == -1)
         collector_error("setresuid(0, 0, 0) failed.");
 

+ 2 - 3
src/collectors/cgroups.plugin/sys_fs_cgroup.c

@@ -1417,14 +1417,13 @@ void *cgroups_main(void *ptr) {
                             cgroup_function_systemd_top);
 
     heartbeat_t hb;
-    heartbeat_init(&hb);
-    usec_t step = cgroup_update_every * USEC_PER_SEC;
+    heartbeat_init(&hb, cgroup_update_every * USEC_PER_SEC);
     usec_t find_every = cgroup_check_for_new_every * USEC_PER_SEC, find_dt = 0;
 
     while(service_running(SERVICE_COLLECTORS)) {
         worker_is_idle();
 
-        usec_t hb_dt = heartbeat_next(&hb, step);
+        usec_t hb_dt = heartbeat_next(&hb);
 
         if (unlikely(!service_running(SERVICE_COLLECTORS)))
             break;

+ 2 - 4
src/collectors/cups.plugin/cups_plugin.c

@@ -226,7 +226,6 @@ void reset_metrics() {
 }
 
 int main(int argc, char **argv) {
-    clocks_init();
     nd_log_initialize_for_external_plugins("cups.plugin");
 
     parse_command_line(argc, argv);
@@ -243,12 +242,11 @@ int main(int argc, char **argv) {
 
     time_t started_t = now_monotonic_sec();
     size_t iteration = 0;
-    usec_t step = netdata_update_every * USEC_PER_SEC;
 
     heartbeat_t hb;
-    heartbeat_init(&hb);
+    heartbeat_init(&hb, netdata_update_every * USEC_PER_SEC);
     for (iteration = 0; 1; iteration++) {
-        heartbeat_next(&hb, step);
+        heartbeat_next(&hb);
 
         if (unlikely(netdata_exit))
             break;

+ 2 - 4
src/collectors/debugfs.plugin/debugfs_plugin.c

@@ -159,7 +159,6 @@ static void debugfs_parse_args(int argc, char **argv)
 
 int main(int argc, char **argv)
 {
-    clocks_init();
     nd_log_initialize_for_external_plugins("debugfs.plugin");
 
     netdata_configured_host_prefix = getenv("NETDATA_HOST_PREFIX");
@@ -214,12 +213,11 @@ int main(int argc, char **argv)
     debugfs_parse_args(argc, argv);
 
     size_t iteration;
-    usec_t step = update_every * USEC_PER_SEC;
     heartbeat_t hb;
-    heartbeat_init(&hb);
+    heartbeat_init(&hb, update_every * USEC_PER_SEC);
 
     for (iteration = 0; iteration < 86400; iteration++) {
-        heartbeat_next(&hb, step);
+        heartbeat_next(&hb);
         int enabled = 0;
 
         for (int i = 0; debugfs_modules[i].name; i++) {

+ 4 - 5
src/collectors/diskspace.plugin/plugin_diskspace.c

@@ -544,11 +544,11 @@ void *diskspace_slow_worker(void *ptr)
     usec_t step = slow_update_every * USEC_PER_SEC;
     usec_t real_step = USEC_PER_SEC;
     heartbeat_t hb;
-    heartbeat_init(&hb);
+    heartbeat_init(&hb, USEC_PER_SEC);
 
     while(service_running(SERVICE_COLLECTORS)) {
         worker_is_idle();
-        heartbeat_next(&hb, USEC_PER_SEC);
+        heartbeat_next(&hb);
 
         if (real_step < step) {
             real_step += USEC_PER_SEC;
@@ -876,12 +876,11 @@ void *diskspace_main(void *ptr) {
         diskspace_slow_worker,
         &slow_worker_data);
 
-    usec_t step = update_every * USEC_PER_SEC;
     heartbeat_t hb;
-    heartbeat_init(&hb);
+    heartbeat_init(&hb, update_every * USEC_PER_SEC);
     while(service_running(SERVICE_COLLECTORS)) {
         worker_is_idle();
-        /* usec_t hb_dt = */ heartbeat_next(&hb, step);
+        /* usec_t hb_dt = */ heartbeat_next(&hb);
 
         if(unlikely(!service_running(SERVICE_COLLECTORS))) break;
 

+ 2 - 4
src/collectors/ebpf.plugin/ebpf.c

@@ -4005,7 +4005,6 @@ static void ebpf_manage_pid(pid_t pid)
  */
 int main(int argc, char **argv)
 {
-    clocks_init();
     nd_log_initialize_for_external_plugins(NETDATA_EBPF_PLUGIN_NAME);
 
     ebpf_set_global_variables();
@@ -4076,15 +4075,14 @@ int main(int argc, char **argv)
         }
     }
 
-    usec_t step = USEC_PER_SEC;
     heartbeat_t hb;
-    heartbeat_init(&hb);
+    heartbeat_init(&hb, USEC_PER_SEC);
     int update_apps_every = (int) EBPF_CFG_UPDATE_APPS_EVERY_DEFAULT;
     int update_apps_list = update_apps_every - 1;
     int process_maps_per_core = ebpf_modules[EBPF_MODULE_PROCESS_IDX].maps_per_core;
     //Plugin will be killed when it receives a signal
     for ( ; !ebpf_plugin_stop(); global_iterations_counter++) {
-        (void)heartbeat_next(&hb, step);
+        (void)heartbeat_next(&hb);
 
         if (global_iterations_counter % EBPF_DEFAULT_UPDATE_EVERY == 0) {
             pthread_mutex_lock(&lock);

+ 5 - 7
src/collectors/ebpf.plugin/ebpf_cachestat.c

@@ -837,9 +837,6 @@ void ebpf_resume_apps_data()
  */
 void *ebpf_read_cachestat_thread(void *ptr)
 {
-    heartbeat_t hb;
-    heartbeat_init(&hb);
-
     ebpf_module_t *em = (ebpf_module_t *)ptr;
 
     int maps_per_core = em->maps_per_core;
@@ -849,10 +846,11 @@ void *ebpf_read_cachestat_thread(void *ptr)
 
     uint32_t lifetime = em->lifetime;
     uint32_t running_time = 0;
-    usec_t period = update_every * USEC_PER_SEC;
     pids_fd[EBPF_PIDS_CACHESTAT_IDX] = cachestat_maps[NETDATA_CACHESTAT_PID_STATS].map_fd;
+    heartbeat_t hb;
+    heartbeat_init(&hb, update_every * USEC_PER_SEC);
     while (!ebpf_plugin_stop() && running_time < lifetime) {
-        (void)heartbeat_next(&hb, period);
+        (void)heartbeat_next(&hb);
         if (ebpf_plugin_stop() || ++counter != update_every)
             continue;
 
@@ -1401,7 +1399,7 @@ static void cachestat_collector(ebpf_module_t *em)
     int update_every = em->update_every;
     int maps_per_core = em->maps_per_core;
     heartbeat_t hb;
-    heartbeat_init(&hb);
+    heartbeat_init(&hb, USEC_PER_SEC);
     int counter = update_every - 1;
     //This will be cancelled by its parent
     uint32_t running_time = 0;
@@ -1409,7 +1407,7 @@ static void cachestat_collector(ebpf_module_t *em)
     netdata_idx_t *stats = em->hash_table_stats;
     memset(stats, 0, sizeof(em->hash_table_stats));
     while (!ebpf_plugin_stop() && running_time < lifetime) {
-        (void)heartbeat_next(&hb, USEC_PER_SEC);
+        (void)heartbeat_next(&hb);
 
         if (ebpf_plugin_stop() || ++counter != update_every)
             continue;

+ 2 - 3
src/collectors/ebpf.plugin/ebpf_cgroup.c

@@ -373,13 +373,12 @@ void ebpf_create_charts_on_systemd(ebpf_systemd_args_t *chart)
  */
 void *ebpf_cgroup_integration(void *ptr __maybe_unused)
 {
-    usec_t step = USEC_PER_SEC;
     int counter = NETDATA_EBPF_CGROUP_UPDATE - 1;
     heartbeat_t hb;
-    heartbeat_init(&hb);
+    heartbeat_init(&hb, USEC_PER_SEC);
     //Plugin will be killed when it receives a signal
     while (!ebpf_plugin_stop()) {
-        (void)heartbeat_next(&hb, step);
+        heartbeat_next(&hb);
 
         // We are using a small heartbeat time to wake up thread,
         // but we should not update so frequently the shared memory data

Некоторые файлы не были показаны из-за большого количества измененных файлов