Browse Source

introduce new chart for process states metrics (#12305)

* 12139: introduce new chart for process states metrics

This commit introduces a new chart for the total number of processes
in different states, i.e. running, sleeping, sleeping_d, zombie,
and stopped.

* fix recursive chart generation issue

* fix recursive chart addition

* fixing comments

* Update web/gui/dashboard_info.js

Co-authored-by: Ilya Mashchenko <ilya@netdata.cloud>

* Update collectors/apps.plugin/apps_plugin.c

Co-authored-by: Ilya Mashchenko <ilya@netdata.cloud>

* fixing comments

* Apply suggestions from code review

* Update collectors/apps.plugin/apps_plugin.c

* Update collectors/apps.plugin/apps_plugin.c

Co-authored-by: Timotej S. <6674623+underhood@users.noreply.github.com>

* Apply suggestions from code review

Co-authored-by: Tina Luedtke <kickoke@users.noreply.github.com>

Co-authored-by: Ilya Mashchenko <ilya@netdata.cloud>
Co-authored-by: Timotej S. <6674623+underhood@users.noreply.github.com>
Co-authored-by: Tina Luedtke <kickoke@users.noreply.github.com>
Suraj Neupane 3 years ago
parent
commit
64375154fe
3 changed files with 89 additions and 9 deletions
  1. 1 0
      collectors/all.h
  2. 75 9
      collectors/apps.plugin/apps_plugin.c
  3. 13 0
      web/gui/dashboard_info.js

+ 1 - 0
collectors/all.h

@@ -30,6 +30,7 @@
 #define NETDATA_CHART_PRIO_SYSTEM_IP                    501
 #define NETDATA_CHART_PRIO_SYSTEM_IPV6                  502
 #define NETDATA_CHART_PRIO_SYSTEM_PROCESSES             600
+#define NETDATA_CHART_PRIO_SYSTEM_PROCESS_STATES        601
 #define NETDATA_CHART_PRIO_SYSTEM_FORKS                 700
 #define NETDATA_CHART_PRIO_SYSTEM_ACTIVE_PROCESSES      750
 #define NETDATA_CHART_PRIO_SYSTEM_CTXT                  800

+ 75 - 9
collectors/apps.plugin/apps_plugin.c

@@ -6,6 +6,7 @@
  * Released under GPL v3+
  */
 
+#include "collectors/all.h"
 #include "libnetdata/libnetdata.h"
 #include "libnetdata/required_dummies.h"
 
@@ -107,6 +108,25 @@ static int
 static char *user_config_dir = CONFIG_DIR;
 static char *stock_config_dir = LIBCONFIG_DIR;
 
+// bookkeeping for the number of processes found in each state
+typedef enum {
+    PROC_STATUS_RUNNING = 0,
+    PROC_STATUS_SLEEPING_D, // uninterruptible sleep
+    PROC_STATUS_SLEEPING,   // interruptible sleep
+    PROC_STATUS_ZOMBIE,
+    PROC_STATUS_STOPPED,
+    PROC_STATUS_END, // number of states above; must stay last, not a state itself
+} proc_state;
+
+// counters indexed by proc_state; a plain integer type is used because the
+// elements hold process counts, not proc_state values
+static size_t proc_state_count[PROC_STATUS_END];
+
+// names indexed by proc_state; sized by PROC_STATUS_END so the table cannot
+// silently grow or shrink independently of the enum
+static const char *const proc_states[PROC_STATUS_END] = {
+    [PROC_STATUS_RUNNING] = "running",
+    [PROC_STATUS_SLEEPING] = "sleeping_interruptible",
+    [PROC_STATUS_SLEEPING_D] = "sleeping_uninterruptible",
+    [PROC_STATUS_ZOMBIE] = "zombie",
+    [PROC_STATUS_STOPPED] = "stopped",
+};
+
+
 // ----------------------------------------------------------------------------
 // internal flags
 // handled in code (automatically set)
@@ -286,7 +306,7 @@ struct pid_stat {
 
     uint32_t log_thrown;
 
-    // char state;
+    char state;
     int32_t ppid;
     // int32_t pgrp;
     // int32_t session;
@@ -1234,6 +1254,28 @@ void arl_callback_status_rssshmem(const char *name, uint32_t hash, const char *v
 }
 #endif // !__FreeBSD__
 
+// Increment the counter that corresponds to a process state character:
+//   'R' -> running, 'S' -> interruptible sleep, 'D' -> uninterruptible sleep,
+//   'Z' -> zombie,  'T' -> stopped.
+// Any other character is ignored and no counter changes.
+// The parameter is called `state` so it does not shadow the proc_state typedef.
+static void update_proc_state_count(char state) {
+    switch (state) {
+        case 'S':
+            proc_state_count[PROC_STATUS_SLEEPING] += 1;
+            break;
+        case 'R':
+            proc_state_count[PROC_STATUS_RUNNING] += 1;
+            break;
+        case 'D':
+            proc_state_count[PROC_STATUS_SLEEPING_D] += 1;
+            break;
+        case 'Z':
+            proc_state_count[PROC_STATUS_ZOMBIE] += 1;
+            break;
+        case 'T':
+            proc_state_count[PROC_STATUS_STOPPED] += 1;
+            break;
+        default:
+            // state not tracked by the chart
+            break;
+    }
+}
+
+
 static inline int read_proc_pid_status(struct pid_stat *p, void *ptr) {
     p->status_vmsize           = 0;
     p->status_vmrss            = 0;
@@ -1268,6 +1310,7 @@ static inline int read_proc_pid_status(struct pid_stat *p, void *ptr) {
         arl_expect_custom(p->status_arl, "VmSwap", arl_callback_status_vmswap, &arl_ptr);
     }
 
+
     if(unlikely(!p->status_filename)) {
         char filename[FILENAME_MAX + 1];
         snprintfz(filename, FILENAME_MAX, "%s/proc/%d/status", netdata_configured_host_prefix, p->pid);
@@ -1313,7 +1356,6 @@ static inline int read_proc_pid_stat(struct pid_stat *p, void *ptr) {
 
 #ifdef __FreeBSD__
     struct kinfo_proc *proc_info = (struct kinfo_proc *)ptr;
-
     if (unlikely(proc_info->ki_tdflags & TDF_IDLETD))
         goto cleanup;
 #else
@@ -1348,7 +1390,7 @@ static inline int read_proc_pid_stat(struct pid_stat *p, void *ptr) {
 #else
     // p->pid           = str2pid_t(procfile_lineword(ff, 0, 0));
     char *comm          = procfile_lineword(ff, 0, 1);
-    // p->state         = *(procfile_lineword(ff, 0, 2));
+    p->state            = *(procfile_lineword(ff, 0, 2));
     p->ppid             = (int32_t)str2pid_t(procfile_lineword(ff, 0, 3));
     // p->pgrp          = (int32_t)str2pid_t(procfile_lineword(ff, 0, 4));
     // p->session       = (int32_t)str2pid_t(procfile_lineword(ff, 0, 5));
@@ -1356,7 +1398,6 @@ static inline int read_proc_pid_stat(struct pid_stat *p, void *ptr) {
     // p->tpgid         = (int32_t)str2pid_t(procfile_lineword(ff, 0, 7));
     // p->flags         = str2uint64_t(procfile_lineword(ff, 0, 8));
 #endif
-
     if(strcmp(p->comm, comm) != 0) {
         if(unlikely(debug_enabled)) {
             if(p->comm[0])
@@ -1454,7 +1495,7 @@ static inline int read_proc_pid_stat(struct pid_stat *p, void *ptr) {
         p->cstime           = 0;
         p->cgtime           = 0;
     }
-
+    update_proc_state_count(p->state);
     return 1;
 
 cleanup:
@@ -2534,6 +2575,8 @@ static inline int collect_data_for_pid(pid_t pid, void *ptr) {
 static int collect_data_for_all_processes(void) {
     struct pid_stat *p = NULL;
 
+    // clear process state counter
+    memset(proc_state_count, 0, sizeof proc_state_count);
 #ifdef __FreeBSD__
     int i, procnum;
 
@@ -2608,8 +2651,9 @@ static int collect_data_for_all_processes(void) {
             // we forward read all running processes
             // collect_data_for_pid() is smart enough,
             // not to read the same pid twice per iteration
-            for(slc = 0; slc < all_pids_count; slc++)
+            for(slc = 0; slc < all_pids_count; slc++) {
                 collect_data_for_pid(all_pids_sortlist[slc], NULL);
+            }
         }
 #endif
     }
@@ -2666,7 +2710,6 @@ static int collect_data_for_all_processes(void) {
     // we do this by collecting the ownership of process
     // if we manage to get the ownership, the process still runs
     process_exited_processes();
-
     return 1;
 }
 
@@ -3640,7 +3683,7 @@ static void send_charts_updates_to_netdata(struct target *root, const char *type
                 debug_log_int("%s just added - regenerating charts.", w->name);
         }
     }
- 
+
     // nothing more to show
     if(!newly_added && show_guest_time == show_guest_time_old) return;
 
@@ -3806,6 +3849,29 @@ static void send_charts_updates_to_netdata(struct target *root, const char *type
     }
 }
 
+// Write the system.processes_state chart to stdout.
+// The first call also emits the CHART line and one DIMENSION line per
+// process state; every call then sends the current per-state counters
+// between BEGIN and END.
+//   dt - passed through unchanged to send_BEGIN()
+static void send_proc_states_count(usec_t dt)
+{
+    static bool chart_defined = false;
+
+    // the chart and its dimensions are declared only once
+    if (!chart_defined) {
+        chart_defined = true;
+
+        fprintf(stdout,
+                "CHART system.processes_state '' 'System Processes State' 'processes' processes system.processes_state line %d %d\n",
+                NETDATA_CHART_PRIO_SYSTEM_PROCESS_STATES,
+                update_every);
+
+        proc_state dim = PROC_STATUS_RUNNING;
+        while (dim < PROC_STATUS_END) {
+            fprintf(stdout, "DIMENSION %s '' absolute 1 1\n", proc_states[dim]);
+            dim++;
+        }
+    }
+
+    // one value per state for this iteration
+    send_BEGIN("system", "processes_state", dt);
+
+    proc_state idx = PROC_STATUS_RUNNING;
+    while (idx < PROC_STATUS_END) {
+        send_SET(proc_states[idx], proc_state_count[idx]);
+        idx++;
+    }
+
+    send_END();
+}
 
 // ----------------------------------------------------------------------------
 // parse command line arguments
@@ -4181,10 +4247,10 @@ int main(int argc, char **argv) {
         normalize_utilization(apps_groups_root_target);
 
         send_resource_usage_to_netdata(dt);
+        send_proc_states_count(dt);
 
         // this is smart enough to show only newly added apps, when needed
         send_charts_updates_to_netdata(apps_groups_root_target, "apps", "Apps");
-
         if(likely(enable_users_charts))
             send_charts_updates_to_netdata(users_root_target, "users", "Users");
 

+ 13 - 0
web/gui/dashboard_info.js

@@ -1218,6 +1218,19 @@ netdataDashboard.context = {
         '<b>Blocked</b> - currently blocked, waiting for I/O to complete.</p>'
     },
 
+    'system.processes_state': {
+        info: '<p>The number of processes in different states. </p> '+
+        '<p><b>Running</b> - Process using the CPU at a particular moment. '+
+        '<b>Sleeping (uninterruptible)</b> - Process will wake when a waited-upon resource becomes available or after a time-out occurs during that wait. '+
+        'Mostly used by device drivers waiting for disk or network I/O. '+
+        '<b>Sleeping (interruptible)</b> - Process is waiting either for a particular time slot or for a particular event to occur. '+
+        '<b>Zombie</b> - Process that has completed its execution, released the system resources, but its entry is not removed from the process table. '+
+        'Usually occurs in child processes when the parent process still needs to read its child’s exit status. '+
+        'A process that stays a zombie for a long time is generally an error and causes system PID space leak. '+
+        '<b>Stopped</b> - Process is suspended from proceeding further due to STOP or TSTP signals. ' +
+        'In this state, a process will not do anything (not even terminate) until it receives a CONT signal.</p>'
+    },
+
     'system.active_processes': {
         info: 'The total number of processes in the system.'
     },