Browse Source

Easily disable alarms, by persisting the silencers configuration (#6360)

This PR was created to fix #3414. Here I am completing the job initiated by Christopher. Among the new features that we are bringing, we have:

JSON inside the core - We are bringing to the core the capacity to work with JSON files. This is available either through the JSON-C library, in case it is present in the system, or through the JSMN library that was incorporated into our core. The preference is to have JSON-C, because it is a more complete library, but in case the user does not have the library installed we are keeping JSMN so that we do not lose the feature.
Health LIST - We are adding one more command to the Health API: with LIST it is now possible to get, in JSON format, the alarms active in Netdata.
Health reorganized - Previously we had duplicated code in different files. This PR fixes that (thanks @cakrit!), so the Health code is now better organized.
Removing memory leak - The first implementation of json.c was creating SILENCERS without linking it anywhere. Now it has been linked properly.
Script updated - We are bringing some changes to the script that tests the Health.
This PR also fixes the race condition created by the previous new position of the SILENCERS creation. I had to move it to daemon/main.c because, after various tests, it was confirmed that the error could happen in different parts of the code in case it was not initialized before the threads start.

Component Name
health directory
health-cmd

Additional Information
Fixes #6356 and #3414
thiagoftsm 5 years ago
parent
commit
c56e086ba3
10 changed files with 338 additions and 47 deletions
  1. 15 0
      CMakeLists.txt
  2. 7 0
      Makefile.am
  3. 38 0
      configure.ac
  4. 11 0
      daemon/main.c
  5. 84 6
      health/health.c
  6. 2 40
      health/health.h
  7. 1 1
      health/health_config.c
  8. 2 0
      libnetdata/Makefile.am
  9. 8 0
      libnetdata/health/Makefile.am
  10. 170 0
      libnetdata/health/health.c

+ 15 - 0
CMakeLists.txt

@@ -130,6 +130,15 @@ set(NETDATA_COMMON_CFLAGS ${NETDATA_COMMON_CFLAGS} ${OPENSSL_CFLAGS_OTHER})
 set(NETDATA_COMMON_LIBRARIES ${NETDATA_COMMON_LIBRARIES} ${OPENSSL_LIBRARIES})
 set(NETDATA_COMMON_INCLUDE_DIRS ${NETDATA_COMMON_INCLUDE_DIRS} ${OPENSSL_INCLUDE_DIRS})
 
+# -----------------------------------------------------------------------------
+# JSON-C, used by health to parse the persisted silencers configuration
+
+# pkg_check_modules() publishes its results under the given prefix (JSON),
+# so the extra compiler flags are found in JSON_CFLAGS_OTHER.
+pkg_check_modules(JSON REQUIRED json-c)
+set(NETDATA_COMMON_CFLAGS ${NETDATA_COMMON_CFLAGS} ${JSON_CFLAGS_OTHER})
+set(NETDATA_COMMON_LIBRARIES ${NETDATA_COMMON_LIBRARIES} ${JSON_LIBRARIES})
+set(NETDATA_COMMON_INCLUDE_DIRS ${NETDATA_COMMON_INCLUDE_DIRS} ${JSON_INCLUDE_DIRS})
+
+
 # -----------------------------------------------------------------------------
 # Detect libcap
 
@@ -305,6 +314,12 @@ set(LIBNETDATA_FILES
         libnetdata/threads/threads.h
         libnetdata/url/url.c
         libnetdata/url/url.h
+        libnetdata/json/json.c
+        libnetdata/json/json.h
+        libnetdata/json/jsmn.c
+        libnetdata/json/jsmn.h
+        libnetdata/health/health.c
+        libnetdata/health/health.h
         libnetdata/socket/security.c
         libnetdata/socket/security.h)
 

+ 7 - 0
Makefile.am

@@ -161,6 +161,12 @@ LIBNETDATA_FILES = \
     libnetdata/threads/threads.h \
 	libnetdata/url/url.c \
 	libnetdata/url/url.h \
+	libnetdata/json/json.c \
+	libnetdata/json/json.h \
+	libnetdata/json/jsmn.c \
+	libnetdata/json/jsmn.h \
+	libnetdata/health/health.c \
+	libnetdata/health/health.h \
     $(NULL)
 
 APPS_PLUGIN_FILES = \
@@ -512,6 +518,7 @@ NETDATA_COMMON_LIBS = \
 	$(OPTIONAL_LZ4_LIBS) \
 	$(OPTIONAL_JUDY_LIBS) \
 	$(OPTIONAL_SSL_LIBS) \
+	$(OPTIONAL_JSONC_LIBS) \
 	$(NULL)
 
 sbin_PROGRAMS += netdata

+ 38 - 0
configure.ac

@@ -148,6 +148,12 @@ AC_ARG_ENABLE(
     ,
     [enable_dbengine="detect"]
 )
+# JSON-C can be requested with --enable-jsonc or refused with --disable-jsonc.
+# When neither option is given, enable_jsonc is set to "detect".
+AC_ARG_ENABLE(
+    [jsonc],
+    [AS_HELP_STRING([--enable-jsonc], [Enable JSON-C support @<:@default autodetect@:>@])],
+    ,
+    [enable_jsonc="detect"]
+)
 
 # -----------------------------------------------------------------------------
 # netdata required checks
@@ -347,6 +353,20 @@ AC_CHECK_LIB(
 OPTIONAL_SSL_CFLAGS="${SSL_CFLAGS}"
 OPTIONAL_SSL_LIBS="${SSL_LIBS}"
 
+# -----------------------------------------------------------------------------
+# JSON-C library
+#
+# When pkg-config knows the json-c module, check that the json-c library
+# provides json_object_get_type and, if so, link with -ljson-c.
+# When pkg-config does not know it, probe the library under the name json
+# and, if the symbol is there, link with -ljson.
+# JSONC_LIBS is not assigned here when neither probe succeeds.
+
+PKG_CHECK_MODULES([JSON],[json-c],AC_CHECK_LIB(
+    [json-c],
+    [json_object_get_type],
+    [JSONC_LIBS="-ljson-c"]),AC_CHECK_LIB(
+        [json],
+        [json_object_get_type],
+        [JSONC_LIBS="-ljson"])
+    )
+
+# exported to the Makefiles through AC_SUBST
+OPTIONAL_JSONC_LIBS="${JSONC_LIBS}"
+
 # -----------------------------------------------------------------------------
 # DB engine and HTTPS
 test "${enable_dbengine}" = "yes" -a -z "${UV_LIBS}" && \
@@ -381,6 +401,21 @@ fi
 AC_MSG_RESULT([${enable_https}])
 AM_CONDITIONAL([ENABLE_HTTPS], [test "${enable_https}" = "yes"])
 
+# -----------------------------------------------------------------------------
+# JSON-C
+
+# an explicit request for JSON-C cannot be satisfied without the library
+if test "${enable_jsonc}" = "yes" -a -z "${JSONC_LIBS}"; then
+    AC_MSG_ERROR([JSON-C required but not found. Try installing 'libjson-c-dev' or 'json-c'.])
+fi
+
+# JSON-C is used unless it was refused or the library was not found
+AC_MSG_CHECKING([if json-c should be used])
+if test "${enable_jsonc}" = "no" -o -z "${JSONC_LIBS}"; then
+    enable_jsonc="no"
+else
+    enable_jsonc="yes"
+    AC_DEFINE([ENABLE_JSONC], [1], [netdata json-c usability])
+fi
+AC_MSG_RESULT([${enable_jsonc}])
+AM_CONDITIONAL([ENABLE_JSONC], [test "${enable_jsonc}" = "yes"])
+
 # -----------------------------------------------------------------------------
 # compiler options
 
@@ -975,6 +1010,7 @@ AC_SUBST([OPTIONAL_UV_LIBS])
 AC_SUBST([OPTIONAL_LZ4_LIBS])
 AC_SUBST([OPTIONAL_JUDY_LIBS])
 AC_SUBST([OPTIONAL_SSL_LIBS])
+AC_SUBST([OPTIONAL_JSONC_LIBS])
 AC_SUBST([OPTIONAL_NFACCT_CFLAGS])
 AC_SUBST([OPTIONAL_NFACCT_LIBS])
 AC_SUBST([OPTIONAL_ZLIB_CFLAGS])
@@ -1051,6 +1087,8 @@ AC_CONFIG_FILES([
     libnetdata/storage_number/Makefile
     libnetdata/threads/Makefile
     libnetdata/url/Makefile
+    libnetdata/json/Makefile
+    libnetdata/health/Makefile
     registry/Makefile
     streaming/Makefile
     system/Makefile

+ 11 - 0
daemon/main.c

@@ -774,6 +774,12 @@ void send_statistics( const char *action, const char *action_result, const char
     freez(command_to_run);
 }
 
+/**
+ * Set silencers filename
+ *
+ * Build the default path "<varlib dir>/health.silencers.json" and store in
+ * silencers_filename the value of the "silencers file" option of the health
+ * configuration section, passing that path as the default value.
+ */
+void set_silencers_filename() {
+    char default_path[FILENAME_MAX + 1];
+    snprintfz(default_path, FILENAME_MAX, "%s/health.silencers.json", netdata_configured_varlib_dir);
+
+    char *configured = config_get(CONFIG_SECTION_HEALTH, "silencers file", default_path);
+    silencers_filename = configured;
+}
+
 int main(int argc, char **argv) {
     int i;
     int config_loaded = 0;
@@ -1101,6 +1107,11 @@ int main(int argc, char **argv) {
         security_init();
 #endif
 
+        // --------------------------------------------------------------------
+        // This is the safest place to start the SILENCERS structure
+        set_silencers_filename();
+        health_initialize_global_silencers();
+
         // --------------------------------------------------------------------
         // setup process signals
 

+ 84 - 6
health/health.c

@@ -13,18 +13,69 @@ unsigned int default_health_enabled = 1;
 // ----------------------------------------------------------------------------
 // health initialization
 
+/**
+ * User Config directory
+ *
+ * Get the config directory for health and return it.
+ *
+ * @return a pointer to the user config directory
+ */
 inline char *health_user_config_dir(void) {
     char buffer[FILENAME_MAX + 1];
     snprintfz(buffer, FILENAME_MAX, "%s/health.d", netdata_configured_user_config_dir);
     return config_get(CONFIG_SECTION_HEALTH, "health configuration directory", buffer);
 }
 
+/**
+ * Stock Config Directory
+ *
+ * Get the Stock config directory and return it.
+ *
+ * @return a pointer to the stock config directory.
+ */
 inline char *health_stock_config_dir(void) {
     char buffer[FILENAME_MAX + 1];
     snprintfz(buffer, FILENAME_MAX, "%s/health.d", netdata_configured_stock_config_dir);
     return config_get(CONFIG_SECTION_HEALTH, "stock health configuration directory", buffer);
 }
 
+/**
+ * Silencers init
+ *
+ * Load the persisted silencers configuration: the file named by
+ * silencers_filename is read into memory and handed to json_parse(), which
+ * calls health_silencers_json_read_callback() for the entries it finds.
+ *
+ * The file is only read when its size is at least 1 byte and smaller than
+ * HEALTH_SILENCERS_MAX_FILE_LEN. Every failure is logged and the function
+ * returns without parsing anything.
+ */
+void health_silencers_init(void) {
+    struct stat statbuf;
+    if (!stat(silencers_filename,&statbuf)) {
+        off_t length = statbuf.st_size;
+        if (length && length < HEALTH_SILENCERS_MAX_FILE_LEN) {
+            FILE *fd = fopen(silencers_filename, "r");
+            if (fd) {
+                char *str = mallocz((length+1)* sizeof(char));
+                if(str) {
+                    // The file may have changed since stat(), so trust only what fread() delivered.
+                    size_t copied = fread(str, sizeof(char), (size_t)length, fd);
+                    if(ferror(fd)) {
+                        error("Cannot read the file %s",silencers_filename);
+                    } else {
+                        // Terminate at the bytes really read, never after uninitialized memory.
+                        str[copied] = 0x00;
+                        json_parse(str, NULL, health_silencers_json_read_callback);
+                        info("Parsed health silencers file %s", silencers_filename);
+                    }
+                    freez(str);
+                }
+                fclose(fd);
+            } else {
+                error("Cannot open the file %s",silencers_filename);
+            }
+        } else {
+            // off_t has no portable printf conversion, so it is cast to match %ld
+            error("Health silencers file %s has the size %ld that is out of range[ 1 , %d ]. Aborting read.", silencers_filename, (long)length, HEALTH_SILENCERS_MAX_FILE_LEN);
+        }
+    } else {
+        error("Cannot open the file %s",silencers_filename);
+    }
+}
+
+/**
+ * Health Init
+ *
+ * Initialize the health thread.
+ */
 void health_init(void) {
     debug(D_HEALTH, "Health configuration initializing");
 
@@ -32,11 +83,20 @@ void health_init(void) {
         debug(D_HEALTH, "Health is disabled.");
         return;
     }
+
+    health_silencers_init();
 }
 
 // ----------------------------------------------------------------------------
 // re-load health configuration
 
+/**
+ * Reload host
+ *
+ * Reload configuration for a specific host.
+ *
+ * @param host the structure of the host that the function will reload the configuration.
+ */
 void health_reload_host(RRDHOST *host) {
     if(unlikely(!host->health_enabled))
         return;
@@ -84,6 +144,11 @@ void health_reload_host(RRDHOST *host) {
     rrdhost_unlock(host);
 }
 
+/**
+ * Reload
+ *
+ * Reload the host configuration for all hosts.
+ */
 void health_reload(void) {
 
     rrd_rdlock();
@@ -430,6 +495,16 @@ SILENCE_TYPE check_silenced(RRDCALC *rc, char* host, SILENCERS *silencers) {
     return STYPE_NONE;
 }
 
+/**
+ * Update Disabled Silenced
+ *
+ * Update the variable rrdcalc_flags of the structure RRDCALC according to the values of the host structure
+ *
+ * @param host structure that contains information about the host monitored.
+ * @param rc structure with information about the alarm
+ *
+ * @return It returns 1 in case rrdcalc_flags is DISABLED, or 0 otherwise
+ */
 int update_disabled_silenced(RRDHOST *host, RRDCALC *rc) {
 	uint32_t rrdcalc_flags_old = rc->rrdcalc_flags;
 	// Clear the flags
@@ -459,6 +534,15 @@ int update_disabled_silenced(RRDHOST *host, RRDCALC *rc) {
 		return 0;
 }
 
+/**
+ * Health Main
+ *
+ * The main thread of the health system. In this function all the alarms will be processed.
+ *
+ * @param ptr is a pointer to the netdata_static_thread structure.
+ *
+ * @return It always returns NULL
+ */
 void *health_main(void *ptr) {
     netdata_thread_cleanup_push(health_main_cleanup, ptr);
 
@@ -469,12 +553,6 @@ void *health_main(void *ptr) {
     time_t hibernation_delay  = config_get_number(CONFIG_SECTION_HEALTH, "postpone alarms during hibernation for seconds", 60);
 
     unsigned int loop = 0;
-
-    silencers =  mallocz(sizeof(SILENCERS));
-    silencers->all_alarms=0;
-    silencers->stype=STYPE_NONE;
-    silencers->silencers=NULL;
-
     while(!netdata_exit) {
 		loop++;
 		debug(D_HEALTH, "Health monitoring iteration no %u started", loop);

+ 2 - 40
health/health.h

@@ -35,16 +35,7 @@ extern unsigned int default_health_enabled;
 #define HEALTH_LISTEN_BACKLOG 4096
 #endif
 
-#define HEALTH_ALARM_KEY "alarm"
-#define HEALTH_TEMPLATE_KEY "template"
 #define HEALTH_ON_KEY "on"
-#define HEALTH_CONTEXT_KEY "context"
-#define HEALTH_CHART_KEY "chart"
-#define HEALTH_HOST_KEY "hosts"
-#define HEALTH_OS_KEY "os"
-#define HEALTH_FAMILIES_KEY "families"
-#define HEALTH_LOOKUP_KEY "lookup"
-#define HEALTH_CALC_KEY "calc"
 #define HEALTH_EVERY_KEY "every"
 #define HEALTH_GREEN_KEY "green"
 #define HEALTH_RED_KEY "red"
@@ -57,38 +48,9 @@ extern unsigned int default_health_enabled;
 #define HEALTH_DELAY_KEY "delay"
 #define HEALTH_OPTIONS_KEY "options"
 
-typedef struct silencer {
-    char *alarms;
-    SIMPLE_PATTERN *alarms_pattern;
+#define HEALTH_SILENCERS_MAX_FILE_LEN 10000
 
-    char *hosts;
-    SIMPLE_PATTERN *hosts_pattern;
-
-    char *contexts;
-    SIMPLE_PATTERN *contexts_pattern;
-
-    char *charts;
-    SIMPLE_PATTERN *charts_pattern;
-
-    char *families;
-    SIMPLE_PATTERN *families_pattern;
-
-    struct silencer *next;
-} SILENCER;
-
-typedef enum silence_type {
-    STYPE_NONE,
-    STYPE_DISABLE_ALARMS,
-    STYPE_SILENCE_NOTIFICATIONS
-} SILENCE_TYPE;
-
-typedef struct silencers {
-    int all_alarms;
-    SILENCE_TYPE stype;
-    SILENCER *silencers;
-} SILENCERS;
-
-SILENCERS *silencers;
+char *silencers_filename;
 
 extern void health_init(void);
 extern void *health_main(void *ptr);

+ 1 - 1
health/health_config.c

@@ -490,7 +490,7 @@ static int health_readfile(const char *filename, void *data) {
             if(append < HEALTH_CONF_MAX_LINE)
                 continue;
             else {
-                error("Health configuration has too long muli-line at line %zu of file '%s'.", line, filename);
+                error("Health configuration has too long multi-line at line %zu of file '%s'.", line, filename);
             }
         }
         append = 0;

+ 2 - 0
libnetdata/Makefile.am

@@ -11,6 +11,8 @@ SUBDIRS = \
     config \
     dictionary \
     eval \
+    json \
+    health \
     locks \
     log \
     popen \

+ 8 - 0
libnetdata/health/Makefile.am

@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+AUTOMAKE_OPTIONS = subdir-objects
+MAINTAINERCLEANFILES = $(srcdir)/Makefile.in
+
+
+dist_noinst_DATA = \
+	$(NULL)

+ 170 - 0
libnetdata/health/health.c

@@ -0,0 +1,170 @@
+#include "health.h"
+
+/**
+ * Create Silencer
+ *
+ * Allocate one new silencer with callocz().
+ *
+ * @return It returns the address of the new silencer, as given by callocz().
+ */
+SILENCER *create_silencer(void) {
+    SILENCER *fresh = callocz(1, sizeof(SILENCER));
+
+    debug(D_HEALTH, "HEALTH command API: Created empty silencer");
+    return fresh;
+}
+
+/**
+ * Health Silencers add
+ *
+ * Link one more silencer at the head of the global list of silencers.
+ *
+ * @param silencer the silencer to link; it becomes the new head of the list.
+ */
+void health_silencers_add(SILENCER *silencer) {
+    // the current head becomes the successor of the new entry
+    SILENCER *previous_head = silencers->silencers;
+    silencer->next = previous_head;
+    silencers->silencers = silencer;
+
+    debug(D_HEALTH, "HEALTH command API: Added silencer %s:%s:%s:%s:%s",
+          silencer->alarms, silencer->charts, silencer->contexts, silencer->hosts, silencer->families);
+}
+
+/**
+ * Silencers Add Parameter
+ *
+ * Store one key/value pair in a silencer. When no silencer is given and the
+ * key is one of alarm, template, chart, context, hosts or families, a new
+ * silencer is created first. The value is duplicated and compiled into the
+ * simple pattern of the matching field; the template key is accepted but
+ * does not fill any field.
+ *
+ * @param silencer a pointer to the silencer that will be adjusted, or NULL
+ * @param key the key value sent by client
+ * @param value the value sent to the key
+ *
+ * @return It returns the silencer that was given or created; NULL when no
+ *         silencer was given and the key is not a recognized one.
+ */
+SILENCER *health_silencers_addparam(SILENCER *silencer, char *key, char *value) {
+    static uint32_t
+            hash_alarm = 0,
+            hash_template = 0,
+            hash_chart = 0,
+            hash_context = 0,
+            hash_host = 0,
+            hash_families = 0;
+
+    // the hashes of the known keys are computed only on the first call
+    if (unlikely(!hash_alarm)) {
+        hash_alarm = simple_uhash(HEALTH_ALARM_KEY);
+        hash_template = simple_uhash(HEALTH_TEMPLATE_KEY);
+        hash_chart = simple_uhash(HEALTH_CHART_KEY);
+        hash_context = simple_uhash(HEALTH_CONTEXT_KEY);
+        hash_host = simple_uhash(HEALTH_HOST_KEY);
+        hash_families = simple_uhash(HEALTH_FAMILIES_KEY);
+    }
+
+    uint32_t hash = simple_uhash(key);
+
+    // classify the key once: the hash is compared first, the name confirms it
+    int is_alarm    = (hash == hash_alarm    && !strcasecmp(key, HEALTH_ALARM_KEY));
+    int is_template = (hash == hash_template && !strcasecmp(key, HEALTH_TEMPLATE_KEY));
+    int is_chart    = (hash == hash_chart    && !strcasecmp(key, HEALTH_CHART_KEY));
+    int is_context  = (hash == hash_context  && !strcasecmp(key, HEALTH_CONTEXT_KEY));
+    int is_host     = (hash == hash_host     && !strcasecmp(key, HEALTH_HOST_KEY));
+    int is_families = (hash == hash_families && !strcasecmp(key, HEALTH_FAMILIES_KEY));
+
+    if (unlikely(silencer == NULL)) {
+        // without a silencer, an unknown key leaves nothing to configure
+        if (!(is_alarm || is_template || is_chart || is_context || is_host || is_families))
+            return NULL;
+
+        silencer = create_silencer();
+        if(!silencer) {
+            error("Cannot add a new silencer to Netdata");
+            return NULL;
+        }
+    }
+
+    if (is_alarm) {
+        silencer->alarms = strdupz(value);
+        silencer->alarms_pattern = simple_pattern_create(silencer->alarms, NULL, SIMPLE_PATTERN_EXACT);
+    }
+    else if (is_chart) {
+        silencer->charts = strdupz(value);
+        silencer->charts_pattern = simple_pattern_create(silencer->charts, NULL, SIMPLE_PATTERN_EXACT);
+    }
+    else if (is_context) {
+        silencer->contexts = strdupz(value);
+        silencer->contexts_pattern = simple_pattern_create(silencer->contexts, NULL, SIMPLE_PATTERN_EXACT);
+    }
+    else if (is_host) {
+        silencer->hosts = strdupz(value);
+        silencer->hosts_pattern = simple_pattern_create(silencer->hosts, NULL, SIMPLE_PATTERN_EXACT);
+    }
+    else if (is_families) {
+        silencer->families = strdupz(value);
+        silencer->families_pattern = simple_pattern_create(silencer->families, NULL, SIMPLE_PATTERN_EXACT);
+    }
+
+    return silencer;
+}
+
+/**
+ * JSON Read Callback
+ *
+ * Callback called for the entries found while parsing the silencers JSON.
+ *
+ * - objects: a new silencer is created and linked in the global list (when
+ *   ENABLE_JSONC is not defined, only for objects with a non-empty name);
+ * - arrays: this same callback is registered as their callback function;
+ * - strings: "type" selects the global silence type (SILENCE or DISABLE),
+ *   any other name is forwarded to health_silencers_addparam();
+ * - booleans: set the global all_alarms flag;
+ * - numbers and nulls: ignored.
+ *
+ * @param e the JSON entry being processed
+ *
+ * @return It always return 0.
+ */
+int health_silencers_json_read_callback(JSON_ENTRY *e)
+{
+    switch(e->type) {
+        case JSON_OBJECT:
+#ifndef ENABLE_JSONC
+            e->callback_function = health_silencers_json_read_callback;
+            if(e->name && strcmp(e->name,"")) {
+                // init silencer
+                debug(D_HEALTH, "JSON: Got object with a name, initializing new silencer for %s",e->name);
+#endif
+            e->callback_data = create_silencer();
+            if(e->callback_data) {
+                health_silencers_add(e->callback_data);
+            }
+#ifndef ENABLE_JSONC
+            }
+#endif
+            break;
+
+        case JSON_ARRAY:
+            e->callback_function = health_silencers_json_read_callback;
+            break;
+
+        case JSON_STRING:
+            if(!strcmp(e->name,"type")) {
+                debug(D_HEALTH, "JSON: Processing type=%s",e->data.string);
+                if (!strcmp(e->data.string,"SILENCE")) silencers->stype = STYPE_SILENCE_NOTIFICATIONS;
+                else if (!strcmp(e->data.string,"DISABLE")) silencers->stype = STYPE_DISABLE_ALARMS;
+            } else {
+                debug(D_HEALTH, "JSON: Adding %s=%s", e->name, e->data.string);
+                // Without a current silencer, health_silencers_addparam() may allocate one.
+                // Keep it and link it in the list, otherwise it would be lost (memory leak).
+                SILENCER *configured = health_silencers_addparam(e->callback_data, e->name, e->data.string);
+                if(configured && !e->callback_data) {
+                    e->callback_data = configured;
+                    health_silencers_add(configured);
+                }
+            }
+            break;
+
+        case JSON_BOOLEAN:
+            debug(D_HEALTH, "JSON: Processing all_alarms");
+            silencers->all_alarms=e->data.boolean?1:0;
+            break;
+
+        case JSON_NUMBER:
+        case JSON_NULL:
+            break;
+    }
+
+    return 0;
+}
+
+/**
+ * Initialize Global Silencers
+ *
+ * Allocate the global silencers structure and reset it: all_alarms is 0,
+ * the silence type is STYPE_NONE and the list of silencers is empty.
+ *
+ * @return It always returns 0.
+ */
+int health_initialize_global_silencers() {
+    SILENCERS *fresh = mallocz(sizeof(SILENCERS));
+
+    fresh->silencers  = NULL;
+    fresh->stype      = STYPE_NONE;
+    fresh->all_alarms = 0;
+
+    silencers = fresh;
+    return 0;
+}

Some files were not shown because too many files changed in this diff