Browse Source

Fixed duplicate alarm ids in health-log.db (#9428)

Fixed duplicate alarm ids in health-log.db
Stelios Fragkakis 4 years ago
parent
commit
adc37259c2
3 changed files with 53 additions and 31 deletions
  1. 3 0
      database/rrdcalc.c
  2. 30 30
      database/rrdhost.c
  3. 20 1
      health/health_log.c

+ 3 - 0
database/rrdcalc.c

@@ -252,6 +252,9 @@ inline uint32_t rrdcalc_get_unique_id(RRDHOST *host, const char *chart, const ch
         }
     }
 
+    if (unlikely(!host->health_log.next_alarm_id))
+        host->health_log.next_alarm_id = (uint32_t)now_realtime_sec();
+
     return host->health_log.next_alarm_id++;
 }
 

+ 30 - 30
database/rrdhost.c

@@ -199,8 +199,8 @@ RRDHOST *rrdhost_create(const char *hostname,
     host->health_log.next_log_id = 1;
     host->health_log.next_alarm_id = 1;
     host->health_log.max = 1000;
-    host->health_log.next_log_id =
-    host->health_log.next_alarm_id = (uint32_t)now_realtime_sec();
+    host->health_log.next_log_id = (uint32_t)now_realtime_sec();
+    host->health_log.next_alarm_id = 0;
 
     long n = config_get_number(CONFIG_SECTION_HEALTH, "in memory max health log entries", host->health_log.max);
     if(n < 10) {
@@ -243,6 +243,34 @@ RRDHOST *rrdhost_create(const char *hostname,
        }
 
     }
+
+    if(host->health_enabled) {
+        snprintfz(filename, FILENAME_MAX, "%s/health", host->varlib_dir);
+        int r = mkdir(filename, 0775);
+        if(r != 0 && errno != EEXIST)
+            error("Host '%s': cannot create directory '%s'", host->hostname, filename);
+    }
+
+    snprintfz(filename, FILENAME_MAX, "%s/health/health-log.db", host->varlib_dir);
+    host->health_log_filename = strdupz(filename);
+
+    snprintfz(filename, FILENAME_MAX, "%s/alarm-notify.sh", netdata_configured_primary_plugins_dir);
+    host->health_default_exec = strdupz(config_get(CONFIG_SECTION_HEALTH, "script to execute on alarm", filename));
+    host->health_default_recipient = strdupz("root");
+
+
+    // ------------------------------------------------------------------------
+    // load health configuration
+
+    if(host->health_enabled) {
+        rrdhost_wrlock(host);
+        health_readdir(host, health_user_config_dir(), health_stock_config_dir(), NULL);
+        rrdhost_unlock(host);
+
+        health_alarm_log_load(host);
+        health_alarm_log_open(host);
+    }
+
     if (host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) {
 #ifdef ENABLE_DBENGINE
         if (unlikely(-1 == uuid_parse(host->machine_guid, host->host_uuid))) {
@@ -273,34 +301,6 @@ RRDHOST *rrdhost_create(const char *hostname,
 #endif
     }
 
-    if(host->health_enabled) {
-        snprintfz(filename, FILENAME_MAX, "%s/health", host->varlib_dir);
-        int r = mkdir(filename, 0775);
-        if(r != 0 && errno != EEXIST)
-            error("Host '%s': cannot create directory '%s'", host->hostname, filename);
-    }
-
-    snprintfz(filename, FILENAME_MAX, "%s/health/health-log.db", host->varlib_dir);
-    host->health_log_filename = strdupz(filename);
-
-    snprintfz(filename, FILENAME_MAX, "%s/alarm-notify.sh", netdata_configured_primary_plugins_dir);
-    host->health_default_exec = strdupz(config_get(CONFIG_SECTION_HEALTH, "script to execute on alarm", filename));
-    host->health_default_recipient = strdupz("root");
-
-
-    // ------------------------------------------------------------------------
-    // load health configuration
-
-    if(host->health_enabled) {
-        rrdhost_wrlock(host);
-        health_readdir(host, health_user_config_dir(), health_stock_config_dir(), NULL);
-        rrdhost_unlock(host);
-
-        health_alarm_log_load(host);
-        health_alarm_log_open(host);
-    }
-
-
     // ------------------------------------------------------------------------
     // link it and add it to the index
 

+ 20 - 1
health/health_log.c

@@ -158,6 +158,22 @@ inline void health_alarm_log_save(RRDHOST *host, ALARM_ENTRY *ae) {
 #endif
 }
 
+// Returns 0 if an entry in host->health_log.alarms already uses alarm_id for a different
+// (chart, name) pair, else 1; hashes are compared before strcmp() to skip string work.
+uint32_t is_valid_alarm_id(RRDHOST *host, const char *chart, const char *name, uint32_t alarm_id)
+{
+    uint32_t chart_hash = simple_hash(chart);
+    uint32_t name_hash = simple_hash(name);
+    for (ALARM_ENTRY *entry = host->health_log.alarms; entry; entry = entry->next) {
+        if (entry->alarm_id != alarm_id)
+            continue;
+        if (unlikely(entry->hash_name != name_hash || entry->hash_chart != chart_hash ||
+                     strcmp(name, entry->name) || strcmp(chart, entry->chart)))
+            return 0;
+    }
+    return 1;
+}
+
 inline ssize_t health_alarm_log_read(RRDHOST *host, FILE *fp, const char *filename) {
     errno = 0;
 
@@ -286,6 +302,8 @@ inline ssize_t health_alarm_log_read(RRDHOST *host, FILE *fp, const char *filena
             //    error("HEALTH [%s]: line %zu of file '%s' provides an alarm for host '%s' but this is named '%s'.", host->hostname, line, filename, pointers[1], host->hostname);
 
             ae->unique_id               = unique_id;
+            if (!is_valid_alarm_id(host, pointers[14], pointers[13], alarm_id))
+                alarm_id = rrdcalc_get_unique_id(host, pointers[14], pointers[13], NULL);
             ae->alarm_id                = alarm_id;
             ae->alarm_event_id          = (uint32_t)strtoul(pointers[4], NULL, 16);
             ae->updated_by_id           = (uint32_t)strtoul(pointers[5], NULL, 16);
@@ -376,7 +394,8 @@ inline ssize_t health_alarm_log_read(RRDHOST *host, FILE *fp, const char *filena
     if(!host->health_max_alarm_id)  host->health_max_alarm_id  = (uint32_t)now_realtime_sec();
 
     host->health_log.next_log_id = host->health_max_unique_id + 1;
-    host->health_log.next_alarm_id = host->health_max_alarm_id + 1;
+    if (unlikely(!host->health_log.next_alarm_id || host->health_log.next_alarm_id <= host->health_max_alarm_id))
+        host->health_log.next_alarm_id = host->health_max_alarm_id + 1;
 
     debug(D_HEALTH, "HEALTH [%s]: loaded file '%s' with %zd new alarm entries, updated %zd alarms, errors %zd entries, duplicate %zd", host->hostname, filename, loaded, updated, errored, duplicate);
     return loaded;