Просмотр исходного кода

Generate, store and transmit a unique alert event_hash_id (#15111)

* generate and store an event_hash_id

* transmit to cloud

* transmit to the cloud
Emmanuel Vasilakis 1 год назад
Родитель
Коммит
81174475a3

+ 1 - 1
aclk/aclk-schemas

@@ -1 +1 @@
-Subproject commit d3a5c636b6dacf364834f2ba99ce0170c71ef861
+Subproject commit a9fac9a0e4ebfa021d6f900403626213d28d6852

+ 4 - 0
aclk/schema-wrappers/alarm_stream.cc

@@ -86,6 +86,7 @@ void destroy_alarm_log_entry(struct alarm_log_entry *entry)
 
     freez(entry->rendered_info);
     freez(entry->chart_context);
+    freez(entry->transition_id);
 }
 
 static void fill_alarm_log_entry(struct alarm_log_entry *data, AlarmLogEntry *proto)
@@ -134,6 +135,9 @@ static void fill_alarm_log_entry(struct alarm_log_entry *data, AlarmLogEntry *pr
     proto->set_rendered_info(data->rendered_info);
 
     proto->set_chart_context(data->chart_context);
+
+    proto->set_event_id(data->event_id);
+    proto->set_transition_id(data->transition_id);
 }
 
 char *generate_alarm_log_entry(size_t *len, struct alarm_log_entry *data)

+ 3 - 0
aclk/schema-wrappers/alarm_stream.h

@@ -73,6 +73,9 @@ struct alarm_log_entry {
     char *rendered_info;
 
     char *chart_context;
+
+    uint64_t event_id;
+    char *transition_id;  
 };
 
 struct send_alarm_checkpoint {

+ 1 - 0
database/rrd.h

@@ -977,6 +977,7 @@ struct alarm_entry {
     uint32_t alarm_id;
     uint32_t alarm_event_id;
     uuid_t config_hash_id;
+    uuid_t transition_id;
 
     time_t when;
     time_t duration;

+ 13 - 1
database/sqlite/sqlite_aclk_alert.c

@@ -285,7 +285,7 @@ void aclk_push_alert_event(struct aclk_sync_host_config *wc)
     buffer_sprintf(sql, "select aa.sequence_id, hl.unique_id, hl.alarm_id, hl.config_hash_id, hl.updated_by_id, hl.when_key, " \
         " hl.duration, hl.non_clear_duration, hl.flags, hl.exec_run_timestamp, hl.delay_up_to_timestamp, hl.name,  " \
         " hl.chart, hl.family, hl.exec, hl.recipient, hl.source, hl.units, hl.info, hl.exec_code, hl.new_status,  " \
-        " hl.old_status, hl.delay, hl.new_value, hl.old_value, hl.last_repeat, hl.chart_context  " \
+        " hl.old_status, hl.delay, hl.new_value, hl.old_value, hl.last_repeat, hl.chart_context, hl.transition_id, hl.alarm_event_id  " \
         " from health_log_%s hl, aclk_alert_%s aa " \
         " where hl.unique_id = aa.alert_unique_id and aa.date_submitted is null " \
         " order by aa.sequence_id asc limit %d;", wc->uuid_str, wc->uuid_str, limit);
@@ -395,6 +395,13 @@ void aclk_push_alert_event(struct aclk_sync_host_config *wc)
                                       strdupz((char *)"") :
                                       strdupz((char *)sqlite3_column_text(res, 26));
 
+        // transition_id may be NULL; sqlite3_column_blob() then yields a NULL pointer, so only unparse a real blob.
+        if (sqlite3_column_type(res, 27) != SQLITE_NULL)
+            uuid_unparse_lower(*((uuid_t *) sqlite3_column_blob(res, 27)), uuid_str);
+        alarm_log.transition_id = strdupz(sqlite3_column_type(res, 27) == SQLITE_NULL ? "" : uuid_str);
+
+        alarm_log.event_id = (uint64_t) sqlite3_column_int64(res, 28);
+
         aclk_send_alarm_log_entry(&alarm_log);
 
         if (first_sequence_id == 0)
@@ -749,6 +756,8 @@ void health_alarm_entry2proto_nolock(struct alarm_log_entry *alarm_log, ALARM_EN
     char *edit_command = ae->source ? health_edit_command_from_source(ae_source(ae)) : strdupz("UNKNOWN=0=UNKNOWN");
     char config_hash_id[UUID_STR_LEN];
     uuid_unparse_lower(ae->config_hash_id, config_hash_id);
+    char transition_id[UUID_STR_LEN];
+    uuid_unparse_lower(ae->transition_id, transition_id);
 
     alarm_log->chart = strdupz(ae_chart_name(ae));
     alarm_log->name = strdupz(ae_name(ae));
@@ -790,6 +799,9 @@ void health_alarm_entry2proto_nolock(struct alarm_log_entry *alarm_log, ALARM_EN
     alarm_log->rendered_info = strdupz(ae_info(ae));
     alarm_log->chart_context = strdupz(ae_chart_context(ae));
 
+    alarm_log->transition_id = strdupz((char *)transition_id);
+    alarm_log->event_id = (uint64_t) ae->alarm_event_id;
+
     freez(edit_command);
 }
 #endif

+ 33 - 0
database/sqlite/sqlite_db_migration.c

@@ -182,6 +182,38 @@ static int do_migration_v6_v7(sqlite3 *database, const char *name)
     return 0;
 }
 
+// Migration v7 -> v8: add a `transition_id blob` column to every health_log_* table that lacks it.
+// Returns 1 if the table listing cannot be prepared, 0 otherwise; a failed ALTER is logged and the loop continues.
+static int do_migration_v7_v8(sqlite3 *database, const char *name)
+{
+    info("Running database migration %s", name);
+
+    char sql[256];
+    sqlite3_stmt *res = NULL;
+    // The query takes no format arguments; the doubled %% collapses to the single LIKE wildcard.
+    snprintfz(sql, sizeof(sql) - 1, "SELECT name FROM sqlite_schema WHERE type ='table' AND name LIKE 'health_log_%%';");
+    int rc = sqlite3_prepare_v2(database, sql, -1, &res, 0);
+    if (rc != SQLITE_OK) {
+        error_report("Failed to prepare statement to alter health_log tables");
+        return 1;
+    }
+
+    while (sqlite3_step_monitored(res) == SQLITE_ROW) {
+        char *table = strdupz((char *) sqlite3_column_text(res, 0));
+        if (!column_exists_in_table(table, "transition_id")) {
+            snprintfz(sql, sizeof(sql) - 1, "ALTER TABLE %s ADD transition_id blob", table);
+            if (sqlite3_exec_monitored(database, sql, 0, 0, NULL) != SQLITE_OK)
+                error_report("Failed to add transition_id column to table %s", table);
+        }
+        freez(table);
+    }
+
+    rc = sqlite3_finalize(res);
+    if (unlikely(rc != SQLITE_OK))
+        error_report("Failed to finalize statement when altering health_log tables, rc = %d", rc);
+    return 0;
+}
+
 
 static int do_migration_noop(sqlite3 *database, const char *name)
 {
@@ -233,6 +265,7 @@ DATABASE_FUNC_MIGRATION_LIST migration_action[] = {
     {.name = "v4 to v5",  .func = do_migration_v4_v5},
     {.name = "v5 to v6",  .func = do_migration_v5_v6},
     {.name = "v6 to v7",  .func = do_migration_v6_v7},
+    {.name = "v7 to v8",  .func = do_migration_v7_v8},
     // the terminator of this array
     {.name = NULL, .func = NULL}
 };

+ 1 - 1
database/sqlite/sqlite_functions.c

@@ -3,7 +3,7 @@
 #include "sqlite_functions.h"
 #include "sqlite_db_migration.h"
 
-#define DB_METADATA_VERSION 7
+#define DB_METADATA_VERSION 8
 
 const char *database_config[] = {
     "CREATE TABLE IF NOT EXISTS host(host_id BLOB PRIMARY KEY, hostname TEXT NOT NULL, "

+ 32 - 8
database/sqlite/sqlite_health.c

@@ -3,6 +3,7 @@
 #include "sqlite_health.h"
 #include "sqlite_functions.h"
 #include "sqlite_db_migration.h"
+#include "uuid.h"
 
 #define MAX_HEALTH_SQL_SIZE 2048
 #define sqlite3_bind_string_or_null(res,key,param) ((key) ? sqlite3_bind_text(res, param, string2str(key), -1, SQLITE_STATIC) : sqlite3_bind_null(res, param))
@@ -10,7 +11,7 @@
 /* Health related SQL queries
    Creates a health log table in sqlite, one per host guid
 */
-#define SQL_CREATE_HEALTH_LOG_TABLE(guid) "CREATE TABLE IF NOT EXISTS health_log_%s(hostname text, unique_id int, alarm_id int, alarm_event_id int, config_hash_id blob, updated_by_id int, updates_id int, when_key int, duration int, non_clear_duration int, flags int, exec_run_timestamp int, delay_up_to_timestamp int, name text, chart text, family text, exec text, recipient text, source text, units text, info text, exec_code int, new_status real, old_status real, delay int, new_value double, old_value double, last_repeat int, class text, component text, type text, chart_context text);", guid
+#define SQL_CREATE_HEALTH_LOG_TABLE(guid) "CREATE TABLE IF NOT EXISTS health_log_%s(hostname text, unique_id int, alarm_id int, alarm_event_id int, config_hash_id blob, updated_by_id int, updates_id int, when_key int, duration int, non_clear_duration int, flags int, exec_run_timestamp int, delay_up_to_timestamp int, name text, chart text, family text, exec text, recipient text, source text, units text, info text, exec_code int, new_status real, old_status real, delay int, new_value double, old_value double, last_repeat int, class text, component text, type text, chart_context text, transition_id blob);", guid
 int sql_create_health_log_table(RRDHOST *host) {
     int rc;
     char command[MAX_HEALTH_SQL_SIZE + 1];
@@ -116,7 +117,7 @@ failed:
     "config_hash_id, updated_by_id, updates_id, when_key, duration, non_clear_duration, flags, " \
     "exec_run_timestamp, delay_up_to_timestamp, name, chart, family, exec, recipient, source, " \
     "units, info, exec_code, new_status, old_status, delay, new_value, old_value, last_repeat, " \
-    "class, component, type, chart_context) values (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?);", guid
+    "class, component, type, chart_context, transition_id) values (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?);", guid
 
 void sql_health_alarm_log_insert(RRDHOST *host, ALARM_ENTRY *ae) {
     sqlite3_stmt *res = NULL;
@@ -336,6 +337,12 @@ void sql_health_alarm_log_insert(RRDHOST *host, ALARM_ENTRY *ae) {
         goto failed;
     }
 
+    rc = sqlite3_bind_blob(res, 33, &ae->transition_id, sizeof(ae->transition_id), SQLITE_STATIC);
+    if (unlikely(rc != SQLITE_OK)) {
+        error_report("Failed to bind transition_id parameter for SQL_INSERT_HEALTH_LOG");
+        goto failed;
+    }
+
     rc = execute_insert(res);
     if (unlikely(rc != SQLITE_DONE)) {
         error_report("HEALTH [%s]: Failed to execute SQL_INSERT_HEALTH_LOG, rc = %d", rrdhost_hostname(host), rc);
@@ -510,10 +517,10 @@ void sql_health_alarm_log_cleanup(RRDHOST *host) {
 }
 
 #define SQL_INJECT_REMOVED(guid, guid2) "insert into health_log_%s (hostname, unique_id, alarm_id, alarm_event_id, config_hash_id, updated_by_id, updates_id, when_key, duration, non_clear_duration, flags, exec_run_timestamp, " \
-"delay_up_to_timestamp, name, chart, family, exec, recipient, source, units, info, exec_code, new_status, old_status, delay, new_value, old_value, last_repeat, class, component, type, chart_context) " \
+"delay_up_to_timestamp, name, chart, family, exec, recipient, source, units, info, exec_code, new_status, old_status, delay, new_value, old_value, last_repeat, class, component, type, chart_context, transition_id) " \
 "select hostname, ?1, ?2, ?3, config_hash_id, 0, ?4, unixepoch(), 0, 0, flags, exec_run_timestamp, " \
-"unixepoch(), name, chart, family, exec, recipient, source, units, info, exec_code, -2, new_status, delay, NULL, new_value, 0, class, component, type, chart_context " \
-"from health_log_%s where unique_id = ?5", guid, guid2
+"unixepoch(), name, chart, family, exec, recipient, source, units, info, exec_code, -2, new_status, delay, NULL, new_value, 0, class, component, type, chart_context, ?5 " \
+"from health_log_%s where unique_id = ?6", guid, guid2
 #define SQL_INJECT_REMOVED_UPDATE(guid) "update health_log_%s set flags = flags | ?1, updated_by_id = ?2 where unique_id = ?3; ", guid
 void sql_inject_removed_status(char *uuid_str, uint32_t alarm_id, uint32_t alarm_event_id, uint32_t unique_id, uint32_t max_unique_id)
 {
@@ -556,7 +563,15 @@ void sql_inject_removed_status(char *uuid_str, uint32_t alarm_id, uint32_t alarm
         goto failed;
     }
 
-    rc = sqlite3_bind_int64(res, 5, (sqlite3_int64) unique_id);
+    uuid_t transition_id; // bound as ?5: the injected row gets a fresh id instead of copying the source row's
+    uuid_generate_random(transition_id);
+    rc = sqlite3_bind_blob(res, 5, &transition_id, sizeof(transition_id), SQLITE_STATIC);
+    if (unlikely(rc != SQLITE_OK)) {
+        error_report("Failed to bind transition_id parameter for SQL_INJECT_REMOVED");
+        goto failed;
+    }
+
+    rc = sqlite3_bind_int64(res, 6, (sqlite3_int64) unique_id);
     if (unlikely(rc != SQLITE_OK)) {
         error_report("Failed to bind unique_id parameter for SQL_INJECT_REMOVED");
         goto failed;
@@ -674,7 +689,7 @@ void sql_check_removed_alerts_state(char *uuid_str)
 /* Health related SQL queries
    Load from the health log table
 */
-#define SQL_LOAD_HEALTH_LOG(guid) "SELECT hostname, unique_id, alarm_id, alarm_event_id, config_hash_id, updated_by_id, updates_id, when_key, duration, non_clear_duration, flags, exec_run_timestamp, delay_up_to_timestamp, name, chart, family, exec, recipient, source, units, info, exec_code, new_status, old_status, delay, new_value, old_value, last_repeat, class, component, type, chart_context FROM health_log_%s group by alarm_id having max(alarm_event_id);", guid
+#define SQL_LOAD_HEALTH_LOG(guid) "SELECT hostname, unique_id, alarm_id, alarm_event_id, config_hash_id, updated_by_id, updates_id, when_key, duration, non_clear_duration, flags, exec_run_timestamp, delay_up_to_timestamp, name, chart, family, exec, recipient, source, units, info, exec_code, new_status, old_status, delay, new_value, old_value, last_repeat, class, component, type, chart_context, transition_id FROM health_log_%s group by alarm_id having max(alarm_event_id);", guid
 void sql_health_alarm_log_load(RRDHOST *host) {
     sqlite3_stmt *res = NULL;
     int ret;
@@ -844,6 +859,9 @@ void sql_health_alarm_log_load(RRDHOST *host) {
         else
             ae->chart_context = NULL;
 
+         if (sqlite3_column_type(res, 32) != SQLITE_NULL)
+            uuid_copy(ae->transition_id, *((uuid_t *) sqlite3_column_blob(res, 32)));
+
         char value_string[100 + 1];
         string_freez(ae->old_value_string);
         string_freez(ae->new_value_string);
@@ -1190,7 +1208,7 @@ int sql_health_get_last_executed_event(RRDHOST *host, ALARM_ENTRY *ae, RRDCALC_S
      return ret;
 }
 
-#define SQL_SELECT_HEALTH_LOG(guid) "SELECT hostname, unique_id, alarm_id, alarm_event_id, config_hash_id, updated_by_id, updates_id, when_key, duration, non_clear_duration, flags, exec_run_timestamp, delay_up_to_timestamp, name, chart, family, exec, recipient, source, units, info, exec_code, new_status, old_status, delay, new_value, old_value, last_repeat, class, component, type, chart_context FROM health_log_%s WHERE 1=1 ", guid
+#define SQL_SELECT_HEALTH_LOG(guid) "SELECT hostname, unique_id, alarm_id, alarm_event_id, config_hash_id, updated_by_id, updates_id, when_key, duration, non_clear_duration, flags, exec_run_timestamp, delay_up_to_timestamp, name, chart, family, exec, recipient, source, units, info, exec_code, new_status, old_status, delay, new_value, old_value, last_repeat, class, component, type, chart_context, transition_id FROM health_log_%s WHERE 1=1 ", guid
 void sql_health_alarm_log2json(RRDHOST *host, BUFFER *wb, uint32_t after, char *chart) {
 
     buffer_strcat(wb, "[");
@@ -1240,6 +1258,10 @@ void sql_health_alarm_log2json(RRDHOST *host, BUFFER *wb, uint32_t after, char *
         char config_hash_id[UUID_STR_LEN];
         uuid_unparse_lower(*((uuid_t *) sqlite3_column_blob(res, 4)), config_hash_id);
 
+        char transition_id[UUID_STR_LEN] = {0};
+        if (sqlite3_column_type(res, 32) != SQLITE_NULL)
+            uuid_unparse_lower(*((uuid_t *) sqlite3_column_blob(res, 32)), transition_id);
+
         char *edit_command = health_edit_command_from_source((char *)sqlite3_column_text(res, 18));
 
         if (count)
@@ -1257,6 +1279,7 @@ void sql_health_alarm_log2json(RRDHOST *host, BUFFER *wb, uint32_t after, char *
             "\t\t\"alarm_id\": %u,\n"
             "\t\t\"alarm_event_id\": %u,\n"
             "\t\t\"config_hash_id\": \"%s\",\n"
+            "\t\t\"transition_id\": \"%s\",\n"
             "\t\t\"name\": \"%s\",\n"
             "\t\t\"chart\": \"%s\",\n"
             "\t\t\"context\": \"%s\",\n"
@@ -1294,6 +1317,7 @@ void sql_health_alarm_log2json(RRDHOST *host, BUFFER *wb, uint32_t after, char *
             (unsigned int) sqlite3_column_int64(res, 2),
             (unsigned int) sqlite3_column_int64(res, 3),
             config_hash_id,
+            transition_id,
             sqlite3_column_text(res, 13),
             sqlite3_column_text(res, 14),
             sqlite3_column_text(res, 31),

+ 2 - 0
health/health_log.c

@@ -46,6 +46,8 @@ inline ALARM_ENTRY* health_create_alarm_entry(
 
     uuid_copy(ae->config_hash_id, *((uuid_t *) config_hash_id));
 
+    uuid_generate_random(ae->transition_id);
+
     ae->family = string_dup(family);
     ae->classification = string_dup(class);
     ae->component = string_dup(component);