Browse Source

Store and transmit chart_name to cloud in alert events (#15441)

Emmanuel Vasilakis 1 year ago
parent
commit
5607d21c02

+ 1 - 1
aclk/aclk-schemas

@@ -1 +1 @@
-Subproject commit 2aba113db56ac32deccc1e83784c4b3b6fcfe1cd
+Subproject commit 40703f5e0258b3f4a97d3767e0f4e6833801b7d1

+ 2 - 4
aclk/schema-wrappers/alarm_stream.cc

@@ -87,6 +87,7 @@ void destroy_alarm_log_entry(struct alarm_log_entry *entry)
     freez(entry->rendered_info);
     freez(entry->chart_context);
     freez(entry->transition_id);
+    freez(entry->chart_name);
 }
 
 static void fill_alarm_log_entry(struct alarm_log_entry *data, AlarmLogEntry *proto)
@@ -129,15 +130,12 @@ static void fill_alarm_log_entry(struct alarm_log_entry *data, AlarmLogEntry *pr
 
     proto->set_value(data->value);
     proto->set_old_value(data->old_value);
-
     proto->set_updated(data->updated);
-
     proto->set_rendered_info(data->rendered_info);
-
     proto->set_chart_context(data->chart_context);
-
     proto->set_event_id(data->event_id);
     proto->set_transition_id(data->transition_id);
+    proto->set_chart_name(data->chart_name);
 }
 
 char *generate_alarm_log_entry(size_t *len, struct alarm_log_entry *data)

+ 1 - 0
aclk/schema-wrappers/alarm_stream.h

@@ -73,6 +73,7 @@ struct alarm_log_entry {
     char *rendered_info;
 
     char *chart_context;
+    char *chart_name;
 
     uint64_t event_id;
     char *transition_id;  

+ 3 - 1
database/rrd.h

@@ -1045,6 +1045,7 @@ struct alarm_entry {
     STRING *name;
     STRING *chart;
     STRING *chart_context;
+    STRING *chart_name;
     STRING *family;
 
     STRING *classification;
@@ -1086,7 +1087,8 @@ struct alarm_entry {
 };
 
 #define ae_name(ae) string2str((ae)->name)
-#define ae_chart_name(ae) string2str((ae)->chart)
+#define ae_chart_id(ae) string2str((ae)->chart)
+#define ae_chart_name(ae) string2str((ae)->chart_name)
 #define ae_chart_context(ae) string2str((ae)->chart_context)
 #define ae_family(ae) string2str((ae)->family)
 #define ae_classification(ae) string2str((ae)->classification)

+ 2 - 0
database/rrdcalc.c

@@ -292,6 +292,7 @@ static void rrdcalc_link_to_rrdset(RRDSET *st, RRDCALC *rc) {
         rc->name,
         rc->rrdset->id,
         rc->rrdset->context,
+        rc->rrdset->name,
         rc->rrdset->family,
         rc->classification,
         rc->component,
@@ -336,6 +337,7 @@ static void rrdcalc_unlink_from_rrdset(RRDCALC *rc, bool having_ll_wrlock) {
             rc->name,
             rc->rrdset->id,
             rc->rrdset->context,
+            rc->rrdset->name,
             rc->rrdset->family,
             rc->classification,
             rc->component,

+ 4 - 5
database/sqlite/sqlite_aclk_alert.c

@@ -268,7 +268,7 @@ void aclk_push_alert_event(struct aclk_sync_host_config *wc)
     buffer_sprintf(sql, "select aa.sequence_id, hld.unique_id, hld.alarm_id, hl.config_hash_id, hld.updated_by_id, hld.when_key, " \
         " hld.duration, hld.non_clear_duration, hld.flags, hld.exec_run_timestamp, hld.delay_up_to_timestamp, hl.name,  " \
         " hl.chart, hl.family, hl.exec, hl.recipient, ha.source, hl.units, hld.info, hld.exec_code, hld.new_status,  " \
-        " hld.old_status, hld.delay, hld.new_value, hld.old_value, hld.last_repeat, hl.chart_context, hld.transition_id, hld.alarm_event_id  " \
+        " hld.old_status, hld.delay, hld.new_value, hld.old_value, hld.last_repeat, hl.chart_context, hld.transition_id, hld.alarm_event_id, hl.chart_name  " \
         " from health_log hl, aclk_alert_%s aa, alert_hash ha, health_log_detail hld " \
         " where hld.unique_id = aa.alert_unique_id and hl.config_hash_id = ha.hash_id and aa.date_submitted is null " \
         " and hl.host_id = @host_id and hl.health_log_id = hld.health_log_id " \
@@ -371,14 +371,12 @@ void aclk_push_alert_event(struct aclk_sync_host_config *wc)
 
         alarm_log.value = (NETDATA_DOUBLE) sqlite3_column_double(res, 23);
         alarm_log.old_value = (NETDATA_DOUBLE) sqlite3_column_double(res, 24);
-
         alarm_log.updated = (sqlite3_column_int64(res, 8) & HEALTH_ENTRY_FLAG_UPDATED) ? 1 : 0;
         alarm_log.rendered_info = sqlite3_text_strdupz_empty(res, 18);
-
         alarm_log.chart_context = sqlite3_text_strdupz_empty(res, 26);
         alarm_log.transition_id = sqlite3_uuid_unparse_strdupz(res, 27);
-
         alarm_log.event_id = (time_t) sqlite3_column_int64(res, 28);
+        alarm_log.chart_name = sqlite3_text_strdupz_empty(res, 29);
 
         aclk_send_alarm_log_entry(&alarm_log);
 
@@ -788,7 +786,7 @@ void health_alarm_entry2proto_nolock(struct alarm_log_entry *alarm_log, ALARM_EN
     char transition_id[UUID_STR_LEN];
     uuid_unparse_lower(ae->transition_id, transition_id);
 
-    alarm_log->chart = strdupz(ae_chart_name(ae));
+    alarm_log->chart = strdupz(ae_chart_id(ae));
     alarm_log->name = strdupz(ae_name(ae));
     alarm_log->family = strdupz(ae_family(ae));
 
@@ -827,6 +825,7 @@ void health_alarm_entry2proto_nolock(struct alarm_log_entry *alarm_log, ALARM_EN
     alarm_log->updated = (ae->flags & HEALTH_ENTRY_FLAG_UPDATED) ? 1 : 0;
     alarm_log->rendered_info = strdupz(ae_info(ae));
     alarm_log->chart_context = strdupz(ae_chart_context(ae));
+    alarm_log->chart_name = strdupz(ae_chart_name(ae));
 
     alarm_log->transition_id = strdupz((char *)transition_id);
     alarm_log->event_id = (uint64_t) ae->alarm_event_id;

+ 16 - 1
database/sqlite/sqlite_db_migration.c

@@ -83,6 +83,11 @@ const char *database_migrate_v9_v10[] = {
     NULL
 };
 
+const char *database_migrate_v10_v11[] = { // SQL batch handed to init_database_batch() by do_migration_v10_v11()
+    "ALTER TABLE health_log ADD chart_name TEXT;",
+    NULL // the list ends with NULL, as the v9 -> v10 array above does
+};
+
 static int do_migration_v1_v2(sqlite3 *database, const char *name)
 {
     UNUSED(name);
@@ -293,7 +298,6 @@ static int do_migration_v8_v9(sqlite3 *database, const char *name)
 
 static int do_migration_v9_v10(sqlite3 *database, const char *name)
 {
-    UNUSED(name);
     netdata_log_info("Running \"%s\" database migration", name);
 
     if (table_exists_in_database("alert_hash") && !column_exists_in_table("alert_hash", "chart_labels"))
@@ -301,6 +305,16 @@ static int do_migration_v9_v10(sqlite3 *database, const char *name)
     return 0;
 }
 
+// v10 -> v11: add the chart_name column to health_log, unless the table is missing or already has it.
+static int do_migration_v10_v11(sqlite3 *database, const char *name)
+{
+    netdata_log_info("Running \"%s\" database migration", name);
+    if (!table_exists_in_database("health_log") || column_exists_in_table("health_log", "chart_name"))
+        return 0; // nothing to migrate
+
+    return init_database_batch(database, DB_CHECK_NONE, 0, database_migrate_v10_v11);
+}
+
 static int do_migration_noop(sqlite3 *database, const char *name)
 {
     UNUSED(database);
@@ -354,6 +368,7 @@ DATABASE_FUNC_MIGRATION_LIST migration_action[] = {
     {.name = "v7 to v8",  .func = do_migration_v7_v8},
     {.name = "v8 to v9",  .func = do_migration_v8_v9},
     {.name = "v9 to v10",  .func = do_migration_v9_v10},
+    {.name = "v10 to v11",  .func = do_migration_v10_v11},
     // the terminator of this array
     {.name = NULL, .func = NULL}
 };

+ 2 - 2
database/sqlite/sqlite_functions.c

@@ -3,7 +3,7 @@
 #include "sqlite_functions.h"
 #include "sqlite_db_migration.h"
 
-#define DB_METADATA_VERSION 10
+#define DB_METADATA_VERSION 11
 
 const char *database_config[] = {
     "CREATE TABLE IF NOT EXISTS host(host_id BLOB PRIMARY KEY, hostname TEXT NOT NULL, "
@@ -47,7 +47,7 @@ const char *database_config[] = {
 
     "CREATE TABLE IF NOT EXISTS health_log (health_log_id INTEGER PRIMARY KEY, host_id blob, alarm_id int, "
     "config_hash_id blob, name text, chart text, family text, recipient text, units text, exec text, "
-    "chart_context text, last_transition_id blob, UNIQUE (host_id, alarm_id)) ;",
+    "chart_context text, last_transition_id blob, chart_name text, UNIQUE (host_id, alarm_id)) ;",
 
     "CREATE INDEX IF NOT EXISTS health_log_ind_1 ON health_log (host_id);",
 

+ 105 - 29
database/sqlite/sqlite_health.c

@@ -83,8 +83,8 @@ failed:
    Inserts an entry in the table
 */
 #define SQL_INSERT_HEALTH_LOG "INSERT INTO health_log (host_id, alarm_id, " \
-    "config_hash_id, name, chart, family, exec, recipient, units, chart_context, last_transition_id) " \
-    "VALUES (?,?,?,?,?,?,?,?,?,?,?) " \
+    "config_hash_id, name, chart, family, exec, recipient, units, chart_context, last_transition_id, chart_name) " \
+    "VALUES (?,?,?,?,?,?,?,?,?,?,?,?) " \
     "ON CONFLICT (host_id, alarm_id) DO UPDATE SET last_transition_id = excluded.last_transition_id RETURNING health_log_id; "
 
 #define SQL_INSERT_HEALTH_LOG_DETAIL "INSERT INTO health_log_detail (health_log_id, unique_id, alarm_id, alarm_event_id, " \
@@ -174,6 +174,12 @@ void sql_health_alarm_log_insert(RRDHOST *host, ALARM_ENTRY *ae) {
         goto failed;
     }
 
+    rc = sqlite3_bind_string_or_null(res, ae->chart_name, 12);
+    if (unlikely(rc != SQLITE_OK)) {
+        error_report("Failed to bind chart_name parameter for SQL_INSERT_HEALTH_LOG");
+        goto failed;
+    }
+
     rc = sqlite3_step_monitored(res);
     if (likely(rc == SQLITE_ROW))
         health_log_id = (size_t) sqlite3_column_int64(res, 0);
@@ -766,7 +772,7 @@ void sql_check_removed_alerts_state(RRDHOST *host)
             "hld.updates_id, hld.when_key, hld.duration, hld.non_clear_duration, hld.flags, hld.exec_run_timestamp, " \
             "hld.delay_up_to_timestamp, hl.name, hl.chart, hl.family, hl.exec, hl.recipient, ah.source, hl.units, " \
             "hld.info, hld.exec_code, hld.new_status, hld.old_status, hld.delay, hld.new_value, hld.old_value, " \
-            "hld.last_repeat, ah.class, ah.component, ah.type, hl.chart_context, hld.transition_id, hld.global_id " \
+            "hld.last_repeat, ah.class, ah.component, ah.type, hl.chart_context, hld.transition_id, hld.global_id, hl.chart_name " \
             "FROM health_log hl, alert_hash ah, health_log_detail hld " \
             "WHERE hl.config_hash_id = ah.hash_id and hl.host_id = @host_id and hl.last_transition_id = hld.transition_id;"
 void sql_health_alarm_log_load(RRDHOST *host) {
@@ -945,6 +951,11 @@ void sql_health_alarm_log_load(RRDHOST *host) {
         if (sqlite3_column_type(res, 32) != SQLITE_NULL)
             ae->global_id = sqlite3_column_int64(res, 32);
 
+        if (sqlite3_column_type(res, 33) != SQLITE_NULL)
+            ae->chart_name = string_strdupz((char *) sqlite3_column_text(res, 33));
+        else
+            ae->chart_name = NULL;
+
         char value_string[100 + 1];
         string_freez(ae->old_value_string);
         string_freez(ae->new_value_string);
@@ -1767,7 +1778,7 @@ fail:
 #define SQL_POPULATE_TEMP_ALERT_TRANSITION_TABLE "INSERT INTO v_%p (host_id) VALUES (@host_id)"
 
 #define SQL_SEARCH_ALERT_TRANSITION_SELECT "SELECT " \
-    "h.host_id, h.alarm_id, h.config_hash_id, h.name, h.chart, h.family, h.recipient, h.units, h.exec, " \
+    "h.host_id, h.alarm_id, h.config_hash_id, h.name, h.chart, h.chart_name, h.family, h.recipient, h.units, h.exec, " \
     "h.chart_context,  d.when_key, d.duration, d.non_clear_duration, d.flags, d.delay_up_to_timestamp, " \
     "d.info, d.exec_code, d.new_status, d.old_status, d.delay, d.new_value, d.old_value, d.last_repeat, " \
     "d.transition_id, d.global_id, ah.class, ah.type, ah.component, d.exec_run_timestamp"
@@ -1913,31 +1924,31 @@ run_query:;
         atd.config_hash_id = (uuid_t *)sqlite3_column_blob(res, 2);
         atd.alert_name = (const char *) sqlite3_column_text(res, 3);
         atd.chart = (const char *) sqlite3_column_text(res, 4);
-        atd.chart_name = (const char *) sqlite3_column_text(res, 4); // FIXME don't copy the id, find the name
-        atd.family = (const char *) sqlite3_column_text(res, 5);
-        atd.recipient = (const char *) sqlite3_column_text(res, 6);
-        atd.units = (const char *) sqlite3_column_text(res, 7);
-        atd.exec = (const char *) sqlite3_column_text(res, 8);
-        atd.chart_context = (const char *) sqlite3_column_text(res, 9);
-        atd.when_key = sqlite3_column_int64(res, 10);
-        atd.duration = sqlite3_column_int64(res, 11);
-        atd.non_clear_duration = sqlite3_column_int64(res, 12);
-        atd.flags = sqlite3_column_int64(res, 13);
-        atd.delay_up_to_timestamp = sqlite3_column_int64(res, 14);
-        atd.info = (const char *) sqlite3_column_text(res, 15);
-        atd.exec_code = sqlite3_column_int(res, 16);
-        atd.new_status = sqlite3_column_int(res, 17);
-        atd.old_status = sqlite3_column_int(res, 18);
-        atd.delay = (int) sqlite3_column_int(res, 19);
-        atd.new_value = (NETDATA_DOUBLE) sqlite3_column_double(res, 20);
-        atd.old_value = (NETDATA_DOUBLE) sqlite3_column_double(res, 21);
-        atd.last_repeat = sqlite3_column_int64(res, 22);
-        atd.transition_id = (uuid_t *) sqlite3_column_blob(res, 23);
-        atd.global_id = sqlite3_column_int64(res, 24);
-        atd.classification = (const char *) sqlite3_column_text(res, 25);
-        atd.type = (const char *) sqlite3_column_text(res, 26);
-        atd.component = (const char *) sqlite3_column_text(res, 27);
-        atd.exec_run_timestamp = sqlite3_column_int64(res, 28);
+        atd.chart_name = (const char *) sqlite3_column_text(res, 5);
+        atd.family = (const char *) sqlite3_column_text(res, 6);
+        atd.recipient = (const char *) sqlite3_column_text(res, 7);
+        atd.units = (const char *) sqlite3_column_text(res, 8);
+        atd.exec = (const char *) sqlite3_column_text(res, 9);
+        atd.chart_context = (const char *) sqlite3_column_text(res, 10);
+        atd.when_key = sqlite3_column_int64(res, 11);
+        atd.duration = sqlite3_column_int64(res, 12);
+        atd.non_clear_duration = sqlite3_column_int64(res, 13);
+        atd.flags = sqlite3_column_int64(res, 14);
+        atd.delay_up_to_timestamp = sqlite3_column_int64(res, 15);
+        atd.info = (const char *) sqlite3_column_text(res, 16);
+        atd.exec_code = sqlite3_column_int(res, 17);
+        atd.new_status = sqlite3_column_int(res, 18);
+        atd.old_status = sqlite3_column_int(res, 19);
+        atd.delay = (int) sqlite3_column_int(res, 20);
+        atd.new_value = (NETDATA_DOUBLE) sqlite3_column_double(res, 21);
+        atd.old_value = (NETDATA_DOUBLE) sqlite3_column_double(res, 22);
+        atd.last_repeat = sqlite3_column_int64(res, 23);
+        atd.transition_id = (uuid_t *) sqlite3_column_blob(res, 24);
+        atd.global_id = sqlite3_column_int64(res, 25);
+        atd.classification = (const char *) sqlite3_column_text(res, 26);
+        atd.type = (const char *) sqlite3_column_text(res, 27);
+        atd.component = (const char *) sqlite3_column_text(res, 28);
+        atd.exec_run_timestamp = sqlite3_column_int64(res, 29);
 
         cb(&atd, data);
     }
@@ -2087,3 +2098,68 @@ fail_only_drop:
     return added;
 }
 
+#define SQL_FETCH_CHART_NAME "SELECT chart_name FROM health_log where host_id = @host_id LIMIT 1;"
+// Samples one health_log row of the host: false only if its chart_name is NULL, true otherwise (also on errors).
+bool is_chart_name_populated(uuid_t  *host_uuid)
+{
+    sqlite3_stmt *res = NULL;
+    bool status = true; // kept when the host has no rows or the check cannot be completed
+
+    int rc = sqlite3_prepare_v2(db_meta, SQL_FETCH_CHART_NAME, -1, &res, 0);
+    if (unlikely(rc != SQLITE_OK)) {
+        error_report("Failed to prepare statement to check health_log chart_name");
+        return true;
+    }
+
+    rc = sqlite3_bind_blob(res, 1, host_uuid, sizeof(*host_uuid), SQLITE_STATIC);
+    if (unlikely(rc != SQLITE_OK)) {
+        error_report("Failed to bind host_id for health_log chart_name check");
+        goto fail;
+    }
+
+    rc = sqlite3_step_monitored(res);
+    if (likely(rc == SQLITE_ROW))
+        status = sqlite3_column_type(res, 0) != SQLITE_NULL;
+    else if (unlikely(rc != SQLITE_DONE)) // SQLITE_DONE (no row for this host) is not an error
+        error_report("Failed to check chart name in health_log, rc = %d", rc);
+
+fail:
+    rc = sqlite3_finalize(res);
+    if (unlikely(rc != SQLITE_OK))
+        error_report("Failed to finalize the prepared statement for health_log chart_name check");
+    return status;
+}
+
+#define SQL_POPULATE_CHART_NAME " UPDATE health_log SET chart_name = upd.chart_name FROM " \
+    "(SELECT c.type || '.' || IFNULL(c.name, c.id) AS chart_name, hl.host_id, hl.health_log_id FROM " \
+    "chart c, health_log hl WHERE (c.type || '.' || c.id) = hl.chart AND c.host_id = hl.host_id " \
+    "AND hl.host_id = @host_id) AS upd WHERE health_log.host_id = upd.host_id " \
+    "AND health_log.health_log_id = upd.health_log_id"
+
+// Backfills health_log.chart_name for one host from the chart table, using an UPDATE ... FROM statement.
+// Returns nothing: every failure (prepare, bind, execute, finalize) is only logged.
+void chart_name_populate(uuid_t *host_uuid)
+{
+    sqlite3_stmt *stmt = NULL;
+
+    int rc = sqlite3_prepare_v2(db_meta, SQL_POPULATE_CHART_NAME, -1, &stmt, 0);
+    if (unlikely(rc != SQLITE_OK)) {
+        error_report("Failed to prepare statement to update health_log chart_name");
+        return;
+    }
+
+    rc = sqlite3_bind_blob(stmt, 1, host_uuid, sizeof(*host_uuid), SQLITE_STATIC);
+    if (likely(rc == SQLITE_OK)) {
+        // Run the update only once the host_id parameter is bound.
+        rc = execute_insert(stmt);
+        if (unlikely(rc != SQLITE_DONE))
+            error_report("Failed to update chart name in health_log, rc = %d", rc);
+    }
+    else
+        error_report("Failed to bind host_id for health_log chart_name update");
+
+    // The statement is finalized on every path that follows a successful prepare.
+    rc = sqlite3_finalize(stmt);
+    if (unlikely(rc != SQLITE_OK))
+        error_report("Failed to finalize the prepared statement for health_log chart_name update");
+}

+ 2 - 0
database/sqlite/sqlite_health.h

@@ -37,4 +37,6 @@ int sql_get_alert_configuration(
     bool debug __maybe_unused);
 
 bool sql_find_alert_transition(const char *transition, void (*cb)(const char *machine_guid, const char *context, time_t alert_id, void *data), void *data);
+bool is_chart_name_populated(uuid_t  *host_uuid); // false only when the sampled health_log row of the host has a NULL chart_name
+void chart_name_populate(uuid_t *host_uuid); // fills health_log.chart_name for the host from the chart table; errors are logged
 #endif //NETDATA_SQLITE_HEALTH_H

Some files were not shown because too many files changed in this diff