123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439 |
// SPDX-License-Identifier: GPL-3.0-or-later
- #include "health.h"
- void health_string2json(BUFFER *wb, const char *prefix, const char *label, const char *value, const char *suffix) {
- if(value && *value) {
- buffer_sprintf(wb, "%s\"%s\":\"", prefix, label);
- buffer_strcat_htmlescape(wb, value);
- buffer_strcat(wb, "\"");
- buffer_strcat(wb, suffix);
- }
- else
- buffer_sprintf(wb, "%s\"%s\":null%s", prefix, label, suffix);
- }
- void health_alarm_entry2json_nolock(BUFFER *wb, ALARM_ENTRY *ae, RRDHOST *host) {
- char *edit_command = ae->source ? health_edit_command_from_source(ae_source(ae)) : strdupz("UNKNOWN=0=UNKNOWN");
- char config_hash_id[GUID_LEN + 1];
- uuid_unparse_lower(ae->config_hash_id, config_hash_id);
- buffer_sprintf(wb,
- "\n\t{\n"
- "\t\t\"hostname\": \"%s\",\n"
- "\t\t\"utc_offset\": %d,\n"
- "\t\t\"timezone\": \"%s\",\n"
- "\t\t\"unique_id\": %u,\n"
- "\t\t\"alarm_id\": %u,\n"
- "\t\t\"alarm_event_id\": %u,\n"
- "\t\t\"config_hash_id\": \"%s\",\n"
- "\t\t\"name\": \"%s\",\n"
- "\t\t\"chart\": \"%s\",\n"
- "\t\t\"context\": \"%s\",\n"
- "\t\t\"family\": \"%s\",\n"
- "\t\t\"class\": \"%s\",\n"
- "\t\t\"component\": \"%s\",\n"
- "\t\t\"type\": \"%s\",\n"
- "\t\t\"processed\": %s,\n"
- "\t\t\"updated\": %s,\n"
- "\t\t\"exec_run\": %lu,\n"
- "\t\t\"exec_failed\": %s,\n"
- "\t\t\"exec\": \"%s\",\n"
- "\t\t\"recipient\": \"%s\",\n"
- "\t\t\"exec_code\": %d,\n"
- "\t\t\"source\": \"%s\",\n"
- "\t\t\"command\": \"%s\",\n"
- "\t\t\"units\": \"%s\",\n"
- "\t\t\"when\": %lu,\n"
- "\t\t\"duration\": %lu,\n"
- "\t\t\"non_clear_duration\": %lu,\n"
- "\t\t\"status\": \"%s\",\n"
- "\t\t\"old_status\": \"%s\",\n"
- "\t\t\"delay\": %d,\n"
- "\t\t\"delay_up_to_timestamp\": %lu,\n"
- "\t\t\"updated_by_id\": %u,\n"
- "\t\t\"updates_id\": %u,\n"
- "\t\t\"value_string\": \"%s\",\n"
- "\t\t\"old_value_string\": \"%s\",\n"
- "\t\t\"last_repeat\": \"%lu\",\n"
- "\t\t\"silenced\": \"%s\",\n"
- , rrdhost_hostname(host)
- , host->utc_offset
- , rrdhost_abbrev_timezone(host)
- , ae->unique_id
- , ae->alarm_id
- , ae->alarm_event_id
- , config_hash_id
- , ae_name(ae)
- , ae_chart_name(ae)
- , ae_chart_context(ae)
- , ae_family(ae)
- , ae->classification?ae_classification(ae):"Unknown"
- , ae->component?ae_component(ae):"Unknown"
- , ae->type?ae_type(ae):"Unknown"
- , (ae->flags & HEALTH_ENTRY_FLAG_PROCESSED)?"true":"false"
- , (ae->flags & HEALTH_ENTRY_FLAG_UPDATED)?"true":"false"
- , (unsigned long)ae->exec_run_timestamp
- , (ae->flags & HEALTH_ENTRY_FLAG_EXEC_FAILED)?"true":"false"
- , ae->exec?ae_exec(ae):string2str(host->health.health_default_exec)
- , ae->recipient?ae_recipient(ae):string2str(host->health.health_default_recipient)
- , ae->exec_code
- , ae_source(ae)
- , edit_command
- , ae_units(ae)
- , (unsigned long)ae->when
- , (unsigned long)ae->duration
- , (unsigned long)ae->non_clear_duration
- , rrdcalc_status2string(ae->new_status)
- , rrdcalc_status2string(ae->old_status)
- , ae->delay
- , (unsigned long)ae->delay_up_to_timestamp
- , ae->updated_by_id
- , ae->updates_id
- , ae_new_value_string(ae)
- , ae_old_value_string(ae)
- , (unsigned long)ae->last_repeat
- , (ae->flags & HEALTH_ENTRY_FLAG_SILENCED)?"true":"false"
- );
- health_string2json(wb, "\t\t", "info", ae->info ? ae_info(ae) : "", ",\n");
- if(unlikely(ae->flags & HEALTH_ENTRY_FLAG_NO_CLEAR_NOTIFICATION)) {
- buffer_strcat(wb, "\t\t\"no_clear_notification\": true,\n");
- }
- buffer_strcat(wb, "\t\t\"value\":");
- buffer_print_netdata_double(wb, ae->new_value);
- buffer_strcat(wb, ",\n");
- buffer_strcat(wb, "\t\t\"old_value\":");
- buffer_print_netdata_double(wb, ae->old_value);
- buffer_strcat(wb, "\n");
- buffer_strcat(wb, "\t}");
- freez(edit_command);
- }
- void health_alarm_log2json(RRDHOST *host, BUFFER *wb, uint32_t after, char *chart) {
- buffer_strcat(wb, "[");
- unsigned int max = host->health_log.max;
- unsigned int count = 0;
- STRING *chart_string = string_strdupz(chart);
- netdata_rwlock_rdlock(&host->health_log.alarm_log_rwlock);
- ALARM_ENTRY *ae;
- for (ae = host->health_log.alarms; ae && count < max; ae = ae->next) {
- if ((ae->unique_id > after) && (!chart || chart_string == ae->chart)) {
- if (likely(count))
- buffer_strcat(wb, ",");
- health_alarm_entry2json_nolock(wb, ae, host);
- count++;
- }
- }
- netdata_rwlock_unlock(&host->health_log.alarm_log_rwlock);
- string_freez(chart_string);
- buffer_strcat(wb, "\n]\n");
- }
- static inline void health_rrdcalc_values2json_nolock(RRDHOST *host, BUFFER *wb, RRDCALC *rc) {
- (void)host;
- buffer_sprintf(wb,
- "\t\t\"%s.%s\": {\n"
- "\t\t\t\"id\": %lu,\n"
- , rrdcalc_chart_name(rc), rrdcalc_name(rc)
- , (unsigned long)rc->id);
- buffer_strcat(wb, "\t\t\t\"value\":");
- buffer_print_netdata_double(wb, rc->value);
- buffer_strcat(wb, ",\n");
- buffer_strcat(wb, "\t\t\t\"last_updated\":");
- buffer_sprintf(wb, "%lu", (unsigned long)rc->last_updated);
- buffer_strcat(wb, ",\n");
- buffer_sprintf(wb,
- "\t\t\t\"status\": \"%s\"\n"
- , rrdcalc_status2string(rc->status));
- buffer_strcat(wb, "\t\t}");
- }
- static inline void health_rrdcalc2json_nolock(RRDHOST *host, BUFFER *wb, RRDCALC *rc) {
- char value_string[100 + 1];
- format_value_and_unit(value_string, 100, rc->value, rrdcalc_units(rc), -1);
- char hash_id[GUID_LEN + 1];
- uuid_unparse_lower(rc->config_hash_id, hash_id);
- buffer_sprintf(wb,
- "\t\t\"%s.%s\": {\n"
- "\t\t\t\"id\": %lu,\n"
- "\t\t\t\"config_hash_id\": \"%s\",\n"
- "\t\t\t\"name\": \"%s\",\n"
- "\t\t\t\"chart\": \"%s\",\n"
- "\t\t\t\"family\": \"%s\",\n"
- "\t\t\t\"class\": \"%s\",\n"
- "\t\t\t\"component\": \"%s\",\n"
- "\t\t\t\"type\": \"%s\",\n"
- "\t\t\t\"active\": %s,\n"
- "\t\t\t\"disabled\": %s,\n"
- "\t\t\t\"silenced\": %s,\n"
- "\t\t\t\"exec\": \"%s\",\n"
- "\t\t\t\"recipient\": \"%s\",\n"
- "\t\t\t\"source\": \"%s\",\n"
- "\t\t\t\"units\": \"%s\",\n"
- "\t\t\t\"info\": \"%s\",\n"
- "\t\t\t\"status\": \"%s\",\n"
- "\t\t\t\"last_status_change\": %lu,\n"
- "\t\t\t\"last_updated\": %lu,\n"
- "\t\t\t\"next_update\": %lu,\n"
- "\t\t\t\"update_every\": %d,\n"
- "\t\t\t\"delay_up_duration\": %d,\n"
- "\t\t\t\"delay_down_duration\": %d,\n"
- "\t\t\t\"delay_max_duration\": %d,\n"
- "\t\t\t\"delay_multiplier\": %f,\n"
- "\t\t\t\"delay\": %d,\n"
- "\t\t\t\"delay_up_to_timestamp\": %lu,\n"
- "\t\t\t\"warn_repeat_every\": \"%u\",\n"
- "\t\t\t\"crit_repeat_every\": \"%u\",\n"
- "\t\t\t\"value_string\": \"%s\",\n"
- "\t\t\t\"last_repeat\": \"%lu\",\n"
- "\t\t\t\"times_repeat\": %lu,\n"
- , rrdcalc_chart_name(rc), rrdcalc_name(rc)
- , (unsigned long)rc->id
- , hash_id
- , rrdcalc_name(rc)
- , rrdcalc_chart_name(rc)
- , (rc->rrdset)?rrdset_family(rc->rrdset):""
- , rc->classification?rrdcalc_classification(rc):"Unknown"
- , rc->component?rrdcalc_component(rc):"Unknown"
- , rc->type?rrdcalc_type(rc):"Unknown"
- , (rc->rrdset)?"true":"false"
- , (rc->run_flags & RRDCALC_FLAG_DISABLED)?"true":"false"
- , (rc->run_flags & RRDCALC_FLAG_SILENCED)?"true":"false"
- , rc->exec?rrdcalc_exec(rc):string2str(host->health.health_default_exec)
- , rc->recipient?rrdcalc_recipient(rc):string2str(host->health.health_default_recipient)
- , rrdcalc_source(rc)
- , rrdcalc_units(rc)
- , rrdcalc_info(rc)
- , rrdcalc_status2string(rc->status)
- , (unsigned long)rc->last_status_change
- , (unsigned long)rc->last_updated
- , (unsigned long)rc->next_update
- , rc->update_every
- , rc->delay_up_duration
- , rc->delay_down_duration
- , rc->delay_max_duration
- , rc->delay_multiplier
- , rc->delay_last
- , (unsigned long)rc->delay_up_to_timestamp
- , rc->warn_repeat_every
- , rc->crit_repeat_every
- , value_string
- , (unsigned long)rc->last_repeat
- , (unsigned long)rc->times_repeat
- );
- if(unlikely(rc->options & RRDCALC_OPTION_NO_CLEAR_NOTIFICATION)) {
- buffer_strcat(wb, "\t\t\t\"no_clear_notification\": true,\n");
- }
- if(RRDCALC_HAS_DB_LOOKUP(rc)) {
- if(rc->dimensions)
- health_string2json(wb, "\t\t\t", "lookup_dimensions", rrdcalc_dimensions(rc), ",\n");
- buffer_sprintf(wb,
- "\t\t\t\"db_after\": %lu,\n"
- "\t\t\t\"db_before\": %lu,\n"
- "\t\t\t\"lookup_method\": \"%s\",\n"
- "\t\t\t\"lookup_after\": %d,\n"
- "\t\t\t\"lookup_before\": %d,\n"
- "\t\t\t\"lookup_options\": \"",
- (unsigned long) rc->db_after,
- (unsigned long) rc->db_before,
- time_grouping_method2string(rc->group),
- rc->after,
- rc->before
- );
- buffer_data_options2string(wb, rc->options);
- buffer_strcat(wb, "\",\n");
- }
- if(rc->calculation) {
- health_string2json(wb, "\t\t\t", "calc", rc->calculation->source, ",\n");
- health_string2json(wb, "\t\t\t", "calc_parsed", rc->calculation->parsed_as, ",\n");
- }
- if(rc->warning) {
- health_string2json(wb, "\t\t\t", "warn", rc->warning->source, ",\n");
- health_string2json(wb, "\t\t\t", "warn_parsed", rc->warning->parsed_as, ",\n");
- }
- if(rc->critical) {
- health_string2json(wb, "\t\t\t", "crit", rc->critical->source, ",\n");
- health_string2json(wb, "\t\t\t", "crit_parsed", rc->critical->parsed_as, ",\n");
- }
- buffer_strcat(wb, "\t\t\t\"green\":");
- buffer_print_netdata_double(wb, rc->green);
- buffer_strcat(wb, ",\n");
- buffer_strcat(wb, "\t\t\t\"red\":");
- buffer_print_netdata_double(wb, rc->red);
- buffer_strcat(wb, ",\n");
- buffer_strcat(wb, "\t\t\t\"value\":");
- buffer_print_netdata_double(wb, rc->value);
- buffer_strcat(wb, "\n");
- buffer_strcat(wb, "\t\t}");
- }
//void health_rrdcalctemplate2json_nolock(BUFFER *wb, RRDCALCTEMPLATE *rt) {
//
//}
- void health_aggregate_alarms(RRDHOST *host, BUFFER *wb, BUFFER* contexts, RRDCALC_STATUS status) {
- RRDCALC *rc;
- int numberOfAlarms = 0;
- char *tok = NULL;
- char *p = NULL;
- if (contexts) {
- p = (char*)buffer_tostring(contexts);
- while(p && *p && (tok = strsep_skip_consecutive_separators(&p, ", |"))) {
- if(!*tok) continue;
- STRING *tok_string = string_strdupz(tok);
- foreach_rrdcalc_in_rrdhost_read(host, rc) {
- if(unlikely(!rc->rrdset || !rc->rrdset->last_collected_time.tv_sec))
- continue;
- if (unlikely(!rrdset_is_available_for_exporting_and_alarms(rc->rrdset)))
- continue;
- if(unlikely(rc->rrdset
- && rc->rrdset->context == tok_string
- && ((status==RRDCALC_STATUS_RAISED)?(rc->status >= RRDCALC_STATUS_WARNING):rc->status == status)))
- numberOfAlarms++;
- }
- foreach_rrdcalc_in_rrdhost_done(rc);
- string_freez(tok_string);
- }
- }
- else {
- foreach_rrdcalc_in_rrdhost_read(host, rc) {
- if(unlikely(!rc->rrdset || !rc->rrdset->last_collected_time.tv_sec))
- continue;
- if (unlikely(!rrdset_is_available_for_exporting_and_alarms(rc->rrdset)))
- continue;
- if(unlikely((status==RRDCALC_STATUS_RAISED)?(rc->status >= RRDCALC_STATUS_WARNING):rc->status == status))
- numberOfAlarms++;
- }
- foreach_rrdcalc_in_rrdhost_done(rc);
- }
- buffer_sprintf(wb, "%d", numberOfAlarms);
- }
- static void health_alarms2json_fill_alarms(RRDHOST *host, BUFFER *wb, int all, void (*fp)(RRDHOST *, BUFFER *, RRDCALC *)) {
- RRDCALC *rc;
- int i = 0;
- foreach_rrdcalc_in_rrdhost_read(host, rc) {
- if(unlikely(!rc->rrdset || !rc->rrdset->last_collected_time.tv_sec))
- continue;
- if (unlikely(!rrdset_is_available_for_exporting_and_alarms(rc->rrdset)))
- continue;
- if(likely(!all && !(rc->status == RRDCALC_STATUS_WARNING || rc->status == RRDCALC_STATUS_CRITICAL)))
- continue;
- if(likely(i)) buffer_strcat(wb, ",\n");
- fp(host, wb, rc);
- i++;
- }
- foreach_rrdcalc_in_rrdhost_done(rc);
- }
- void health_alarms2json(RRDHOST *host, BUFFER *wb, int all) {
- buffer_sprintf(wb, "{\n\t\"hostname\": \"%s\","
- "\n\t\"latest_alarm_log_unique_id\": %u,"
- "\n\t\"status\": %s,"
- "\n\t\"now\": %lu,"
- "\n\t\"alarms\": {\n",
- rrdhost_hostname(host),
- (host->health_log.next_log_id > 0)?(host->health_log.next_log_id - 1):0,
- host->health.health_enabled?"true":"false",
- (unsigned long)now_realtime_sec());
- health_alarms2json_fill_alarms(host, wb, all, health_rrdcalc2json_nolock);
- // rrdhost_rdlock(host);
- // buffer_strcat(wb, "\n\t},\n\t\"templates\": {");
- // RRDCALCTEMPLATE *rt;
- // for(rt = host->templates; rt ; rt = rt->next)
- // health_rrdcalctemplate2json_nolock(wb, rt);
- // rrdhost_unlock(host);
- buffer_strcat(wb, "\n\t}\n}\n");
- }
- void health_alarms_values2json(RRDHOST *host, BUFFER *wb, int all) {
- buffer_sprintf(wb, "{\n\t\"hostname\": \"%s\","
- "\n\t\"alarms\": {\n",
- rrdhost_hostname(host));
- health_alarms2json_fill_alarms(host, wb, all, health_rrdcalc_values2json_nolock);
- buffer_strcat(wb, "\n\t}\n}\n");
- }
- static int have_recent_alarm(RRDHOST *host, uint32_t alarm_id, uint32_t mark)
- {
- ALARM_ENTRY *ae = host->health_log.alarms;
- while(ae) {
- if (ae->alarm_id == alarm_id && ae->unique_id > mark &&
- (ae->new_status != RRDCALC_STATUS_WARNING && ae->new_status != RRDCALC_STATUS_CRITICAL))
- return 1;
- ae = ae->next;
- }
- return 0;
- }
- void health_active_log_alarms_2json(RRDHOST *host, BUFFER *wb) {
- netdata_rwlock_rdlock(&host->health_log.alarm_log_rwlock);
- buffer_sprintf(wb, "[\n");
- unsigned int max = host->health_log.max;
- unsigned int count = 0;
- ALARM_ENTRY *ae;
- for(ae = host->health_log.alarms; ae && count < max ; ae = ae->next) {
- if (!ae->updated_by_id &&
- ((ae->new_status == RRDCALC_STATUS_WARNING || ae->new_status == RRDCALC_STATUS_CRITICAL) ||
- ((ae->old_status == RRDCALC_STATUS_WARNING || ae->old_status == RRDCALC_STATUS_CRITICAL) &&
- ae->new_status == RRDCALC_STATUS_REMOVED))) {
- if (have_recent_alarm(host, ae->alarm_id, ae->unique_id))
- continue;
- if (likely(count))
- buffer_strcat(wb, ",");
- health_alarm_entry2json_nolock(wb, ae, host);
- count++;
- }
- }
- buffer_strcat(wb, "]");
- netdata_rwlock_unlock(&host->health_log.alarm_log_rwlock);
- }
|