health_log.c 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158
  1. // SPDX-License-Identifier: GPL-3.0-or-later
  2. #include "health.h"
  3. // ----------------------------------------------------------------------------
  4. inline void health_alarm_log_save(RRDHOST *host, ALARM_ENTRY *ae) {
  5. sql_health_alarm_log_save(host, ae);
  6. }
  7. // ----------------------------------------------------------------------------
  8. // health alarm log management
  9. inline ALARM_ENTRY* health_create_alarm_entry(
  10. RRDHOST *host,
  11. uint32_t alarm_id,
  12. uint32_t alarm_event_id,
  13. const uuid_t config_hash_id,
  14. time_t when,
  15. STRING *name,
  16. STRING *chart,
  17. STRING *chart_context,
  18. STRING *chart_name,
  19. STRING *class,
  20. STRING *component,
  21. STRING *type,
  22. STRING *exec,
  23. STRING *recipient,
  24. time_t duration,
  25. NETDATA_DOUBLE old_value,
  26. NETDATA_DOUBLE new_value,
  27. RRDCALC_STATUS old_status,
  28. RRDCALC_STATUS new_status,
  29. STRING *source,
  30. STRING *units,
  31. STRING *summary,
  32. STRING *info,
  33. int delay,
  34. HEALTH_ENTRY_FLAGS flags
  35. ) {
  36. netdata_log_debug(D_HEALTH, "Health adding alarm log entry with id: %u", host->health_log.next_log_id);
  37. ALARM_ENTRY *ae = callocz(1, sizeof(ALARM_ENTRY));
  38. ae->name = string_dup(name);
  39. ae->chart = string_dup(chart);
  40. ae->chart_context = string_dup(chart_context);
  41. ae->chart_name = string_dup(chart_name);
  42. uuid_copy(ae->config_hash_id, *((uuid_t *) config_hash_id));
  43. uuid_generate_random(ae->transition_id);
  44. ae->global_id = now_realtime_usec();
  45. ae->classification = string_dup(class);
  46. ae->component = string_dup(component);
  47. ae->type = string_dup(type);
  48. ae->exec = string_dup(exec);
  49. ae->recipient = string_dup(recipient);
  50. ae->source = string_dup(source);
  51. ae->units = string_dup(units);
  52. ae->unique_id = host->health_log.next_log_id++;
  53. ae->alarm_id = alarm_id;
  54. ae->alarm_event_id = alarm_event_id;
  55. ae->when = when;
  56. ae->old_value = old_value;
  57. ae->new_value = new_value;
  58. char value_string[100 + 1];
  59. ae->old_value_string = string_strdupz(format_value_and_unit(value_string, 100, ae->old_value, ae_units(ae), -1));
  60. ae->new_value_string = string_strdupz(format_value_and_unit(value_string, 100, ae->new_value, ae_units(ae), -1));
  61. ae->summary = string_dup(summary);
  62. ae->info = string_dup(info);
  63. ae->old_status = old_status;
  64. ae->new_status = new_status;
  65. ae->duration = duration;
  66. ae->delay = delay;
  67. ae->delay_up_to_timestamp = when + delay;
  68. ae->flags |= flags;
  69. ae->last_repeat = 0;
  70. if(ae->old_status == RRDCALC_STATUS_WARNING || ae->old_status == RRDCALC_STATUS_CRITICAL)
  71. ae->non_clear_duration += ae->duration;
  72. return ae;
  73. }
  74. inline void health_alarm_log_add_entry(
  75. RRDHOST *host,
  76. ALARM_ENTRY *ae
  77. ) {
  78. netdata_log_debug(D_HEALTH, "Health adding alarm log entry with id: %u", ae->unique_id);
  79. __atomic_add_fetch(&host->health_transitions, 1, __ATOMIC_RELAXED);
  80. // link it
  81. rw_spinlock_write_lock(&host->health_log.spinlock);
  82. ae->next = host->health_log.alarms;
  83. host->health_log.alarms = ae;
  84. host->health_log.count++;
  85. rw_spinlock_write_unlock(&host->health_log.spinlock);
  86. // match previous alarms
  87. rw_spinlock_read_lock(&host->health_log.spinlock);
  88. ALARM_ENTRY *t;
  89. for(t = host->health_log.alarms ; t ; t = t->next) {
  90. if(t != ae && t->alarm_id == ae->alarm_id) {
  91. if(!(t->flags & HEALTH_ENTRY_FLAG_UPDATED) && !t->updated_by_id) {
  92. t->flags |= HEALTH_ENTRY_FLAG_UPDATED;
  93. t->updated_by_id = ae->unique_id;
  94. ae->updates_id = t->unique_id;
  95. if((t->new_status == RRDCALC_STATUS_WARNING || t->new_status == RRDCALC_STATUS_CRITICAL) &&
  96. (t->old_status == RRDCALC_STATUS_WARNING || t->old_status == RRDCALC_STATUS_CRITICAL))
  97. ae->non_clear_duration += t->non_clear_duration;
  98. health_alarm_log_save(host, t);
  99. }
  100. // no need to continue
  101. break;
  102. }
  103. }
  104. rw_spinlock_read_unlock(&host->health_log.spinlock);
  105. health_alarm_log_save(host, ae);
  106. }
  107. inline void health_alarm_log_free_one_nochecks_nounlink(ALARM_ENTRY *ae) {
  108. string_freez(ae->name);
  109. string_freez(ae->chart);
  110. string_freez(ae->chart_context);
  111. string_freez(ae->classification);
  112. string_freez(ae->component);
  113. string_freez(ae->type);
  114. string_freez(ae->exec);
  115. string_freez(ae->recipient);
  116. string_freez(ae->source);
  117. string_freez(ae->units);
  118. string_freez(ae->info);
  119. string_freez(ae->old_value_string);
  120. string_freez(ae->new_value_string);
  121. freez(ae);
  122. }
  123. inline void health_alarm_log_free(RRDHOST *host) {
  124. rw_spinlock_write_lock(&host->health_log.spinlock);
  125. ALARM_ENTRY *ae;
  126. while((ae = host->health_log.alarms)) {
  127. host->health_log.alarms = ae->next;
  128. health_alarm_log_free_one_nochecks_nounlink(ae);
  129. }
  130. rw_spinlock_write_unlock(&host->health_log.spinlock);
  131. }