health_log.c 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156
  1. // SPDX-License-Identifier: GPL-3.0-or-later
  2. #include "health.h"
  3. // ----------------------------------------------------------------------------
  4. inline void health_alarm_log_save(RRDHOST *host, ALARM_ENTRY *ae) {
  5. sql_health_alarm_log_save(host, ae);
  6. }
  7. // ----------------------------------------------------------------------------
  8. // health alarm log management
  9. inline ALARM_ENTRY* health_create_alarm_entry(
  10. RRDHOST *host,
  11. uint32_t alarm_id,
  12. uint32_t alarm_event_id,
  13. const uuid_t config_hash_id,
  14. time_t when,
  15. STRING *name,
  16. STRING *chart,
  17. STRING *chart_context,
  18. STRING *family,
  19. STRING *class,
  20. STRING *component,
  21. STRING *type,
  22. STRING *exec,
  23. STRING *recipient,
  24. time_t duration,
  25. NETDATA_DOUBLE old_value,
  26. NETDATA_DOUBLE new_value,
  27. RRDCALC_STATUS old_status,
  28. RRDCALC_STATUS new_status,
  29. STRING *source,
  30. STRING *units,
  31. STRING *info,
  32. int delay,
  33. uint32_t flags
  34. ) {
  35. debug(D_HEALTH, "Health adding alarm log entry with id: %u", host->health_log.next_log_id);
  36. ALARM_ENTRY *ae = callocz(1, sizeof(ALARM_ENTRY));
  37. ae->name = string_dup(name);
  38. ae->chart = string_dup(chart);
  39. ae->chart_context = string_dup(chart_context);
  40. uuid_copy(ae->config_hash_id, *((uuid_t *) config_hash_id));
  41. uuid_generate_random(ae->transition_id);
  42. ae->family = string_dup(family);
  43. ae->classification = string_dup(class);
  44. ae->component = string_dup(component);
  45. ae->type = string_dup(type);
  46. ae->exec = string_dup(exec);
  47. ae->recipient = string_dup(recipient);
  48. ae->source = string_dup(source);
  49. ae->units = string_dup(units);
  50. ae->unique_id = host->health_log.next_log_id++;
  51. ae->alarm_id = alarm_id;
  52. ae->alarm_event_id = alarm_event_id;
  53. ae->when = when;
  54. ae->old_value = old_value;
  55. ae->new_value = new_value;
  56. char value_string[100 + 1];
  57. ae->old_value_string = string_strdupz(format_value_and_unit(value_string, 100, ae->old_value, ae_units(ae), -1));
  58. ae->new_value_string = string_strdupz(format_value_and_unit(value_string, 100, ae->new_value, ae_units(ae), -1));
  59. ae->info = string_dup(info);
  60. ae->old_status = old_status;
  61. ae->new_status = new_status;
  62. ae->duration = duration;
  63. ae->delay = delay;
  64. ae->delay_up_to_timestamp = when + delay;
  65. ae->flags |= flags;
  66. ae->last_repeat = 0;
  67. if(ae->old_status == RRDCALC_STATUS_WARNING || ae->old_status == RRDCALC_STATUS_CRITICAL)
  68. ae->non_clear_duration += ae->duration;
  69. return ae;
  70. }
  71. inline void health_alarm_log_add_entry(
  72. RRDHOST *host,
  73. ALARM_ENTRY *ae
  74. ) {
  75. debug(D_HEALTH, "Health adding alarm log entry with id: %u", ae->unique_id);
  76. __atomic_add_fetch(&host->health_transitions, 1, __ATOMIC_RELAXED);
  77. // link it
  78. netdata_rwlock_wrlock(&host->health_log.alarm_log_rwlock);
  79. ae->next = host->health_log.alarms;
  80. host->health_log.alarms = ae;
  81. host->health_log.count++;
  82. netdata_rwlock_unlock(&host->health_log.alarm_log_rwlock);
  83. // match previous alarms
  84. netdata_rwlock_rdlock(&host->health_log.alarm_log_rwlock);
  85. ALARM_ENTRY *t;
  86. for(t = host->health_log.alarms ; t ; t = t->next) {
  87. if(t != ae && t->alarm_id == ae->alarm_id) {
  88. if(!(t->flags & HEALTH_ENTRY_FLAG_UPDATED) && !t->updated_by_id) {
  89. t->flags |= HEALTH_ENTRY_FLAG_UPDATED;
  90. t->updated_by_id = ae->unique_id;
  91. ae->updates_id = t->unique_id;
  92. if((t->new_status == RRDCALC_STATUS_WARNING || t->new_status == RRDCALC_STATUS_CRITICAL) &&
  93. (t->old_status == RRDCALC_STATUS_WARNING || t->old_status == RRDCALC_STATUS_CRITICAL))
  94. ae->non_clear_duration += t->non_clear_duration;
  95. health_alarm_log_save(host, t);
  96. }
  97. // no need to continue
  98. break;
  99. }
  100. }
  101. netdata_rwlock_unlock(&host->health_log.alarm_log_rwlock);
  102. health_alarm_log_save(host, ae);
  103. }
  104. inline void health_alarm_log_free_one_nochecks_nounlink(ALARM_ENTRY *ae) {
  105. string_freez(ae->name);
  106. string_freez(ae->chart);
  107. string_freez(ae->chart_context);
  108. string_freez(ae->family);
  109. string_freez(ae->classification);
  110. string_freez(ae->component);
  111. string_freez(ae->type);
  112. string_freez(ae->exec);
  113. string_freez(ae->recipient);
  114. string_freez(ae->source);
  115. string_freez(ae->units);
  116. string_freez(ae->info);
  117. string_freez(ae->old_value_string);
  118. string_freez(ae->new_value_string);
  119. freez(ae);
  120. }
  121. inline void health_alarm_log_free(RRDHOST *host) {
  122. netdata_rwlock_wrlock(&host->health_log.alarm_log_rwlock);
  123. ALARM_ENTRY *ae;
  124. while((ae = host->health_log.alarms)) {
  125. host->health_log.alarms = ae->next;
  126. health_alarm_log_free_one_nochecks_nounlink(ae);
  127. }
  128. netdata_rwlock_unlock(&host->health_log.alarm_log_rwlock);
  129. }