health_log.c 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161
  1. // SPDX-License-Identifier: GPL-3.0-or-later
  2. #include "health.h"
  3. // ----------------------------------------------------------------------------
  4. inline void health_alarm_log_save(RRDHOST *host, ALARM_ENTRY *ae) {
  5. sql_health_alarm_log_save(host, ae);
  6. #ifdef ENABLE_ACLK
  7. if (netdata_cloud_setting) {
  8. sql_queue_alarm_to_aclk(host, ae, 0);
  9. }
  10. #endif
  11. }
  12. // ----------------------------------------------------------------------------
  13. // health alarm log management
  14. inline ALARM_ENTRY* health_create_alarm_entry(
  15. RRDHOST *host,
  16. uint32_t alarm_id,
  17. uint32_t alarm_event_id,
  18. const uuid_t config_hash_id,
  19. time_t when,
  20. STRING *name,
  21. STRING *chart,
  22. STRING *chart_context,
  23. STRING *family,
  24. STRING *class,
  25. STRING *component,
  26. STRING *type,
  27. STRING *exec,
  28. STRING *recipient,
  29. time_t duration,
  30. NETDATA_DOUBLE old_value,
  31. NETDATA_DOUBLE new_value,
  32. RRDCALC_STATUS old_status,
  33. RRDCALC_STATUS new_status,
  34. STRING *source,
  35. STRING *units,
  36. STRING *info,
  37. int delay,
  38. uint32_t flags
  39. ) {
  40. debug(D_HEALTH, "Health adding alarm log entry with id: %u", host->health_log.next_log_id);
  41. ALARM_ENTRY *ae = callocz(1, sizeof(ALARM_ENTRY));
  42. ae->name = string_dup(name);
  43. ae->chart = string_dup(chart);
  44. ae->chart_context = string_dup(chart_context);
  45. uuid_copy(ae->config_hash_id, *((uuid_t *) config_hash_id));
  46. ae->family = string_dup(family);
  47. ae->classification = string_dup(class);
  48. ae->component = string_dup(component);
  49. ae->type = string_dup(type);
  50. ae->exec = string_dup(exec);
  51. ae->recipient = string_dup(recipient);
  52. ae->source = string_dup(source);
  53. ae->units = string_dup(units);
  54. ae->unique_id = host->health_log.next_log_id++;
  55. ae->alarm_id = alarm_id;
  56. ae->alarm_event_id = alarm_event_id;
  57. ae->when = when;
  58. ae->old_value = old_value;
  59. ae->new_value = new_value;
  60. char value_string[100 + 1];
  61. ae->old_value_string = string_strdupz(format_value_and_unit(value_string, 100, ae->old_value, ae_units(ae), -1));
  62. ae->new_value_string = string_strdupz(format_value_and_unit(value_string, 100, ae->new_value, ae_units(ae), -1));
  63. ae->info = string_dup(info);
  64. ae->old_status = old_status;
  65. ae->new_status = new_status;
  66. ae->duration = duration;
  67. ae->delay = delay;
  68. ae->delay_up_to_timestamp = when + delay;
  69. ae->flags |= flags;
  70. ae->last_repeat = 0;
  71. if(ae->old_status == RRDCALC_STATUS_WARNING || ae->old_status == RRDCALC_STATUS_CRITICAL)
  72. ae->non_clear_duration += ae->duration;
  73. return ae;
  74. }
  75. inline void health_alarm_log_add_entry(
  76. RRDHOST *host,
  77. ALARM_ENTRY *ae
  78. ) {
  79. debug(D_HEALTH, "Health adding alarm log entry with id: %u", ae->unique_id);
  80. __atomic_add_fetch(&host->health_transitions, 1, __ATOMIC_RELAXED);
  81. // link it
  82. netdata_rwlock_wrlock(&host->health_log.alarm_log_rwlock);
  83. ae->next = host->health_log.alarms;
  84. host->health_log.alarms = ae;
  85. host->health_log.count++;
  86. netdata_rwlock_unlock(&host->health_log.alarm_log_rwlock);
  87. // match previous alarms
  88. netdata_rwlock_rdlock(&host->health_log.alarm_log_rwlock);
  89. ALARM_ENTRY *t;
  90. for(t = host->health_log.alarms ; t ; t = t->next) {
  91. if(t != ae && t->alarm_id == ae->alarm_id) {
  92. if(!(t->flags & HEALTH_ENTRY_FLAG_UPDATED) && !t->updated_by_id) {
  93. t->flags |= HEALTH_ENTRY_FLAG_UPDATED;
  94. t->updated_by_id = ae->unique_id;
  95. ae->updates_id = t->unique_id;
  96. if((t->new_status == RRDCALC_STATUS_WARNING || t->new_status == RRDCALC_STATUS_CRITICAL) &&
  97. (t->old_status == RRDCALC_STATUS_WARNING || t->old_status == RRDCALC_STATUS_CRITICAL))
  98. ae->non_clear_duration += t->non_clear_duration;
  99. health_alarm_log_save(host, t);
  100. }
  101. // no need to continue
  102. break;
  103. }
  104. }
  105. netdata_rwlock_unlock(&host->health_log.alarm_log_rwlock);
  106. health_alarm_log_save(host, ae);
  107. }
  108. inline void health_alarm_log_free_one_nochecks_nounlink(ALARM_ENTRY *ae) {
  109. string_freez(ae->name);
  110. string_freez(ae->chart);
  111. string_freez(ae->chart_context);
  112. string_freez(ae->family);
  113. string_freez(ae->classification);
  114. string_freez(ae->component);
  115. string_freez(ae->type);
  116. string_freez(ae->exec);
  117. string_freez(ae->recipient);
  118. string_freez(ae->source);
  119. string_freez(ae->units);
  120. string_freez(ae->info);
  121. string_freez(ae->old_value_string);
  122. string_freez(ae->new_value_string);
  123. freez(ae);
  124. }
  125. inline void health_alarm_log_free(RRDHOST *host) {
  126. netdata_rwlock_wrlock(&host->health_log.alarm_log_rwlock);
  127. ALARM_ENTRY *ae;
  128. while((ae = host->health_log.alarms)) {
  129. host->health_log.alarms = ae->next;
  130. health_alarm_log_free_one_nochecks_nounlink(ae);
  131. }
  132. netdata_rwlock_unlock(&host->health_log.alarm_log_rwlock);
  133. }