health.h 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415
  1. #ifndef NETDATA_HEALTH_H
  2. #define NETDATA_HEALTH_H
  3. extern int health_enabled;
  4. extern int rrdvar_compare(void *a, void *b);
  5. #define RRDVAR_TYPE_CALCULATED 1
  6. #define RRDVAR_TYPE_TIME_T 2
  7. #define RRDVAR_TYPE_COLLECTED 3
  8. #define RRDVAR_TYPE_TOTAL 4
  9. #define RRDVAR_TYPE_INT 5
  10. #define RRDVAR_TYPE_CALCULATED_ALLOCATED 6
  11. // the variables as stored in the variables indexes
  12. // there are 3 indexes:
  13. // 1. at each chart (RRDSET.variables_root_index)
  14. // 2. at each context (RRDFAMILY.variables_root_index)
  15. // 3. at each host (RRDHOST.variables_root_index)
  16. typedef struct rrdvar {
  17. avl avl;
  18. char *name;
  19. uint32_t hash;
  20. int type;
  21. void *value;
  22. time_t last_updated;
  23. } RRDVAR;
  24. // variables linked to charts
  25. // We link variables to point to the values that are already
  26. // calculated / processed by the normal data collection process
  27. // This means, there will be no speed penalty for using
  28. // these variables
  29. typedef struct rrdsetvar {
  30. char *key_fullid; // chart type.chart id.variable
  31. char *key_fullname; // chart type.chart name.variable
  32. char *variable; // variable
  33. int type;
  34. void *value;
  35. uint32_t options;
  36. RRDVAR *var_local;
  37. RRDVAR *var_family;
  38. RRDVAR *var_host;
  39. RRDVAR *var_family_name;
  40. RRDVAR *var_host_name;
  41. struct rrdset *rrdset;
  42. struct rrdsetvar *next;
  43. } RRDSETVAR;
  44. // variables linked to individual dimensions
  45. // We link variables to point the values that are already
  46. // calculated / processed by the normal data collection process
  47. // This means, there will be no speed penalty for using
  48. // these variables
  49. typedef struct rrddimvar {
  50. char *prefix;
  51. char *suffix;
  52. char *key_id; // dimension id
  53. char *key_name; // dimension name
  54. char *key_contextid; // context + dimension id
  55. char *key_contextname; // context + dimension name
  56. char *key_fullidid; // chart type.chart id + dimension id
  57. char *key_fullidname; // chart type.chart id + dimension name
  58. char *key_fullnameid; // chart type.chart name + dimension id
  59. char *key_fullnamename; // chart type.chart name + dimension name
  60. int type;
  61. void *value;
  62. uint32_t options;
  63. RRDVAR *var_local_id;
  64. RRDVAR *var_local_name;
  65. RRDVAR *var_family_id;
  66. RRDVAR *var_family_name;
  67. RRDVAR *var_family_contextid;
  68. RRDVAR *var_family_contextname;
  69. RRDVAR *var_host_chartidid;
  70. RRDVAR *var_host_chartidname;
  71. RRDVAR *var_host_chartnameid;
  72. RRDVAR *var_host_chartnamename;
  73. struct rrddim *rrddim;
  74. struct rrddimvar *next;
  75. } RRDDIMVAR;
  76. // calculated variables (defined in health configuration)
  77. // These aggregate time-series data at fixed intervals
  78. // (defined in their update_every member below)
  79. // These increase the overhead of netdata.
  80. //
  81. // These calculations are allocated and linked (->next)
  82. // under RRDHOST.
  83. // They are also linked to RRDSET (of course only when the
  84. // chart is found, via ->rrdset_next and ->rrdset_prev).
  85. // This double-linked list is maintained sorted at all times
  86. // having as RRDSET.calculations the RRDCALC to be processed
  87. // next.
  88. #define RRDCALC_STATUS_REMOVED -2
  89. #define RRDCALC_STATUS_UNDEFINED -1
  90. #define RRDCALC_STATUS_UNINITIALIZED 0
  91. #define RRDCALC_STATUS_CLEAR 1
  92. #define RRDCALC_STATUS_RAISED 2
  93. #define RRDCALC_STATUS_WARNING 3
  94. #define RRDCALC_STATUS_CRITICAL 4
  95. #define RRDCALC_FLAG_DB_ERROR 0x00000001
  96. #define RRDCALC_FLAG_DB_NAN 0x00000002
  97. /* #define RRDCALC_FLAG_DB_STALE 0x00000004 */
  98. #define RRDCALC_FLAG_CALC_ERROR 0x00000008
  99. #define RRDCALC_FLAG_WARN_ERROR 0x00000010
  100. #define RRDCALC_FLAG_CRIT_ERROR 0x00000020
  101. #define RRDCALC_FLAG_RUNNABLE 0x00000040
  102. #define RRDCALC_FLAG_NO_CLEAR_NOTIFICATION 0x80000000
  103. typedef struct rrdcalc {
  104. uint32_t id; // the unique id of this alarm
  105. uint32_t next_event_id; // the next event id that will be used for this alarm
  106. char *name; // the name of this alarm
  107. uint32_t hash;
  108. char *exec; // the command to execute when this alarm switches state
  109. char *recipient; // the recipient of the alarm (the first parameter to exec)
  110. char *chart; // the chart id this should be linked to
  111. uint32_t hash_chart;
  112. char *source; // the source of this alarm
  113. char *units; // the units of the alarm
  114. char *info; // a short description of the alarm
  115. int update_every; // update frequency for the alarm
  116. // the red and green threshold of this alarm (to be set to the chart)
  117. calculated_number green;
  118. calculated_number red;
  119. // ------------------------------------------------------------------------
  120. // database lookup settings
  121. char *dimensions; // the chart dimensions
  122. int group; // grouping method: average, max, etc.
  123. int before; // ending point in time-series
  124. int after; // starting point in time-series
  125. uint32_t options; // calculation options
  126. // ------------------------------------------------------------------------
  127. // expressions related to the alarm
  128. EVAL_EXPRESSION *calculation; // expression to calculate the value of the alarm
  129. EVAL_EXPRESSION *warning; // expression to check the warning condition
  130. EVAL_EXPRESSION *critical; // expression to check the critical condition
  131. // ------------------------------------------------------------------------
  132. // notification delay settings
  133. int delay_up_duration; // duration to delay notifications when alarm raises
  134. int delay_down_duration; // duration to delay notifications when alarm lowers
  135. int delay_max_duration; // the absolute max delay to apply to this alarm
  136. float delay_multiplier; // multiplier for all delays when alarms switch status
  137. // while now < delay_up_to
  138. // ------------------------------------------------------------------------
  139. // runtime information
  140. int status; // the current status of the alarm
  141. calculated_number value; // the current value of the alarm
  142. calculated_number old_value; // the previous value of the alarm
  143. uint32_t rrdcalc_flags; // check RRDCALC_FLAG_*
  144. time_t last_updated; // the last update timestamp of the alarm
  145. time_t next_update; // the next update timestamp of the alarm
  146. time_t last_status_change; // the timestamp of the last time this alarm changed status
  147. time_t db_after; // the first timestamp evaluated by the db lookup
  148. time_t db_before; // the last timestamp evaluated by the db lookup
  149. time_t delay_up_to_timestamp; // the timestamp up to which we should delay notifications
  150. int delay_up_current; // the current up notification delay duration
  151. int delay_down_current; // the current down notification delay duration
  152. int delay_last; // the last delay we used
  153. // ------------------------------------------------------------------------
  154. // variables this alarm exposes to the rest of the alarms
  155. RRDVAR *local;
  156. RRDVAR *family;
  157. RRDVAR *hostid;
  158. RRDVAR *hostname;
  159. // ------------------------------------------------------------------------
  160. // the chart this alarm it is linked to
  161. struct rrdset *rrdset;
  162. // linking of this alarm on its chart
  163. struct rrdcalc *rrdset_next;
  164. struct rrdcalc *rrdset_prev;
  165. struct rrdcalc *next;
  166. } RRDCALC;
  167. #define RRDCALC_HAS_DB_LOOKUP(rc) ((rc)->after)
  168. // RRDCALCTEMPLATE
  169. // these are to be applied to charts found dynamically
  170. // based on their context.
  171. typedef struct rrdcalctemplate {
  172. char *name;
  173. uint32_t hash_name;
  174. char *exec;
  175. char *recipient;
  176. char *context;
  177. uint32_t hash_context;
  178. char *family_match;
  179. SIMPLE_PATTERN *family_pattern;
  180. char *source; // the source of this alarm
  181. char *units; // the units of the alarm
  182. char *info; // a short description of the alarm
  183. int update_every; // update frequency for the alarm
  184. // the red and green threshold of this alarm (to be set to the chart)
  185. calculated_number green;
  186. calculated_number red;
  187. // ------------------------------------------------------------------------
  188. // database lookup settings
  189. char *dimensions; // the chart dimensions
  190. int group; // grouping method: average, max, etc.
  191. int before; // ending point in time-series
  192. int after; // starting point in time-series
  193. uint32_t options; // calculation options
  194. // ------------------------------------------------------------------------
  195. // notification delay settings
  196. int delay_up_duration; // duration to delay notifications when alarm raises
  197. int delay_down_duration; // duration to delay notifications when alarm lowers
  198. int delay_max_duration; // the absolute max delay to apply to this alarm
  199. float delay_multiplier; // multiplier for all delays when alarms switch status
  200. // ------------------------------------------------------------------------
  201. // expressions related to the alarm
  202. EVAL_EXPRESSION *calculation;
  203. EVAL_EXPRESSION *warning;
  204. EVAL_EXPRESSION *critical;
  205. struct rrdcalctemplate *next;
  206. } RRDCALCTEMPLATE;
  207. #define RRDCALCTEMPLATE_HAS_CALCULATION(rt) ((rt)->after)
  208. #define HEALTH_ENTRY_FLAG_PROCESSED 0x00000001
  209. #define HEALTH_ENTRY_FLAG_UPDATED 0x00000002
  210. #define HEALTH_ENTRY_FLAG_EXEC_RUN 0x00000004
  211. #define HEALTH_ENTRY_FLAG_EXEC_FAILED 0x00000008
  212. #define HEALTH_ENTRY_FLAG_SAVED 0x10000000
  213. #define HEALTH_ENTRY_FLAG_NO_CLEAR_NOTIFICATION 0x80000000
  214. typedef struct alarm_entry {
  215. uint32_t unique_id;
  216. uint32_t alarm_id;
  217. uint32_t alarm_event_id;
  218. time_t when;
  219. time_t duration;
  220. time_t non_clear_duration;
  221. char *name;
  222. uint32_t hash_name;
  223. char *chart;
  224. uint32_t hash_chart;
  225. char *family;
  226. char *exec;
  227. char *recipient;
  228. time_t exec_run_timestamp;
  229. int exec_code;
  230. char *source;
  231. char *units;
  232. char *info;
  233. calculated_number old_value;
  234. calculated_number new_value;
  235. char *old_value_string;
  236. char *new_value_string;
  237. int old_status;
  238. int new_status;
  239. uint32_t flags;
  240. int delay;
  241. time_t delay_up_to_timestamp;
  242. uint32_t updated_by_id;
  243. uint32_t updates_id;
  244. struct alarm_entry *next;
  245. } ALARM_ENTRY;
  246. typedef struct alarm_log {
  247. uint32_t next_log_id;
  248. uint32_t next_alarm_id;
  249. unsigned int count;
  250. unsigned int max;
  251. ALARM_ENTRY *alarms;
  252. pthread_rwlock_t alarm_log_rwlock;
  253. } ALARM_LOG;
  254. #include "rrd.h"
  255. extern void rrdsetvar_rename_all(RRDSET *st);
  256. extern RRDSETVAR *rrdsetvar_create(RRDSET *st, const char *variable, int type, void *value, uint32_t options);
  257. extern void rrdsetvar_free(RRDSETVAR *rs);
  258. extern void rrddimvar_rename_all(RRDDIM *rd);
  259. extern RRDDIMVAR *rrddimvar_create(RRDDIM *rd, int type, const char *prefix, const char *suffix, void *value, uint32_t options);
  260. extern void rrddimvar_free(RRDDIMVAR *rs);
  261. extern void rrdsetcalc_link_matching(RRDSET *st);
  262. extern void rrdsetcalc_unlink(RRDCALC *rc);
  263. extern void rrdcalctemplate_link_matching(RRDSET *st);
  264. extern RRDCALC *rrdcalc_find(RRDSET *st, const char *name);
  265. extern void health_init(void);
  266. extern void *health_main(void *ptr);
  267. extern void health_reload(void);
  268. extern int health_variable_lookup(const char *variable, uint32_t hash, RRDCALC *rc, calculated_number *result);
  269. extern void health_alarms2json(RRDHOST *host, BUFFER *wb, int all);
  270. extern void health_alarm_log2json(RRDHOST *host, BUFFER *wb, uint32_t after);
  271. void health_api_v1_chart_variables2json(RRDSET *st, BUFFER *buf);
  272. extern RRDVAR *rrdvar_custom_host_variable_create(RRDHOST *host, const char *name);
  273. extern void rrdvar_custom_host_variable_destroy(RRDHOST *host, const char *name);
  274. extern void rrdvar_custom_host_variable_set(RRDVAR *rv, calculated_number value);
  275. extern const char *rrdcalc_status2string(int status);
  276. extern int health_alarm_log_open(RRDHOST *host);
  277. extern void health_alarm_log_close(RRDHOST *host);
  278. extern void health_log_rotate(RRDHOST *host);
  279. extern void health_alarm_log_save(RRDHOST *host, ALARM_ENTRY *ae);
  280. extern ssize_t health_alarm_log_read(RRDHOST *host, FILE *fp, const char *filename);
  281. extern void health_alarm_log_load(RRDHOST *host);
  282. extern void health_alarm_log(
  283. RRDHOST *host,
  284. uint32_t alarm_id,
  285. uint32_t alarm_event_id,
  286. time_t when,
  287. const char *name,
  288. const char *chart,
  289. const char *family,
  290. const char *exec,
  291. const char *recipient,
  292. time_t duration,
  293. calculated_number old_value,
  294. calculated_number new_value,
  295. int old_status,
  296. int new_status,
  297. const char *source,
  298. const char *units,
  299. const char *info,
  300. int delay,
  301. uint32_t flags
  302. );
  303. extern void health_readdir(RRDHOST *host, const char *path);
  304. extern char *health_config_dir(void);
  305. extern void health_free_host_nolock(RRDHOST *host);
  306. extern void health_reload_host(RRDHOST *host);
  307. #ifdef NETDATA_HEALTH_INTERNALS
  308. extern int rrdcalc_exists(RRDHOST *host, const char *chart, const char *name, uint32_t hash_chart, uint32_t hash_name);
  309. extern uint32_t rrdcalc_get_unique_id(RRDHOST *host, const char *chart, const char *name, uint32_t *next_event_id);
  310. extern void rrdcalc_create_part2(RRDHOST *host, RRDCALC *rc);
  311. extern void rrdcalc_free(RRDHOST *host, RRDCALC *rc);
  312. extern void rrdcalctemplate_free(RRDHOST *host, RRDCALCTEMPLATE *rt);
  313. extern int rrdvar_fix_name(char *variable);
  314. #endif
  315. #endif //NETDATA_HEALTH_H