rrdcalc.c 31 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869
  1. // SPDX-License-Identifier: GPL-3.0-or-later
  2. #include "rrd.h"
  3. // ----------------------------------------------------------------------------
  4. // RRDCALC helpers
  5. void rrdcalc_flags_to_json_array(BUFFER *wb, const char *key, RRDCALC_FLAGS flags) {
  6. buffer_json_member_add_array(wb, key);
  7. if(flags & RRDCALC_FLAG_DB_ERROR)
  8. buffer_json_add_array_item_string(wb, "DB_ERROR");
  9. if(flags & RRDCALC_FLAG_DB_NAN)
  10. buffer_json_add_array_item_string(wb, "DB_NAN");
  11. if(flags & RRDCALC_FLAG_CALC_ERROR)
  12. buffer_json_add_array_item_string(wb, "CALC_ERROR");
  13. if(flags & RRDCALC_FLAG_WARN_ERROR)
  14. buffer_json_add_array_item_string(wb, "WARN_ERROR");
  15. if(flags & RRDCALC_FLAG_CRIT_ERROR)
  16. buffer_json_add_array_item_string(wb, "CRIT_ERROR");
  17. if(flags & RRDCALC_FLAG_RUNNABLE)
  18. buffer_json_add_array_item_string(wb, "RUNNABLE");
  19. if(flags & RRDCALC_FLAG_DISABLED)
  20. buffer_json_add_array_item_string(wb, "DISABLED");
  21. if(flags & RRDCALC_FLAG_SILENCED)
  22. buffer_json_add_array_item_string(wb, "SILENCED");
  23. if(flags & RRDCALC_FLAG_RUN_ONCE)
  24. buffer_json_add_array_item_string(wb, "RUN_ONCE");
  25. if(flags & RRDCALC_FLAG_FROM_TEMPLATE)
  26. buffer_json_add_array_item_string(wb, "FROM_TEMPLATE");
  27. buffer_json_array_close(wb);
  28. }
  29. inline const char *rrdcalc_status2string(RRDCALC_STATUS status) {
  30. switch(status) {
  31. case RRDCALC_STATUS_REMOVED:
  32. return "REMOVED";
  33. case RRDCALC_STATUS_UNDEFINED:
  34. return "UNDEFINED";
  35. case RRDCALC_STATUS_UNINITIALIZED:
  36. return "UNINITIALIZED";
  37. case RRDCALC_STATUS_CLEAR:
  38. return "CLEAR";
  39. case RRDCALC_STATUS_RAISED:
  40. return "RAISED";
  41. case RRDCALC_STATUS_WARNING:
  42. return "WARNING";
  43. case RRDCALC_STATUS_CRITICAL:
  44. return "CRITICAL";
  45. default:
  46. netdata_log_error("Unknown alarm status %d", status);
  47. return "UNKNOWN";
  48. }
  49. }
  50. uint32_t rrdcalc_get_unique_id(RRDHOST *host, STRING *chart, STRING *name, uint32_t *next_event_id, uuid_t *config_hash_id) {
  51. rw_spinlock_read_lock(&host->health_log.spinlock);
  52. // re-use old IDs, by looking them up in the alarm log
  53. ALARM_ENTRY *ae = NULL;
  54. for(ae = host->health_log.alarms; ae ;ae = ae->next) {
  55. if(unlikely(name == ae->name && chart == ae->chart && !uuid_memcmp(&ae->config_hash_id, config_hash_id))) {
  56. if(next_event_id) *next_event_id = ae->alarm_event_id + 1;
  57. break;
  58. }
  59. }
  60. uint32_t alarm_id;
  61. if(ae)
  62. alarm_id = ae->alarm_id;
  63. else {
  64. alarm_id = sql_get_alarm_id(host, chart, name, next_event_id, config_hash_id);
  65. if (!alarm_id) {
  66. //check possible stored config hash as zeroes or null
  67. alarm_id = sql_get_alarm_id_check_zero_hash(host, chart, name, next_event_id, config_hash_id);
  68. if (!alarm_id) {
  69. if (unlikely(!host->health_log.next_alarm_id))
  70. host->health_log.next_alarm_id = (uint32_t)now_realtime_sec();
  71. alarm_id = host->health_log.next_alarm_id++;
  72. }
  73. }
  74. }
  75. rw_spinlock_read_unlock(&host->health_log.spinlock);
  76. return alarm_id;
  77. }
  78. // ----------------------------------------------------------------------------
  79. // RRDCALC replacing info/summary text variables with RRDSET labels
  80. static STRING *rrdcalc_replace_variables_with_rrdset_labels(const char *line, RRDCALC *rc) {
  81. if (!line || !*line)
  82. return NULL;
  83. size_t pos = 0;
  84. char *temp = strdupz(line);
  85. char var[RRDCALC_VAR_MAX];
  86. char *m, *lbl_value = NULL;
  87. while ((m = strchr(temp + pos, '$')) && *(m+1) == '{') {
  88. int i = 0;
  89. char *e = m;
  90. while (*e) {
  91. var[i++] = *e;
  92. if (*e == '}' || i == RRDCALC_VAR_MAX - 1)
  93. break;
  94. e++;
  95. }
  96. var[i] = '\0';
  97. pos = m - temp + 1;
  98. if (!strcmp(var, RRDCALC_VAR_FAMILY)) {
  99. char *buf = find_and_replace(temp, var, (rc->rrdset && rc->rrdset->family) ? rrdset_family(rc->rrdset) : "", m);
  100. freez(temp);
  101. temp = buf;
  102. }
  103. else if (!strncmp(var, RRDCALC_VAR_LABEL, RRDCALC_VAR_LABEL_LEN)) {
  104. char label_val[RRDCALC_VAR_MAX + RRDCALC_VAR_LABEL_LEN + 1] = { 0 };
  105. strcpy(label_val, var+RRDCALC_VAR_LABEL_LEN);
  106. label_val[i - RRDCALC_VAR_LABEL_LEN - 1] = '\0';
  107. if(likely(rc->rrdset && rc->rrdset->rrdlabels)) {
  108. lbl_value = NULL;
  109. rrdlabels_get_value_strdup_or_null(rc->rrdset->rrdlabels, &lbl_value, label_val);
  110. if (lbl_value) {
  111. char *buf = find_and_replace(temp, var, lbl_value, m);
  112. freez(temp);
  113. temp = buf;
  114. freez(lbl_value);
  115. }
  116. }
  117. }
  118. }
  119. STRING *ret = string_strdupz(temp);
  120. freez(temp);
  121. return ret;
  122. }
  123. void rrdcalc_update_info_using_rrdset_labels(RRDCALC *rc) {
  124. if(!rc->rrdset || !rc->original_info || !rc->rrdset->rrdlabels) return;
  125. size_t labels_version = rrdlabels_version(rc->rrdset->rrdlabels);
  126. if(rc->labels_version != labels_version) {
  127. if (rc->original_info) {
  128. STRING *old = rc->info;
  129. rc->info = rrdcalc_replace_variables_with_rrdset_labels(rrdcalc_original_info(rc), rc);
  130. string_freez(old);
  131. }
  132. if (rc->original_summary) {
  133. STRING *old = rc->summary;
  134. rc->summary = rrdcalc_replace_variables_with_rrdset_labels(rrdcalc_original_summary(rc), rc);
  135. string_freez(old);
  136. }
  137. rc->labels_version = labels_version;
  138. }
  139. }
  140. // ----------------------------------------------------------------------------
  141. // RRDCALC index management for RRDSET
  142. // the dictionary requires a unique key for every item
  143. // we use {chart id}/{alert name} for both the RRDHOST and RRDSET alert indexes.
  // size used for the key buffers built by the functions of this file
  #define RRDCALC_MAX_KEY_SIZE 1024

  // Format the index key "<chart>/<alert>" into dst, passing dst_len to snprintfz()
  // as the size limit, and return the value snprintfz() reports.
  static size_t rrdcalc_key(char *dst, size_t dst_len, const char *chart, const char *alert) {
      size_t written = snprintfz(dst, dst_len, "%s/%s", chart, alert);
      return written;
  }
  148. const RRDCALC_ACQUIRED *rrdcalc_from_rrdset_get(RRDSET *st, const char *alert_name) {
  149. char key[RRDCALC_MAX_KEY_SIZE + 1];
  150. size_t key_len = rrdcalc_key(key, RRDCALC_MAX_KEY_SIZE, rrdset_id(st), alert_name);
  151. const RRDCALC_ACQUIRED *rca = (const RRDCALC_ACQUIRED *)dictionary_get_and_acquire_item_advanced(st->rrdhost->rrdcalc_root_index, key, (ssize_t)(key_len + 1));
  152. if(!rca) {
  153. key_len = rrdcalc_key(key, RRDCALC_MAX_KEY_SIZE, rrdset_name(st), alert_name);
  154. rca = (const RRDCALC_ACQUIRED *)dictionary_get_and_acquire_item_advanced(st->rrdhost->rrdcalc_root_index, key, (ssize_t)(key_len + 1));
  155. }
  156. return rca;
  157. }
  158. void rrdcalc_from_rrdset_release(RRDSET *st, const RRDCALC_ACQUIRED *rca) {
  159. if(!rca) return;
  160. dictionary_acquired_item_release(st->rrdhost->rrdcalc_root_index, (const DICTIONARY_ITEM *)rca);
  161. }
  162. RRDCALC *rrdcalc_acquired_to_rrdcalc(const RRDCALC_ACQUIRED *rca) {
  163. if(rca)
  164. return dictionary_acquired_item_value((const DICTIONARY_ITEM *)rca);
  165. return NULL;
  166. }
  167. // ----------------------------------------------------------------------------
  168. // RRDCALC managing the linking with RRDSET
  169. static void rrdcalc_link_to_rrdset(RRDSET *st, RRDCALC *rc) {
  170. RRDHOST *host = st->rrdhost;
  171. netdata_log_debug(D_HEALTH, "Health linking alarm '%s.%s' to chart '%s' of host '%s'", rrdcalc_chart_name(rc), rrdcalc_name(rc), rrdset_id(st), rrdhost_hostname(host));
  172. rc->last_status_change_value = rc->value;
  173. rc->last_status_change = now_realtime_sec();
  174. rc->rrdset = st;
  175. rw_spinlock_write_lock(&st->alerts.spinlock);
  176. DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(st->alerts.base, rc, prev, next);
  177. rw_spinlock_write_unlock(&st->alerts.spinlock);
  178. if(rc->update_every < rc->rrdset->update_every) {
  179. netdata_log_error("Health alarm '%s.%s' has update every %d, less than chart update every %d. Setting alarm update frequency to %d.", rrdset_id(rc->rrdset), rrdcalc_name(rc), rc->update_every, rc->rrdset->update_every, rc->rrdset->update_every);
  180. rc->update_every = rc->rrdset->update_every;
  181. }
  182. if(!isnan(rc->green) && isnan(st->green)) {
  183. netdata_log_debug(D_HEALTH, "Health alarm '%s.%s' green threshold set from " NETDATA_DOUBLE_FORMAT_AUTO
  184. " to " NETDATA_DOUBLE_FORMAT_AUTO ".", rrdset_id(rc->rrdset), rrdcalc_name(rc), rc->rrdset->green, rc->green);
  185. st->green = rc->green;
  186. }
  187. if(!isnan(rc->red) && isnan(st->red)) {
  188. netdata_log_debug(D_HEALTH, "Health alarm '%s.%s' red threshold set from " NETDATA_DOUBLE_FORMAT_AUTO " to " NETDATA_DOUBLE_FORMAT_AUTO
  189. ".", rrdset_id(rc->rrdset), rrdcalc_name(rc), rc->rrdset->red, rc->red);
  190. st->red = rc->red;
  191. }
  192. char buf[RRDVAR_MAX_LENGTH + 1];
  193. snprintfz(buf, RRDVAR_MAX_LENGTH, "%s.%s", rrdset_name(st), rrdcalc_name(rc));
  194. STRING *rrdset_name_rrdcalc_name = string_strdupz(buf);
  195. snprintfz(buf, RRDVAR_MAX_LENGTH, "%s.%s", rrdset_id(st), rrdcalc_name(rc));
  196. STRING *rrdset_id_rrdcalc_name = string_strdupz(buf);
  197. rc->rrdvar_local = rrdvar_add_and_acquire(
  198. "local",
  199. st->rrdvars,
  200. rc->name,
  201. RRDVAR_TYPE_CALCULATED,
  202. RRDVAR_FLAG_RRDCALC_LOCAL_VAR,
  203. &rc->value);
  204. rc->rrdvar_family = rrdvar_add_and_acquire(
  205. "family",
  206. rrdfamily_rrdvars_dict(st->rrdfamily),
  207. rc->name,
  208. RRDVAR_TYPE_CALCULATED,
  209. RRDVAR_FLAG_RRDCALC_FAMILY_VAR,
  210. &rc->value);
  211. rc->rrdvar_host_chart_name = rrdvar_add_and_acquire(
  212. "host",
  213. host->rrdvars,
  214. rrdset_name_rrdcalc_name,
  215. RRDVAR_TYPE_CALCULATED,
  216. RRDVAR_FLAG_RRDCALC_HOST_CHARTNAME_VAR,
  217. &rc->value);
  218. rc->rrdvar_host_chart_id = rrdvar_add_and_acquire(
  219. "host",
  220. host->rrdvars,
  221. rrdset_id_rrdcalc_name,
  222. RRDVAR_TYPE_CALCULATED,
  223. RRDVAR_FLAG_RRDCALC_HOST_CHARTID_VAR | ((rc->rrdvar_host_chart_name) ? 0 : RRDVAR_FLAG_RRDCALC_HOST_CHARTNAME_VAR),
  224. &rc->value);
  225. string_freez(rrdset_id_rrdcalc_name);
  226. string_freez(rrdset_name_rrdcalc_name);
  227. if(!rc->units)
  228. rc->units = string_dup(st->units);
  229. rrdvar_store_for_chart(host, st);
  230. rrdcalc_update_info_using_rrdset_labels(rc);
  231. if(!rc->summary) {
  232. rc->summary = string_dup(rc->name);
  233. rc->original_summary = string_dup(rc->name);
  234. }
  235. time_t now = now_realtime_sec();
  236. ALARM_ENTRY *ae = health_create_alarm_entry(
  237. host,
  238. rc->id,
  239. rc->next_event_id++,
  240. rc->config_hash_id,
  241. now,
  242. rc->name,
  243. rc->rrdset->id,
  244. rc->rrdset->context,
  245. rc->rrdset->name,
  246. rc->classification,
  247. rc->component,
  248. rc->type,
  249. rc->exec,
  250. rc->recipient,
  251. now - rc->last_status_change,
  252. rc->old_value,
  253. rc->value,
  254. RRDCALC_STATUS_REMOVED,
  255. rc->status,
  256. rc->source,
  257. rc->units,
  258. rc->summary,
  259. rc->info,
  260. 0,
  261. rrdcalc_isrepeating(rc)?HEALTH_ENTRY_FLAG_IS_REPEATING:0);
  262. rc->ae = ae;
  263. health_alarm_log_add_entry(host, ae);
  264. rrdset_flag_set(st, RRDSET_FLAG_HAS_RRDCALC_LINKED);
  265. }
  266. static void rrdcalc_unlink_from_rrdset(RRDCALC *rc, bool having_ll_wrlock) {
  267. RRDSET *st = rc->rrdset;
  268. if(!st) {
  269. netdata_log_debug(D_HEALTH, "Requested to unlink RRDCALC '%s.%s' which is not linked to any RRDSET", rrdcalc_chart_name(rc), rrdcalc_name(rc));
  270. netdata_log_error("Requested to unlink RRDCALC '%s.%s' which is not linked to any RRDSET", rrdcalc_chart_name(rc), rrdcalc_name(rc));
  271. return;
  272. }
  273. RRDHOST *host = st->rrdhost;
  274. time_t now = now_realtime_sec();
  275. if (likely(rc->status != RRDCALC_STATUS_REMOVED)) {
  276. ALARM_ENTRY *ae = health_create_alarm_entry(
  277. host,
  278. rc->id,
  279. rc->next_event_id++,
  280. rc->config_hash_id,
  281. now,
  282. rc->name,
  283. rc->rrdset->id,
  284. rc->rrdset->context,
  285. rc->rrdset->name,
  286. rc->classification,
  287. rc->component,
  288. rc->type,
  289. rc->exec,
  290. rc->recipient,
  291. now - rc->last_status_change,
  292. rc->old_value,
  293. rc->value,
  294. rc->status,
  295. RRDCALC_STATUS_REMOVED,
  296. rc->source,
  297. rc->units,
  298. rc->summary,
  299. rc->info,
  300. 0,
  301. 0);
  302. rc->ae = ae;
  303. health_alarm_log_add_entry(host, ae);
  304. }
  305. netdata_log_debug(D_HEALTH, "Health unlinking alarm '%s.%s' from chart '%s' of host '%s'", rrdcalc_chart_name(rc), rrdcalc_name(rc), rrdset_id(st), rrdhost_hostname(host));
  306. // unlink it
  307. if(!having_ll_wrlock)
  308. rw_spinlock_write_lock(&st->alerts.spinlock);
  309. DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(st->alerts.base, rc, prev, next);
  310. if(!having_ll_wrlock)
  311. rw_spinlock_write_unlock(&st->alerts.spinlock);
  312. rc->rrdset = NULL;
  313. rrdvar_release_and_del(st->rrdvars, rc->rrdvar_local);
  314. rc->rrdvar_local = NULL;
  315. rrdvar_release_and_del(rrdfamily_rrdvars_dict(st->rrdfamily), rc->rrdvar_family);
  316. rc->rrdvar_family = NULL;
  317. rrdvar_release_and_del(host->rrdvars, rc->rrdvar_host_chart_id);
  318. rc->rrdvar_host_chart_id = NULL;
  319. rrdvar_release_and_del(host->rrdvars, rc->rrdvar_host_chart_name);
  320. rc->rrdvar_host_chart_name = NULL;
  321. // RRDCALC will remain in RRDHOST
  322. // so that if the matching chart is found in the future
  323. // it will be applied automatically
  324. }
  325. static inline bool rrdcalc_check_if_it_matches_rrdset(RRDCALC *rc, RRDSET *st) {
  326. if ( (rc->chart != st->id)
  327. && (rc->chart != st->name))
  328. return false;
  329. if (rc->module_pattern && !simple_pattern_matches_string(rc->module_pattern, st->module_name))
  330. return false;
  331. if (rc->plugin_pattern && !simple_pattern_matches_string(rc->plugin_pattern, st->module_name))
  332. return false;
  333. if (st->rrdhost->rrdlabels && rc->host_labels_pattern && !rrdlabels_match_simple_pattern_parsed(
  334. st->rrdhost->rrdlabels, rc->host_labels_pattern, '=', NULL))
  335. return false;
  336. if (st->rrdlabels && rc->chart_labels_pattern && !rrdlabels_match_simple_pattern_parsed(
  337. st->rrdlabels, rc->chart_labels_pattern, '=', NULL))
  338. return false;
  339. return true;
  340. }
  341. void rrdcalc_link_matching_alerts_to_rrdset(RRDSET *st) {
  342. RRDHOST *host = st->rrdhost;
  343. // netdata_log_debug(D_HEALTH, "find matching alarms for chart '%s'", st->id);
  344. RRDCALC *rc;
  345. foreach_rrdcalc_in_rrdhost_read(host, rc) {
  346. if(rc->rrdset)
  347. continue;
  348. if(unlikely(rrdcalc_check_if_it_matches_rrdset(rc, st)))
  349. rrdcalc_link_to_rrdset(st, rc);
  350. }
  351. foreach_rrdcalc_in_rrdhost_done(rc);
  352. }
  353. static inline int rrdcalc_check_and_link_rrdset_callback(RRDSET *st, void *rrdcalc) {
  354. RRDCALC *rc = rrdcalc;
  355. if(unlikely(rrdcalc_check_if_it_matches_rrdset(rc, st))) {
  356. rrdcalc_link_to_rrdset(st, rc);
  357. return -1;
  358. }
  359. return 0;
  360. }
  361. // ----------------------------------------------------------------------------
  362. // RRDCALC rrdhost index management - constructor
  363. struct rrdcalc_constructor {
  364. RRDHOST *rrdhost; // the host we operate upon
  365. RRDCALC *from_config; // points to the original RRDCALC, as loaded from the config
  366. RRDCALCTEMPLATE *from_rrdcalctemplate; // the template this alert is generated from
  367. RRDSET *rrdset; // when this comes from rrdcalctemplate, we have a matching rrdset
  368. const char *overwrite_alert_name; // when we have a dimension foreach, the alert is renamed
  369. const char *overwrite_dimensions; // when we have a dimension foreach, the dimensions filter is renamed
  370. enum {
  371. RRDCALC_REACT_NONE,
  372. RRDCALC_REACT_NEW,
  373. } react_action;
  374. bool existing_from_template;
  375. };
  376. static void rrdcalc_rrdhost_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrdcalc, void *constructor_data) {
  377. RRDCALC *rc = rrdcalc;
  378. struct rrdcalc_constructor *ctr = constructor_data;
  379. RRDHOST *host = ctr->rrdhost;
  380. rc->key = string_strdupz(dictionary_acquired_item_name(item));
  381. if(ctr->from_rrdcalctemplate) {
  382. rc->run_flags |= RRDCALC_FLAG_FROM_TEMPLATE;
  383. RRDCALCTEMPLATE *rt = ctr->from_rrdcalctemplate;
  384. RRDSET *st = ctr->rrdset;
  385. rc->next_event_id = 1;
  386. rc->name = (ctr->overwrite_alert_name) ? string_strdupz(ctr->overwrite_alert_name) : string_dup(rt->name);
  387. rc->chart = string_dup(st->id);
  388. uuid_copy(rc->config_hash_id, rt->config_hash_id);
  389. rc->dimensions = (ctr->overwrite_dimensions) ? string_strdupz(ctr->overwrite_dimensions) : string_dup(rt->dimensions);
  390. rc->foreach_dimension = NULL;
  391. rc->foreach_dimension_pattern = NULL;
  392. rc->green = rt->green;
  393. rc->red = rt->red;
  394. rc->value = NAN;
  395. rc->old_value = NAN;
  396. rc->delay_up_duration = rt->delay_up_duration;
  397. rc->delay_down_duration = rt->delay_down_duration;
  398. rc->delay_max_duration = rt->delay_max_duration;
  399. rc->delay_multiplier = rt->delay_multiplier;
  400. rc->last_repeat = 0;
  401. rc->times_repeat = 0;
  402. rc->warn_repeat_every = rt->warn_repeat_every;
  403. rc->crit_repeat_every = rt->crit_repeat_every;
  404. rc->group = rt->group;
  405. rc->after = rt->after;
  406. rc->before = rt->before;
  407. rc->update_every = rt->update_every;
  408. rc->options = rt->options;
  409. rc->exec = string_dup(rt->exec);
  410. rc->recipient = string_dup(rt->recipient);
  411. rc->source = string_dup(rt->source);
  412. rc->units = string_dup(rt->units);
  413. rc->info = string_dup(rt->info);
  414. rc->original_info = string_dup(rt->info);
  415. if (!rt->summary)
  416. rt->summary = string_dup(rc->name);
  417. rc->summary = string_dup(rt->summary);
  418. rc->original_summary = string_dup(rt->summary);
  419. rc->classification = string_dup(rt->classification);
  420. rc->component = string_dup(rt->component);
  421. rc->type = string_dup(rt->type);
  422. if(rt->calculation) {
  423. rc->calculation = expression_parse(rt->calculation->source, NULL, NULL);
  424. if(!rc->calculation)
  425. netdata_log_error("Health alarm '%s.%s': failed to parse calculation expression '%s'", rrdset_id(st), rrdcalctemplate_name(rt), rt->calculation->source);
  426. }
  427. if(rt->warning) {
  428. rc->warning = expression_parse(rt->warning->source, NULL, NULL);
  429. if(!rc->warning)
  430. netdata_log_error("Health alarm '%s.%s': failed to re-parse warning expression '%s'", rrdset_id(st), rrdcalctemplate_name(rt), rt->warning->source);
  431. }
  432. if(rt->critical) {
  433. rc->critical = expression_parse(rt->critical->source, NULL, NULL);
  434. if(!rc->critical)
  435. netdata_log_error("Health alarm '%s.%s': failed to re-parse critical expression '%s'", rrdset_id(st), rrdcalctemplate_name(rt), rt->critical->source);
  436. }
  437. }
  438. else if(ctr->from_config) {
  439. // dictionary has already copied all the members values and pointers
  440. // no need for additional work in this case
  441. ;
  442. }
  443. rc->id = rrdcalc_get_unique_id(host, rc->chart, rc->name, &rc->next_event_id, &rc->config_hash_id);
  444. if(rc->calculation) {
  445. rc->calculation->status = &rc->status;
  446. rc->calculation->myself = &rc->value;
  447. rc->calculation->after = &rc->db_after;
  448. rc->calculation->before = &rc->db_before;
  449. rc->calculation->rrdcalc = rc;
  450. }
  451. if(rc->warning) {
  452. rc->warning->status = &rc->status;
  453. rc->warning->myself = &rc->value;
  454. rc->warning->after = &rc->db_after;
  455. rc->warning->before = &rc->db_before;
  456. rc->warning->rrdcalc = rc;
  457. }
  458. if(rc->critical) {
  459. rc->critical->status = &rc->status;
  460. rc->critical->myself = &rc->value;
  461. rc->critical->after = &rc->db_after;
  462. rc->critical->before = &rc->db_before;
  463. rc->critical->rrdcalc = rc;
  464. }
  465. netdata_log_debug(D_HEALTH, "Health added alarm '%s.%s': exec '%s', recipient '%s', green " NETDATA_DOUBLE_FORMAT_AUTO
  466. ", red " NETDATA_DOUBLE_FORMAT_AUTO
  467. ", lookup: group %d, after %d, before %d, options %u, dimensions '%s', for each dimension '%s', update every %d, calculation '%s', warning '%s', critical '%s', source '%s', delay up %d, delay down %d, delay max %d, delay_multiplier %f, warn_repeat_every %u, crit_repeat_every %u",
  468. rrdcalc_chart_name(rc),
  469. rrdcalc_name(rc),
  470. (rc->exec)?rrdcalc_exec(rc):"DEFAULT",
  471. (rc->recipient)?rrdcalc_recipient(rc):"DEFAULT",
  472. rc->green,
  473. rc->red,
  474. (int)rc->group,
  475. rc->after,
  476. rc->before,
  477. rc->options,
  478. (rc->dimensions)?rrdcalc_dimensions(rc):"NONE",
  479. (rc->foreach_dimension)?rrdcalc_foreachdim(rc):"NONE",
  480. rc->update_every,
  481. (rc->calculation)?rc->calculation->parsed_as:"NONE",
  482. (rc->warning)?rc->warning->parsed_as:"NONE",
  483. (rc->critical)?rc->critical->parsed_as:"NONE",
  484. rrdcalc_source(rc),
  485. rc->delay_up_duration,
  486. rc->delay_down_duration,
  487. rc->delay_max_duration,
  488. rc->delay_multiplier,
  489. rc->warn_repeat_every,
  490. rc->crit_repeat_every
  491. );
  492. ctr->react_action = RRDCALC_REACT_NEW;
  493. }
  494. static bool rrdcalc_rrdhost_conflict_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrdcalc, void *rrdcalc_new __maybe_unused, void *constructor_data ) {
  495. RRDCALC *rc = rrdcalc;
  496. struct rrdcalc_constructor *ctr = constructor_data;
  497. if(rc->run_flags & RRDCALC_FLAG_FROM_TEMPLATE)
  498. ctr->existing_from_template = true;
  499. else
  500. ctr->existing_from_template = false;
  501. ctr->react_action = RRDCALC_REACT_NONE;
  502. return false;
  503. }
  504. static void rrdcalc_rrdhost_react_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrdcalc, void *constructor_data) {
  505. RRDCALC *rc = rrdcalc;
  506. struct rrdcalc_constructor *ctr = constructor_data;
  507. RRDHOST *host = ctr->rrdhost;
  508. if(ctr->react_action == RRDCALC_REACT_NEW) {
  509. if(ctr->rrdset)
  510. rrdcalc_link_to_rrdset(ctr->rrdset, rc);
  511. else if (ctr->from_rrdcalctemplate)
  512. rrdcontext_foreach_instance_with_rrdset_in_context(host, string2str(ctr->from_rrdcalctemplate->context), rrdcalc_check_and_link_rrdset_callback, rc);
  513. }
  514. }
  515. // ----------------------------------------------------------------------------
  516. // RRDCALC rrdhost index management - destructor
  517. static void rrdcalc_free_internals(RRDCALC *rc) {
  518. if(unlikely(!rc)) return;
  519. expression_free(rc->calculation);
  520. expression_free(rc->warning);
  521. expression_free(rc->critical);
  522. string_freez(rc->key);
  523. string_freez(rc->name);
  524. string_freez(rc->chart);
  525. string_freez(rc->dimensions);
  526. string_freez(rc->foreach_dimension);
  527. string_freez(rc->exec);
  528. string_freez(rc->recipient);
  529. string_freez(rc->source);
  530. string_freez(rc->units);
  531. string_freez(rc->info);
  532. string_freez(rc->original_info);
  533. string_freez(rc->classification);
  534. string_freez(rc->component);
  535. string_freez(rc->type);
  536. string_freez(rc->host_labels);
  537. string_freez(rc->module_match);
  538. string_freez(rc->plugin_match);
  539. string_freez(rc->chart_labels);
  540. simple_pattern_free(rc->foreach_dimension_pattern);
  541. simple_pattern_free(rc->host_labels_pattern);
  542. simple_pattern_free(rc->module_pattern);
  543. simple_pattern_free(rc->plugin_pattern);
  544. simple_pattern_free(rc->chart_labels_pattern);
  545. }
  546. static void rrdcalc_rrdhost_delete_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrdcalc, void *rrdhost __maybe_unused) {
  547. RRDCALC *rc = rrdcalc;
  548. //RRDHOST *host = rrdhost;
  549. if(unlikely(rc->rrdset))
  550. rrdcalc_unlink_from_rrdset(rc, false);
  551. // any destruction actions that require other locks
  552. // have to be placed in rrdcalc_del(), because the object is actually locked for deletion
  553. rrdcalc_free_internals(rc);
  554. }
  555. // ----------------------------------------------------------------------------
  556. // RRDCALC rrdhost index management - index API
  557. void rrdcalc_rrdhost_index_init(RRDHOST *host) {
  558. if(!host->rrdcalc_root_index) {
  559. host->rrdcalc_root_index = dictionary_create_advanced(DICT_OPTION_DONT_OVERWRITE_VALUE | DICT_OPTION_FIXED_SIZE,
  560. &dictionary_stats_category_rrdhealth, sizeof(RRDCALC));
  561. dictionary_register_insert_callback(host->rrdcalc_root_index, rrdcalc_rrdhost_insert_callback, NULL);
  562. dictionary_register_conflict_callback(host->rrdcalc_root_index, rrdcalc_rrdhost_conflict_callback, NULL);
  563. dictionary_register_react_callback(host->rrdcalc_root_index, rrdcalc_rrdhost_react_callback, NULL);
  564. dictionary_register_delete_callback(host->rrdcalc_root_index, rrdcalc_rrdhost_delete_callback, host);
  565. }
  566. }
  567. void rrdcalc_rrdhost_index_destroy(RRDHOST *host) {
  568. dictionary_destroy(host->rrdcalc_root_index);
  569. host->rrdcalc_root_index = NULL;
  570. }
  571. void rrdcalc_add_from_rrdcalctemplate(RRDHOST *host, RRDCALCTEMPLATE *rt, RRDSET *st, const char *overwrite_alert_name, const char *overwrite_dimensions) {
  572. char key[RRDCALC_MAX_KEY_SIZE + 1];
  573. size_t key_len = rrdcalc_key(key, RRDCALC_MAX_KEY_SIZE, rrdset_id(st),
  574. overwrite_alert_name?overwrite_alert_name:string2str(rt->name));
  575. struct rrdcalc_constructor tmp = {
  576. .rrdhost = host,
  577. .from_config = NULL,
  578. .from_rrdcalctemplate = rt,
  579. .rrdset = st,
  580. .overwrite_alert_name = overwrite_alert_name,
  581. .overwrite_dimensions = overwrite_dimensions,
  582. .react_action = RRDCALC_REACT_NONE,
  583. .existing_from_template = false,
  584. };
  585. dictionary_set_advanced(host->rrdcalc_root_index, key, (ssize_t)(key_len + 1), NULL, sizeof(RRDCALC), &tmp);
  586. if(tmp.react_action != RRDCALC_REACT_NEW && tmp.existing_from_template == false)
  587. netdata_log_error("RRDCALC: from template '%s' on chart '%s' with key '%s', failed to be added to host '%s'. It is manually configured.",
  588. string2str(rt->name), rrdset_id(st), key, rrdhost_hostname(host));
  589. }
  590. int rrdcalc_add_from_config(RRDHOST *host, RRDCALC *rc) {
  591. if(!rc->chart) {
  592. netdata_log_error("Health configuration for alarm '%s' does not have a chart", rrdcalc_name(rc));
  593. return 0;
  594. }
  595. if(!rc->update_every) {
  596. netdata_log_error("Health configuration for alarm '%s.%s' has no frequency (parameter 'every'). Ignoring it.", rrdcalc_chart_name(rc), rrdcalc_name(rc));
  597. return 0;
  598. }
  599. if(!RRDCALC_HAS_DB_LOOKUP(rc) && !rc->calculation && !rc->warning && !rc->critical) {
  600. netdata_log_error("Health configuration for alarm '%s.%s' is useless (no db lookup, no calculation, no warning and no critical expressions)", rrdcalc_chart_name(rc), rrdcalc_name(rc));
  601. return 0;
  602. }
  603. char key[RRDCALC_MAX_KEY_SIZE + 1];
  604. size_t key_len = rrdcalc_key(key, RRDCALC_MAX_KEY_SIZE, string2str(rc->chart), string2str(rc->name));
  605. struct rrdcalc_constructor tmp = {
  606. .rrdhost = host,
  607. .from_config = rc,
  608. .from_rrdcalctemplate = NULL,
  609. .rrdset = NULL,
  610. .react_action = RRDCALC_REACT_NONE,
  611. };
  612. int ret = 1;
  613. RRDCALC *t = dictionary_set_advanced(host->rrdcalc_root_index, key, (ssize_t)(key_len + 1), rc, sizeof(RRDCALC), &tmp);
  614. if(tmp.react_action == RRDCALC_REACT_NEW) {
  615. // we copied rc into the dictionary, so we have to free the container here
  616. freez(rc);
  617. rc = t;
  618. // since we loaded this config from configuration, we need to check if we can link it to alarms
  619. RRDSET *st;
  620. rrdset_foreach_read(st, host) {
  621. if (unlikely(rrdcalc_check_and_link_rrdset_callback(st, rc) == -1))
  622. break;
  623. }
  624. rrdset_foreach_done(st);
  625. }
  626. else {
  627. netdata_log_error(
  628. "RRDCALC: from config '%s' on chart '%s' failed to be added to host '%s'. It already exists.",
  629. string2str(rc->name),
  630. string2str(rc->chart),
  631. rrdhost_hostname(host));
  632. ret = 0;
  633. // free all of it, internals and the container
  634. rrdcalc_free_unused_rrdcalc_loaded_from_config(rc);
  635. }
  636. return ret;
  637. }
  638. static void rrdcalc_unlink_and_delete(RRDHOST *host, RRDCALC *rc, bool having_ll_wrlock) {
  639. if(rc->rrdset)
  640. rrdcalc_unlink_from_rrdset(rc, having_ll_wrlock);
  641. dictionary_del_advanced(host->rrdcalc_root_index, string2str(rc->key), (ssize_t)string_strlen(rc->key) + 1);
  642. }
  643. // ----------------------------------------------------------------------------
  644. // RRDCALC cleanup API functions
  645. void rrdcalc_delete_alerts_not_matching_host_labels_from_this_host(RRDHOST *host) {
  646. RRDCALC *rc;
  647. foreach_rrdcalc_in_rrdhost_reentrant(host, rc) {
  648. if (!rc->host_labels)
  649. continue;
  650. if(!rrdlabels_match_simple_pattern_parsed(host->rrdlabels, rc->host_labels_pattern, '=', NULL)) {
  651. netdata_log_health("Health configuration for alarm '%s' cannot be applied, because the host %s does not have the label(s) '%s'",
  652. rrdcalc_name(rc),
  653. rrdhost_hostname(host),
  654. rrdcalc_host_labels(rc));
  655. rrdcalc_unlink_and_delete(host, rc, false);
  656. }
  657. }
  658. foreach_rrdcalc_in_rrdhost_done(rc);
  659. }
  660. void rrdcalc_delete_alerts_not_matching_host_labels_from_all_hosts() {
  661. RRDHOST *host;
  662. dfe_start_reentrant(rrdhost_root_index, host) {
  663. if (unlikely(!host->health.health_enabled))
  664. continue;
  665. if (host->rrdlabels)
  666. rrdcalc_delete_alerts_not_matching_host_labels_from_this_host(host);
  667. }
  668. dfe_done(host);
  669. }
  670. void rrdcalc_unlink_all_rrdset_alerts(RRDSET *st) {
  671. RRDCALC *rc, *last = NULL;
  672. rw_spinlock_write_lock(&st->alerts.spinlock);
  673. while((rc = st->alerts.base)) {
  674. if(last == rc) {
  675. netdata_log_error("RRDCALC: malformed list of alerts linked to chart - cannot cleanup - giving up.");
  676. break;
  677. }
  678. last = rc;
  679. if(rc->run_flags & RRDCALC_FLAG_FROM_TEMPLATE) {
  680. // if the alert comes from a template we can just delete it
  681. rrdcalc_unlink_and_delete(st->rrdhost, rc, true);
  682. }
  683. else {
  684. // this is a configuration for a specific chart
  685. // it should stay in the list
  686. rrdcalc_unlink_from_rrdset(rc, true);
  687. }
  688. }
  689. rw_spinlock_write_unlock(&st->alerts.spinlock);
  690. }
  691. void rrdcalc_delete_all(RRDHOST *host) {
  692. dictionary_flush(host->rrdcalc_root_index);
  693. }
  694. void rrdcalc_free_unused_rrdcalc_loaded_from_config(RRDCALC *rc) {
  695. if(rc->rrdset)
  696. rrdcalc_unlink_from_rrdset(rc, false);
  697. rrdcalc_free_internals(rc);
  698. freez(rc);
  699. }