health_json.c 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439
  1. // SPDX-License-Identifier: GPL-3.0-or-later
  2. #include "health.h"
  3. void health_string2json(BUFFER *wb, const char *prefix, const char *label, const char *value, const char *suffix) {
  4. if(value && *value) {
  5. buffer_sprintf(wb, "%s\"%s\":\"", prefix, label);
  6. buffer_strcat_htmlescape(wb, value);
  7. buffer_strcat(wb, "\"");
  8. buffer_strcat(wb, suffix);
  9. }
  10. else
  11. buffer_sprintf(wb, "%s\"%s\":null%s", prefix, label, suffix);
  12. }
  13. void health_alarm_entry2json_nolock(BUFFER *wb, ALARM_ENTRY *ae, RRDHOST *host) {
  14. char *edit_command = ae->source ? health_edit_command_from_source(ae_source(ae)) : strdupz("UNKNOWN=0=UNKNOWN");
  15. char config_hash_id[GUID_LEN + 1];
  16. uuid_unparse_lower(ae->config_hash_id, config_hash_id);
  17. buffer_sprintf(wb,
  18. "\n\t{\n"
  19. "\t\t\"hostname\": \"%s\",\n"
  20. "\t\t\"utc_offset\": %d,\n"
  21. "\t\t\"timezone\": \"%s\",\n"
  22. "\t\t\"unique_id\": %u,\n"
  23. "\t\t\"alarm_id\": %u,\n"
  24. "\t\t\"alarm_event_id\": %u,\n"
  25. "\t\t\"config_hash_id\": \"%s\",\n"
  26. "\t\t\"name\": \"%s\",\n"
  27. "\t\t\"chart\": \"%s\",\n"
  28. "\t\t\"context\": \"%s\",\n"
  29. "\t\t\"family\": \"%s\",\n"
  30. "\t\t\"class\": \"%s\",\n"
  31. "\t\t\"component\": \"%s\",\n"
  32. "\t\t\"type\": \"%s\",\n"
  33. "\t\t\"processed\": %s,\n"
  34. "\t\t\"updated\": %s,\n"
  35. "\t\t\"exec_run\": %lu,\n"
  36. "\t\t\"exec_failed\": %s,\n"
  37. "\t\t\"exec\": \"%s\",\n"
  38. "\t\t\"recipient\": \"%s\",\n"
  39. "\t\t\"exec_code\": %d,\n"
  40. "\t\t\"source\": \"%s\",\n"
  41. "\t\t\"command\": \"%s\",\n"
  42. "\t\t\"units\": \"%s\",\n"
  43. "\t\t\"when\": %lu,\n"
  44. "\t\t\"duration\": %lu,\n"
  45. "\t\t\"non_clear_duration\": %lu,\n"
  46. "\t\t\"status\": \"%s\",\n"
  47. "\t\t\"old_status\": \"%s\",\n"
  48. "\t\t\"delay\": %d,\n"
  49. "\t\t\"delay_up_to_timestamp\": %lu,\n"
  50. "\t\t\"updated_by_id\": %u,\n"
  51. "\t\t\"updates_id\": %u,\n"
  52. "\t\t\"value_string\": \"%s\",\n"
  53. "\t\t\"old_value_string\": \"%s\",\n"
  54. "\t\t\"last_repeat\": \"%lu\",\n"
  55. "\t\t\"silenced\": \"%s\",\n"
  56. , rrdhost_hostname(host)
  57. , host->utc_offset
  58. , rrdhost_abbrev_timezone(host)
  59. , ae->unique_id
  60. , ae->alarm_id
  61. , ae->alarm_event_id
  62. , config_hash_id
  63. , ae_name(ae)
  64. , ae_chart_name(ae)
  65. , ae_chart_context(ae)
  66. , ae_family(ae)
  67. , ae->classification?ae_classification(ae):"Unknown"
  68. , ae->component?ae_component(ae):"Unknown"
  69. , ae->type?ae_type(ae):"Unknown"
  70. , (ae->flags & HEALTH_ENTRY_FLAG_PROCESSED)?"true":"false"
  71. , (ae->flags & HEALTH_ENTRY_FLAG_UPDATED)?"true":"false"
  72. , (unsigned long)ae->exec_run_timestamp
  73. , (ae->flags & HEALTH_ENTRY_FLAG_EXEC_FAILED)?"true":"false"
  74. , ae->exec?ae_exec(ae):string2str(host->health.health_default_exec)
  75. , ae->recipient?ae_recipient(ae):string2str(host->health.health_default_recipient)
  76. , ae->exec_code
  77. , ae_source(ae)
  78. , edit_command
  79. , ae_units(ae)
  80. , (unsigned long)ae->when
  81. , (unsigned long)ae->duration
  82. , (unsigned long)ae->non_clear_duration
  83. , rrdcalc_status2string(ae->new_status)
  84. , rrdcalc_status2string(ae->old_status)
  85. , ae->delay
  86. , (unsigned long)ae->delay_up_to_timestamp
  87. , ae->updated_by_id
  88. , ae->updates_id
  89. , ae_new_value_string(ae)
  90. , ae_old_value_string(ae)
  91. , (unsigned long)ae->last_repeat
  92. , (ae->flags & HEALTH_ENTRY_FLAG_SILENCED)?"true":"false"
  93. );
  94. health_string2json(wb, "\t\t", "info", ae->info ? ae_info(ae) : "", ",\n");
  95. if(unlikely(ae->flags & HEALTH_ENTRY_FLAG_NO_CLEAR_NOTIFICATION)) {
  96. buffer_strcat(wb, "\t\t\"no_clear_notification\": true,\n");
  97. }
  98. buffer_strcat(wb, "\t\t\"value\":");
  99. buffer_rrd_value(wb, ae->new_value);
  100. buffer_strcat(wb, ",\n");
  101. buffer_strcat(wb, "\t\t\"old_value\":");
  102. buffer_rrd_value(wb, ae->old_value);
  103. buffer_strcat(wb, "\n");
  104. buffer_strcat(wb, "\t}");
  105. freez(edit_command);
  106. }
  107. void health_alarm_log2json(RRDHOST *host, BUFFER *wb, uint32_t after, char *chart) {
  108. buffer_strcat(wb, "[");
  109. unsigned int max = host->health_log.max;
  110. unsigned int count = 0;
  111. STRING *chart_string = string_strdupz(chart);
  112. netdata_rwlock_rdlock(&host->health_log.alarm_log_rwlock);
  113. ALARM_ENTRY *ae;
  114. for (ae = host->health_log.alarms; ae && count < max; ae = ae->next) {
  115. if ((ae->unique_id > after) && (!chart || chart_string == ae->chart)) {
  116. if (likely(count))
  117. buffer_strcat(wb, ",");
  118. health_alarm_entry2json_nolock(wb, ae, host);
  119. count++;
  120. }
  121. }
  122. netdata_rwlock_unlock(&host->health_log.alarm_log_rwlock);
  123. string_freez(chart_string);
  124. buffer_strcat(wb, "\n]\n");
  125. }
  126. static inline void health_rrdcalc_values2json_nolock(RRDHOST *host, BUFFER *wb, RRDCALC *rc) {
  127. (void)host;
  128. buffer_sprintf(wb,
  129. "\t\t\"%s.%s\": {\n"
  130. "\t\t\t\"id\": %lu,\n"
  131. , rrdcalc_chart_name(rc), rrdcalc_name(rc)
  132. , (unsigned long)rc->id);
  133. buffer_strcat(wb, "\t\t\t\"value\":");
  134. buffer_rrd_value(wb, rc->value);
  135. buffer_strcat(wb, ",\n");
  136. buffer_strcat(wb, "\t\t\t\"last_updated\":");
  137. buffer_sprintf(wb, "%lu", (unsigned long)rc->last_updated);
  138. buffer_strcat(wb, ",\n");
  139. buffer_sprintf(wb,
  140. "\t\t\t\"status\": \"%s\"\n"
  141. , rrdcalc_status2string(rc->status));
  142. buffer_strcat(wb, "\t\t}");
  143. }
  144. static inline void health_rrdcalc2json_nolock(RRDHOST *host, BUFFER *wb, RRDCALC *rc) {
  145. char value_string[100 + 1];
  146. format_value_and_unit(value_string, 100, rc->value, rrdcalc_units(rc), -1);
  147. char hash_id[GUID_LEN + 1];
  148. uuid_unparse_lower(rc->config_hash_id, hash_id);
  149. buffer_sprintf(wb,
  150. "\t\t\"%s.%s\": {\n"
  151. "\t\t\t\"id\": %lu,\n"
  152. "\t\t\t\"config_hash_id\": \"%s\",\n"
  153. "\t\t\t\"name\": \"%s\",\n"
  154. "\t\t\t\"chart\": \"%s\",\n"
  155. "\t\t\t\"family\": \"%s\",\n"
  156. "\t\t\t\"class\": \"%s\",\n"
  157. "\t\t\t\"component\": \"%s\",\n"
  158. "\t\t\t\"type\": \"%s\",\n"
  159. "\t\t\t\"active\": %s,\n"
  160. "\t\t\t\"disabled\": %s,\n"
  161. "\t\t\t\"silenced\": %s,\n"
  162. "\t\t\t\"exec\": \"%s\",\n"
  163. "\t\t\t\"recipient\": \"%s\",\n"
  164. "\t\t\t\"source\": \"%s\",\n"
  165. "\t\t\t\"units\": \"%s\",\n"
  166. "\t\t\t\"info\": \"%s\",\n"
  167. "\t\t\t\"status\": \"%s\",\n"
  168. "\t\t\t\"last_status_change\": %lu,\n"
  169. "\t\t\t\"last_updated\": %lu,\n"
  170. "\t\t\t\"next_update\": %lu,\n"
  171. "\t\t\t\"update_every\": %d,\n"
  172. "\t\t\t\"delay_up_duration\": %d,\n"
  173. "\t\t\t\"delay_down_duration\": %d,\n"
  174. "\t\t\t\"delay_max_duration\": %d,\n"
  175. "\t\t\t\"delay_multiplier\": %f,\n"
  176. "\t\t\t\"delay\": %d,\n"
  177. "\t\t\t\"delay_up_to_timestamp\": %lu,\n"
  178. "\t\t\t\"warn_repeat_every\": \"%u\",\n"
  179. "\t\t\t\"crit_repeat_every\": \"%u\",\n"
  180. "\t\t\t\"value_string\": \"%s\",\n"
  181. "\t\t\t\"last_repeat\": \"%lu\",\n"
  182. "\t\t\t\"times_repeat\": %lu,\n"
  183. , rrdcalc_chart_name(rc), rrdcalc_name(rc)
  184. , (unsigned long)rc->id
  185. , hash_id
  186. , rrdcalc_name(rc)
  187. , rrdcalc_chart_name(rc)
  188. , (rc->rrdset)?rrdset_family(rc->rrdset):""
  189. , rc->classification?rrdcalc_classification(rc):"Unknown"
  190. , rc->component?rrdcalc_component(rc):"Unknown"
  191. , rc->type?rrdcalc_type(rc):"Unknown"
  192. , (rc->rrdset)?"true":"false"
  193. , (rc->run_flags & RRDCALC_FLAG_DISABLED)?"true":"false"
  194. , (rc->run_flags & RRDCALC_FLAG_SILENCED)?"true":"false"
  195. , rc->exec?rrdcalc_exec(rc):string2str(host->health.health_default_exec)
  196. , rc->recipient?rrdcalc_recipient(rc):string2str(host->health.health_default_recipient)
  197. , rrdcalc_source(rc)
  198. , rrdcalc_units(rc)
  199. , rrdcalc_info(rc)
  200. , rrdcalc_status2string(rc->status)
  201. , (unsigned long)rc->last_status_change
  202. , (unsigned long)rc->last_updated
  203. , (unsigned long)rc->next_update
  204. , rc->update_every
  205. , rc->delay_up_duration
  206. , rc->delay_down_duration
  207. , rc->delay_max_duration
  208. , rc->delay_multiplier
  209. , rc->delay_last
  210. , (unsigned long)rc->delay_up_to_timestamp
  211. , rc->warn_repeat_every
  212. , rc->crit_repeat_every
  213. , value_string
  214. , (unsigned long)rc->last_repeat
  215. , (unsigned long)rc->times_repeat
  216. );
  217. if(unlikely(rc->options & RRDCALC_OPTION_NO_CLEAR_NOTIFICATION)) {
  218. buffer_strcat(wb, "\t\t\t\"no_clear_notification\": true,\n");
  219. }
  220. if(RRDCALC_HAS_DB_LOOKUP(rc)) {
  221. if(rc->dimensions)
  222. health_string2json(wb, "\t\t\t", "lookup_dimensions", rrdcalc_dimensions(rc), ",\n");
  223. buffer_sprintf(wb,
  224. "\t\t\t\"db_after\": %lu,\n"
  225. "\t\t\t\"db_before\": %lu,\n"
  226. "\t\t\t\"lookup_method\": \"%s\",\n"
  227. "\t\t\t\"lookup_after\": %d,\n"
  228. "\t\t\t\"lookup_before\": %d,\n"
  229. "\t\t\t\"lookup_options\": \"",
  230. (unsigned long) rc->db_after,
  231. (unsigned long) rc->db_before,
  232. group_method2string(rc->group),
  233. rc->after,
  234. rc->before
  235. );
  236. buffer_data_options2string(wb, rc->options);
  237. buffer_strcat(wb, "\",\n");
  238. }
  239. if(rc->calculation) {
  240. health_string2json(wb, "\t\t\t", "calc", rc->calculation->source, ",\n");
  241. health_string2json(wb, "\t\t\t", "calc_parsed", rc->calculation->parsed_as, ",\n");
  242. }
  243. if(rc->warning) {
  244. health_string2json(wb, "\t\t\t", "warn", rc->warning->source, ",\n");
  245. health_string2json(wb, "\t\t\t", "warn_parsed", rc->warning->parsed_as, ",\n");
  246. }
  247. if(rc->critical) {
  248. health_string2json(wb, "\t\t\t", "crit", rc->critical->source, ",\n");
  249. health_string2json(wb, "\t\t\t", "crit_parsed", rc->critical->parsed_as, ",\n");
  250. }
  251. buffer_strcat(wb, "\t\t\t\"green\":");
  252. buffer_rrd_value(wb, rc->green);
  253. buffer_strcat(wb, ",\n");
  254. buffer_strcat(wb, "\t\t\t\"red\":");
  255. buffer_rrd_value(wb, rc->red);
  256. buffer_strcat(wb, ",\n");
  257. buffer_strcat(wb, "\t\t\t\"value\":");
  258. buffer_rrd_value(wb, rc->value);
  259. buffer_strcat(wb, "\n");
  260. buffer_strcat(wb, "\t\t}");
  261. }
  262. //void health_rrdcalctemplate2json_nolock(BUFFER *wb, RRDCALCTEMPLATE *rt) {
  263. //
  264. //}
  265. void health_aggregate_alarms(RRDHOST *host, BUFFER *wb, BUFFER* contexts, RRDCALC_STATUS status) {
  266. RRDCALC *rc;
  267. int numberOfAlarms = 0;
  268. char *tok = NULL;
  269. char *p = NULL;
  270. if (contexts) {
  271. p = (char*)buffer_tostring(contexts);
  272. while(p && *p && (tok = mystrsep(&p, ", |"))) {
  273. if(!*tok) continue;
  274. STRING *tok_string = string_strdupz(tok);
  275. foreach_rrdcalc_in_rrdhost_read(host, rc) {
  276. if(unlikely(!rc->rrdset || !rc->rrdset->last_collected_time.tv_sec))
  277. continue;
  278. if (unlikely(!rrdset_is_available_for_exporting_and_alarms(rc->rrdset)))
  279. continue;
  280. if(unlikely(rc->rrdset
  281. && rc->rrdset->context == tok_string
  282. && ((status==RRDCALC_STATUS_RAISED)?(rc->status >= RRDCALC_STATUS_WARNING):rc->status == status)))
  283. numberOfAlarms++;
  284. }
  285. foreach_rrdcalc_in_rrdhost_done(rc);
  286. string_freez(tok_string);
  287. }
  288. }
  289. else {
  290. foreach_rrdcalc_in_rrdhost_read(host, rc) {
  291. if(unlikely(!rc->rrdset || !rc->rrdset->last_collected_time.tv_sec))
  292. continue;
  293. if (unlikely(!rrdset_is_available_for_exporting_and_alarms(rc->rrdset)))
  294. continue;
  295. if(unlikely((status==RRDCALC_STATUS_RAISED)?(rc->status >= RRDCALC_STATUS_WARNING):rc->status == status))
  296. numberOfAlarms++;
  297. }
  298. foreach_rrdcalc_in_rrdhost_done(rc);
  299. }
  300. buffer_sprintf(wb, "%d", numberOfAlarms);
  301. }
  302. static void health_alarms2json_fill_alarms(RRDHOST *host, BUFFER *wb, int all, void (*fp)(RRDHOST *, BUFFER *, RRDCALC *)) {
  303. RRDCALC *rc;
  304. int i = 0;
  305. foreach_rrdcalc_in_rrdhost_read(host, rc) {
  306. if(unlikely(!rc->rrdset || !rc->rrdset->last_collected_time.tv_sec))
  307. continue;
  308. if (unlikely(!rrdset_is_available_for_exporting_and_alarms(rc->rrdset)))
  309. continue;
  310. if(likely(!all && !(rc->status == RRDCALC_STATUS_WARNING || rc->status == RRDCALC_STATUS_CRITICAL)))
  311. continue;
  312. if(likely(i)) buffer_strcat(wb, ",\n");
  313. fp(host, wb, rc);
  314. i++;
  315. }
  316. foreach_rrdcalc_in_rrdhost_done(rc);
  317. }
  318. void health_alarms2json(RRDHOST *host, BUFFER *wb, int all) {
  319. buffer_sprintf(wb, "{\n\t\"hostname\": \"%s\","
  320. "\n\t\"latest_alarm_log_unique_id\": %u,"
  321. "\n\t\"status\": %s,"
  322. "\n\t\"now\": %lu,"
  323. "\n\t\"alarms\": {\n",
  324. rrdhost_hostname(host),
  325. (host->health_log.next_log_id > 0)?(host->health_log.next_log_id - 1):0,
  326. host->health.health_enabled?"true":"false",
  327. (unsigned long)now_realtime_sec());
  328. health_alarms2json_fill_alarms(host, wb, all, health_rrdcalc2json_nolock);
  329. // rrdhost_rdlock(host);
  330. // buffer_strcat(wb, "\n\t},\n\t\"templates\": {");
  331. // RRDCALCTEMPLATE *rt;
  332. // for(rt = host->templates; rt ; rt = rt->next)
  333. // health_rrdcalctemplate2json_nolock(wb, rt);
  334. // rrdhost_unlock(host);
  335. buffer_strcat(wb, "\n\t}\n}\n");
  336. }
  337. void health_alarms_values2json(RRDHOST *host, BUFFER *wb, int all) {
  338. buffer_sprintf(wb, "{\n\t\"hostname\": \"%s\","
  339. "\n\t\"alarms\": {\n",
  340. rrdhost_hostname(host));
  341. health_alarms2json_fill_alarms(host, wb, all, health_rrdcalc_values2json_nolock);
  342. buffer_strcat(wb, "\n\t}\n}\n");
  343. }
  344. static int have_recent_alarm(RRDHOST *host, uint32_t alarm_id, uint32_t mark)
  345. {
  346. ALARM_ENTRY *ae = host->health_log.alarms;
  347. while(ae) {
  348. if (ae->alarm_id == alarm_id && ae->unique_id > mark &&
  349. (ae->new_status != RRDCALC_STATUS_WARNING && ae->new_status != RRDCALC_STATUS_CRITICAL))
  350. return 1;
  351. ae = ae->next;
  352. }
  353. return 0;
  354. }
  355. void health_active_log_alarms_2json(RRDHOST *host, BUFFER *wb) {
  356. netdata_rwlock_rdlock(&host->health_log.alarm_log_rwlock);
  357. buffer_sprintf(wb, "[\n");
  358. unsigned int max = host->health_log.max;
  359. unsigned int count = 0;
  360. ALARM_ENTRY *ae;
  361. for(ae = host->health_log.alarms; ae && count < max ; ae = ae->next) {
  362. if (!ae->updated_by_id &&
  363. ((ae->new_status == RRDCALC_STATUS_WARNING || ae->new_status == RRDCALC_STATUS_CRITICAL) ||
  364. ((ae->old_status == RRDCALC_STATUS_WARNING || ae->old_status == RRDCALC_STATUS_CRITICAL) &&
  365. ae->new_status == RRDCALC_STATUS_REMOVED))) {
  366. if (have_recent_alarm(host, ae->alarm_id, ae->unique_id))
  367. continue;
  368. if (likely(count))
  369. buffer_strcat(wb, ",");
  370. health_alarm_entry2json_nolock(wb, ae, host);
  371. count++;
  372. }
  373. }
  374. buffer_strcat(wb, "]");
  375. netdata_rwlock_unlock(&host->health_log.alarm_log_rwlock);
  376. }