health_json.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343
  1. // SPDX-License-Identifier: GPL-3.0-or-later
  2. #include "health.h"
  3. static inline void health_string2json(BUFFER *wb, const char *prefix, const char *label, const char *value, const char *suffix) {
  4. if(value && *value) {
  5. buffer_sprintf(wb, "%s\"%s\":\"", prefix, label);
  6. buffer_strcat_htmlescape(wb, value);
  7. buffer_strcat(wb, "\"");
  8. buffer_strcat(wb, suffix);
  9. }
  10. else
  11. buffer_sprintf(wb, "%s\"%s\":null%s", prefix, label, suffix);
  12. }
  13. static inline void health_alarm_entry2json_nolock(BUFFER *wb, ALARM_ENTRY *ae, RRDHOST *host) {
  14. buffer_sprintf(wb,
  15. "\n\t{\n"
  16. "\t\t\"hostname\": \"%s\",\n"
  17. "\t\t\"unique_id\": %u,\n"
  18. "\t\t\"alarm_id\": %u,\n"
  19. "\t\t\"alarm_event_id\": %u,\n"
  20. "\t\t\"name\": \"%s\",\n"
  21. "\t\t\"chart\": \"%s\",\n"
  22. "\t\t\"family\": \"%s\",\n"
  23. "\t\t\"processed\": %s,\n"
  24. "\t\t\"updated\": %s,\n"
  25. "\t\t\"exec_run\": %lu,\n"
  26. "\t\t\"exec_failed\": %s,\n"
  27. "\t\t\"exec\": \"%s\",\n"
  28. "\t\t\"recipient\": \"%s\",\n"
  29. "\t\t\"exec_code\": %d,\n"
  30. "\t\t\"source\": \"%s\",\n"
  31. "\t\t\"units\": \"%s\",\n"
  32. "\t\t\"when\": %lu,\n"
  33. "\t\t\"duration\": %lu,\n"
  34. "\t\t\"non_clear_duration\": %lu,\n"
  35. "\t\t\"status\": \"%s\",\n"
  36. "\t\t\"old_status\": \"%s\",\n"
  37. "\t\t\"delay\": %d,\n"
  38. "\t\t\"delay_up_to_timestamp\": %lu,\n"
  39. "\t\t\"updated_by_id\": %u,\n"
  40. "\t\t\"updates_id\": %u,\n"
  41. "\t\t\"value_string\": \"%s\",\n"
  42. "\t\t\"old_value_string\": \"%s\",\n"
  43. "\t\t\"last_repeat\": \"%lu\",\n"
  44. "\t\t\"silenced\": \"%s\",\n"
  45. , host->hostname
  46. , ae->unique_id
  47. , ae->alarm_id
  48. , ae->alarm_event_id
  49. , ae->name
  50. , ae->chart
  51. , ae->family
  52. , (ae->flags & HEALTH_ENTRY_FLAG_PROCESSED)?"true":"false"
  53. , (ae->flags & HEALTH_ENTRY_FLAG_UPDATED)?"true":"false"
  54. , (unsigned long)ae->exec_run_timestamp
  55. , (ae->flags & HEALTH_ENTRY_FLAG_EXEC_FAILED)?"true":"false"
  56. , ae->exec?ae->exec:host->health_default_exec
  57. , ae->recipient?ae->recipient:host->health_default_recipient
  58. , ae->exec_code
  59. , ae->source
  60. , ae->units?ae->units:""
  61. , (unsigned long)ae->when
  62. , (unsigned long)ae->duration
  63. , (unsigned long)ae->non_clear_duration
  64. , rrdcalc_status2string(ae->new_status)
  65. , rrdcalc_status2string(ae->old_status)
  66. , ae->delay
  67. , (unsigned long)ae->delay_up_to_timestamp
  68. , ae->updated_by_id
  69. , ae->updates_id
  70. , ae->new_value_string
  71. , ae->old_value_string
  72. , (unsigned long)ae->last_repeat
  73. , (ae->flags & HEALTH_ENTRY_FLAG_SILENCED)?"true":"false"
  74. );
  75. health_string2json(wb, "\t\t", "info", ae->info?ae->info:"", ",\n");
  76. if(unlikely(ae->flags & HEALTH_ENTRY_FLAG_NO_CLEAR_NOTIFICATION)) {
  77. buffer_strcat(wb, "\t\t\"no_clear_notification\": true,\n");
  78. }
  79. buffer_strcat(wb, "\t\t\"value\":");
  80. buffer_rrd_value(wb, ae->new_value);
  81. buffer_strcat(wb, ",\n");
  82. buffer_strcat(wb, "\t\t\"old_value\":");
  83. buffer_rrd_value(wb, ae->old_value);
  84. buffer_strcat(wb, "\n");
  85. buffer_strcat(wb, "\t}");
  86. }
  87. void health_alarm_log2json(RRDHOST *host, BUFFER *wb, uint32_t after) {
  88. netdata_rwlock_rdlock(&host->health_log.alarm_log_rwlock);
  89. buffer_strcat(wb, "[");
  90. unsigned int max = host->health_log.max;
  91. unsigned int count = 0;
  92. ALARM_ENTRY *ae;
  93. for(ae = host->health_log.alarms; ae && count < max ; count++, ae = ae->next) {
  94. if(ae->unique_id > after) {
  95. if(likely(count)) buffer_strcat(wb, ",");
  96. health_alarm_entry2json_nolock(wb, ae, host);
  97. }
  98. }
  99. buffer_strcat(wb, "\n]\n");
  100. netdata_rwlock_unlock(&host->health_log.alarm_log_rwlock);
  101. }
  102. static inline void health_rrdcalc_values2json_nolock(RRDHOST *host, BUFFER *wb, RRDCALC *rc) {
  103. (void)host;
  104. buffer_sprintf(wb,
  105. "\t\t\"%s.%s\": {\n"
  106. "\t\t\t\"id\": %lu,\n"
  107. , rc->chart, rc->name
  108. , (unsigned long)rc->id);
  109. buffer_strcat(wb, "\t\t\t\"value\":");
  110. buffer_rrd_value(wb, rc->value);
  111. buffer_strcat(wb, ",\n");
  112. buffer_sprintf(wb,
  113. "\t\t\t\"status\": \"%s\"\n"
  114. , rrdcalc_status2string(rc->status));
  115. buffer_strcat(wb, "\t\t}");
  116. }
  117. static inline void health_rrdcalc2json_nolock(RRDHOST *host, BUFFER *wb, RRDCALC *rc) {
  118. char value_string[100 + 1];
  119. format_value_and_unit(value_string, 100, rc->value, rc->units, -1);
  120. buffer_sprintf(wb,
  121. "\t\t\"%s.%s\": {\n"
  122. "\t\t\t\"id\": %lu,\n"
  123. "\t\t\t\"name\": \"%s\",\n"
  124. "\t\t\t\"chart\": \"%s\",\n"
  125. "\t\t\t\"family\": \"%s\",\n"
  126. "\t\t\t\"active\": %s,\n"
  127. "\t\t\t\"disabled\": %s,\n"
  128. "\t\t\t\"silenced\": %s,\n"
  129. "\t\t\t\"exec\": \"%s\",\n"
  130. "\t\t\t\"recipient\": \"%s\",\n"
  131. "\t\t\t\"source\": \"%s\",\n"
  132. "\t\t\t\"units\": \"%s\",\n"
  133. "\t\t\t\"info\": \"%s\",\n"
  134. "\t\t\t\"status\": \"%s\",\n"
  135. "\t\t\t\"last_status_change\": %lu,\n"
  136. "\t\t\t\"last_updated\": %lu,\n"
  137. "\t\t\t\"next_update\": %lu,\n"
  138. "\t\t\t\"update_every\": %d,\n"
  139. "\t\t\t\"delay_up_duration\": %d,\n"
  140. "\t\t\t\"delay_down_duration\": %d,\n"
  141. "\t\t\t\"delay_max_duration\": %d,\n"
  142. "\t\t\t\"delay_multiplier\": %f,\n"
  143. "\t\t\t\"delay\": %d,\n"
  144. "\t\t\t\"delay_up_to_timestamp\": %lu,\n"
  145. "\t\t\t\"warn_repeat_every\": \"%u\",\n"
  146. "\t\t\t\"crit_repeat_every\": \"%u\",\n"
  147. "\t\t\t\"value_string\": \"%s\",\n"
  148. "\t\t\t\"last_repeat\": \"%lu\",\n"
  149. , rc->chart, rc->name
  150. , (unsigned long)rc->id
  151. , rc->name
  152. , rc->chart
  153. , (rc->rrdset && rc->rrdset->family)?rc->rrdset->family:""
  154. , (rc->rrdset)?"true":"false"
  155. , (rc->rrdcalc_flags & RRDCALC_FLAG_DISABLED)?"true":"false"
  156. , (rc->rrdcalc_flags & RRDCALC_FLAG_SILENCED)?"true":"false"
  157. , rc->exec?rc->exec:host->health_default_exec
  158. , rc->recipient?rc->recipient:host->health_default_recipient
  159. , rc->source
  160. , rc->units?rc->units:""
  161. , rc->info?rc->info:""
  162. , rrdcalc_status2string(rc->status)
  163. , (unsigned long)rc->last_status_change
  164. , (unsigned long)rc->last_updated
  165. , (unsigned long)rc->next_update
  166. , rc->update_every
  167. , rc->delay_up_duration
  168. , rc->delay_down_duration
  169. , rc->delay_max_duration
  170. , rc->delay_multiplier
  171. , rc->delay_last
  172. , (unsigned long)rc->delay_up_to_timestamp
  173. , rc->warn_repeat_every
  174. , rc->crit_repeat_every
  175. , value_string
  176. , (unsigned long)rc->last_repeat
  177. );
  178. if(unlikely(rc->options & RRDCALC_FLAG_NO_CLEAR_NOTIFICATION)) {
  179. buffer_strcat(wb, "\t\t\t\"no_clear_notification\": true,\n");
  180. }
  181. if(RRDCALC_HAS_DB_LOOKUP(rc)) {
  182. if(rc->dimensions && *rc->dimensions)
  183. health_string2json(wb, "\t\t\t", "lookup_dimensions", rc->dimensions, ",\n");
  184. buffer_sprintf(wb,
  185. "\t\t\t\"db_after\": %lu,\n"
  186. "\t\t\t\"db_before\": %lu,\n"
  187. "\t\t\t\"lookup_method\": \"%s\",\n"
  188. "\t\t\t\"lookup_after\": %d,\n"
  189. "\t\t\t\"lookup_before\": %d,\n"
  190. "\t\t\t\"lookup_options\": \"",
  191. (unsigned long) rc->db_after,
  192. (unsigned long) rc->db_before,
  193. group_method2string(rc->group),
  194. rc->after,
  195. rc->before
  196. );
  197. buffer_data_options2string(wb, rc->options);
  198. buffer_strcat(wb, "\",\n");
  199. }
  200. if(rc->calculation) {
  201. health_string2json(wb, "\t\t\t", "calc", rc->calculation->source, ",\n");
  202. health_string2json(wb, "\t\t\t", "calc_parsed", rc->calculation->parsed_as, ",\n");
  203. }
  204. if(rc->warning) {
  205. health_string2json(wb, "\t\t\t", "warn", rc->warning->source, ",\n");
  206. health_string2json(wb, "\t\t\t", "warn_parsed", rc->warning->parsed_as, ",\n");
  207. }
  208. if(rc->critical) {
  209. health_string2json(wb, "\t\t\t", "crit", rc->critical->source, ",\n");
  210. health_string2json(wb, "\t\t\t", "crit_parsed", rc->critical->parsed_as, ",\n");
  211. }
  212. buffer_strcat(wb, "\t\t\t\"green\":");
  213. buffer_rrd_value(wb, rc->green);
  214. buffer_strcat(wb, ",\n");
  215. buffer_strcat(wb, "\t\t\t\"red\":");
  216. buffer_rrd_value(wb, rc->red);
  217. buffer_strcat(wb, ",\n");
  218. buffer_strcat(wb, "\t\t\t\"value\":");
  219. buffer_rrd_value(wb, rc->value);
  220. buffer_strcat(wb, "\n");
  221. buffer_strcat(wb, "\t\t}");
  222. }
  223. //void health_rrdcalctemplate2json_nolock(BUFFER *wb, RRDCALCTEMPLATE *rt) {
  224. //
  225. //}
  226. void health_aggregate_alarms(RRDHOST *host, BUFFER *wb, BUFFER* contexts, RRDCALC_STATUS status) {
  227. RRDCALC *rc;
  228. int numberOfAlarms = 0;
  229. char *tok = NULL;
  230. char *p = NULL;
  231. rrdhost_rdlock(host);
  232. if (contexts) {
  233. p = (char*)buffer_tostring(contexts);
  234. while(p && *p && (tok = mystrsep(&p, ", |"))) {
  235. if(!*tok) continue;
  236. for(rc = host->alarms; rc ; rc = rc->next) {
  237. if(unlikely(!rc->rrdset || !rc->rrdset->last_collected_time.tv_sec))
  238. continue;
  239. if(unlikely(rc->rrdset && rc->rrdset->hash_context == simple_hash(tok)
  240. && !strcmp(rc->rrdset->context, tok)
  241. && ((status==RRDCALC_STATUS_RAISED)?(rc->status >= RRDCALC_STATUS_WARNING):rc->status == status)))
  242. numberOfAlarms++;
  243. }
  244. }
  245. }
  246. else {
  247. for(rc = host->alarms; rc ; rc = rc->next) {
  248. if(unlikely(!rc->rrdset || !rc->rrdset->last_collected_time.tv_sec))
  249. continue;
  250. if(unlikely((status==RRDCALC_STATUS_RAISED)?(rc->status >= RRDCALC_STATUS_WARNING):rc->status == status))
  251. numberOfAlarms++;
  252. }
  253. }
  254. buffer_sprintf(wb, "%d", numberOfAlarms);
  255. rrdhost_unlock(host);
  256. }
  257. void health_alarms2json_fill_alarms(RRDHOST *host, BUFFER *wb, int all, void (*fp)(RRDHOST *, BUFFER *, RRDCALC *)) {
  258. RRDCALC *rc;
  259. int i;
  260. for(i = 0, rc = host->alarms; rc ; rc = rc->next) {
  261. if(unlikely(!rc->rrdset || !rc->rrdset->last_collected_time.tv_sec))
  262. continue;
  263. if(likely(!all && !(rc->status == RRDCALC_STATUS_WARNING || rc->status == RRDCALC_STATUS_CRITICAL)))
  264. continue;
  265. if(likely(i)) buffer_strcat(wb, ",\n");
  266. fp(host, wb, rc);
  267. i++;
  268. }
  269. }
  270. void health_alarms2json(RRDHOST *host, BUFFER *wb, int all) {
  271. rrdhost_rdlock(host);
  272. buffer_sprintf(wb, "{\n\t\"hostname\": \"%s\","
  273. "\n\t\"latest_alarm_log_unique_id\": %u,"
  274. "\n\t\"status\": %s,"
  275. "\n\t\"now\": %lu,"
  276. "\n\t\"alarms\": {\n",
  277. host->hostname,
  278. (host->health_log.next_log_id > 0)?(host->health_log.next_log_id - 1):0,
  279. host->health_enabled?"true":"false",
  280. (unsigned long)now_realtime_sec());
  281. health_alarms2json_fill_alarms(host, wb, all, health_rrdcalc2json_nolock);
  282. // buffer_strcat(wb, "\n\t},\n\t\"templates\": {");
  283. // RRDCALCTEMPLATE *rt;
  284. // for(rt = host->templates; rt ; rt = rt->next)
  285. // health_rrdcalctemplate2json_nolock(wb, rt);
  286. buffer_strcat(wb, "\n\t}\n}\n");
  287. rrdhost_unlock(host);
  288. }
  289. void health_alarms_values2json(RRDHOST *host, BUFFER *wb, int all) {
  290. rrdhost_rdlock(host);
  291. buffer_sprintf(wb, "{\n\t\"hostname\": \"%s\","
  292. "\n\t\"alarms\": {\n",
  293. host->hostname);
  294. health_alarms2json_fill_alarms(host, wb, all, health_rrdcalc_values2json_nolock);
  295. buffer_strcat(wb, "\n\t}\n}\n");
  296. rrdhost_unlock(host);
  297. }