ad_charts.cc 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479
  1. // SPDX-License-Identifier: GPL-3.0-or-later
  2. #include "ad_charts.h"
  3. void ml_update_dimensions_chart(ml_host_t *host, const ml_machine_learning_stats_t &mls) {
  4. /*
  5. * Machine learning status
  6. */
  7. if (Cfg.enable_statistics_charts) {
  8. if (!host->machine_learning_status_rs) {
  9. char id_buf[1024];
  10. char name_buf[1024];
  11. snprintfz(id_buf, 1024, "machine_learning_status_on_%s", localhost->machine_guid);
  12. snprintfz(name_buf, 1024, "machine_learning_status_on_%s", rrdhost_hostname(localhost));
  13. host->machine_learning_status_rs = rrdset_create(
  14. host->rh,
  15. "netdata", // type
  16. id_buf,
  17. name_buf, // name
  18. NETDATA_ML_CHART_FAMILY, // family
  19. "netdata.machine_learning_status", // ctx
  20. "Machine learning status", // title
  21. "dimensions", // units
  22. NETDATA_ML_PLUGIN, // plugin
  23. NETDATA_ML_MODULE_TRAINING, // module
  24. NETDATA_ML_CHART_PRIO_MACHINE_LEARNING_STATUS, // priority
  25. localhost->rrd_update_every, // update_every
  26. RRDSET_TYPE_LINE // chart_type
  27. );
  28. rrdset_flag_set(host->machine_learning_status_rs , RRDSET_FLAG_ANOMALY_DETECTION);
  29. host->machine_learning_status_enabled_rd =
  30. rrddim_add(host->machine_learning_status_rs, "enabled", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
  31. host->machine_learning_status_disabled_sp_rd =
  32. rrddim_add(host->machine_learning_status_rs, "disabled-sp", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
  33. }
  34. rrddim_set_by_pointer(host->machine_learning_status_rs,
  35. host->machine_learning_status_enabled_rd, mls.num_machine_learning_status_enabled);
  36. rrddim_set_by_pointer(host->machine_learning_status_rs,
  37. host->machine_learning_status_disabled_sp_rd, mls.num_machine_learning_status_disabled_sp);
  38. rrdset_done(host->machine_learning_status_rs);
  39. }
  40. /*
  41. * Metric type
  42. */
  43. if (Cfg.enable_statistics_charts) {
  44. if (!host->metric_type_rs) {
  45. char id_buf[1024];
  46. char name_buf[1024];
  47. snprintfz(id_buf, 1024, "metric_types_on_%s", localhost->machine_guid);
  48. snprintfz(name_buf, 1024, "metric_types_on_%s", rrdhost_hostname(localhost));
  49. host->metric_type_rs = rrdset_create(
  50. host->rh,
  51. "netdata", // type
  52. id_buf, // id
  53. name_buf, // name
  54. NETDATA_ML_CHART_FAMILY, // family
  55. "netdata.metric_types", // ctx
  56. "Dimensions by metric type", // title
  57. "dimensions", // units
  58. NETDATA_ML_PLUGIN, // plugin
  59. NETDATA_ML_MODULE_TRAINING, // module
  60. NETDATA_ML_CHART_PRIO_METRIC_TYPES, // priority
  61. localhost->rrd_update_every, // update_every
  62. RRDSET_TYPE_LINE // chart_type
  63. );
  64. rrdset_flag_set(host->metric_type_rs, RRDSET_FLAG_ANOMALY_DETECTION);
  65. host->metric_type_constant_rd =
  66. rrddim_add(host->metric_type_rs, "constant", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
  67. host->metric_type_variable_rd =
  68. rrddim_add(host->metric_type_rs, "variable", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
  69. }
  70. rrddim_set_by_pointer(host->metric_type_rs,
  71. host->metric_type_constant_rd, mls.num_metric_type_constant);
  72. rrddim_set_by_pointer(host->metric_type_rs,
  73. host->metric_type_variable_rd, mls.num_metric_type_variable);
  74. rrdset_done(host->metric_type_rs);
  75. }
  76. /*
  77. * Training status
  78. */
  79. if (Cfg.enable_statistics_charts) {
  80. if (!host->training_status_rs) {
  81. char id_buf[1024];
  82. char name_buf[1024];
  83. snprintfz(id_buf, 1024, "training_status_on_%s", localhost->machine_guid);
  84. snprintfz(name_buf, 1024, "training_status_on_%s", rrdhost_hostname(localhost));
  85. host->training_status_rs = rrdset_create(
  86. host->rh,
  87. "netdata", // type
  88. id_buf, // id
  89. name_buf, // name
  90. NETDATA_ML_CHART_FAMILY, // family
  91. "netdata.training_status", // ctx
  92. "Training status of dimensions", // title
  93. "dimensions", // units
  94. NETDATA_ML_PLUGIN, // plugin
  95. NETDATA_ML_MODULE_TRAINING, // module
  96. NETDATA_ML_CHART_PRIO_TRAINING_STATUS, // priority
  97. localhost->rrd_update_every, // update_every
  98. RRDSET_TYPE_LINE // chart_type
  99. );
  100. rrdset_flag_set(host->training_status_rs, RRDSET_FLAG_ANOMALY_DETECTION);
  101. host->training_status_untrained_rd =
  102. rrddim_add(host->training_status_rs, "untrained", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
  103. host->training_status_pending_without_model_rd =
  104. rrddim_add(host->training_status_rs, "pending-without-model", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
  105. host->training_status_trained_rd =
  106. rrddim_add(host->training_status_rs, "trained", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
  107. host->training_status_pending_with_model_rd =
  108. rrddim_add(host->training_status_rs, "pending-with-model", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
  109. host->training_status_silenced_rd =
  110. rrddim_add(host->training_status_rs, "silenced", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
  111. }
  112. rrddim_set_by_pointer(host->training_status_rs,
  113. host->training_status_untrained_rd, mls.num_training_status_untrained);
  114. rrddim_set_by_pointer(host->training_status_rs,
  115. host->training_status_pending_without_model_rd, mls.num_training_status_pending_without_model);
  116. rrddim_set_by_pointer(host->training_status_rs,
  117. host->training_status_trained_rd, mls.num_training_status_trained);
  118. rrddim_set_by_pointer(host->training_status_rs,
  119. host->training_status_pending_with_model_rd, mls.num_training_status_pending_with_model);
  120. rrddim_set_by_pointer(host->training_status_rs,
  121. host->training_status_silenced_rd, mls.num_training_status_silenced);
  122. rrdset_done(host->training_status_rs);
  123. }
  124. /*
  125. * Prediction status
  126. */
  127. {
  128. if (!host->dimensions_rs) {
  129. char id_buf[1024];
  130. char name_buf[1024];
  131. snprintfz(id_buf, 1024, "dimensions_on_%s", localhost->machine_guid);
  132. snprintfz(name_buf, 1024, "dimensions_on_%s", rrdhost_hostname(localhost));
  133. host->dimensions_rs = rrdset_create(
  134. host->rh,
  135. "anomaly_detection", // type
  136. id_buf, // id
  137. name_buf, // name
  138. "dimensions", // family
  139. "anomaly_detection.dimensions", // ctx
  140. "Anomaly detection dimensions", // title
  141. "dimensions", // units
  142. NETDATA_ML_PLUGIN, // plugin
  143. NETDATA_ML_MODULE_TRAINING, // module
  144. ML_CHART_PRIO_DIMENSIONS, // priority
  145. localhost->rrd_update_every, // update_every
  146. RRDSET_TYPE_LINE // chart_type
  147. );
  148. rrdset_flag_set(host->dimensions_rs, RRDSET_FLAG_ANOMALY_DETECTION);
  149. host->dimensions_anomalous_rd =
  150. rrddim_add(host->dimensions_rs, "anomalous", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
  151. host->dimensions_normal_rd =
  152. rrddim_add(host->dimensions_rs, "normal", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
  153. }
  154. rrddim_set_by_pointer(host->dimensions_rs,
  155. host->dimensions_anomalous_rd, mls.num_anomalous_dimensions);
  156. rrddim_set_by_pointer(host->dimensions_rs,
  157. host->dimensions_normal_rd, mls.num_normal_dimensions);
  158. rrdset_done(host->dimensions_rs);
  159. }
  160. }
  161. void ml_update_host_and_detection_rate_charts(ml_host_t *host, collected_number AnomalyRate) {
  162. /*
  163. * Anomaly rate
  164. */
  165. {
  166. if (!host->anomaly_rate_rs) {
  167. char id_buf[1024];
  168. char name_buf[1024];
  169. snprintfz(id_buf, 1024, "anomaly_rate_on_%s", localhost->machine_guid);
  170. snprintfz(name_buf, 1024, "anomaly_rate_on_%s", rrdhost_hostname(localhost));
  171. host->anomaly_rate_rs = rrdset_create(
  172. host->rh,
  173. "anomaly_detection", // type
  174. id_buf, // id
  175. name_buf, // name
  176. "anomaly_rate", // family
  177. "anomaly_detection.anomaly_rate", // ctx
  178. "Percentage of anomalous dimensions", // title
  179. "percentage", // units
  180. NETDATA_ML_PLUGIN, // plugin
  181. NETDATA_ML_MODULE_DETECTION, // module
  182. ML_CHART_PRIO_ANOMALY_RATE, // priority
  183. localhost->rrd_update_every, // update_every
  184. RRDSET_TYPE_LINE // chart_type
  185. );
  186. rrdset_flag_set(host->anomaly_rate_rs, RRDSET_FLAG_ANOMALY_DETECTION);
  187. host->anomaly_rate_rd =
  188. rrddim_add(host->anomaly_rate_rs, "anomaly_rate", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
  189. }
  190. rrddim_set_by_pointer(host->anomaly_rate_rs, host->anomaly_rate_rd, AnomalyRate);
  191. rrdset_done(host->anomaly_rate_rs);
  192. }
  193. /*
  194. * Detector Events
  195. */
  196. {
  197. if (!host->detector_events_rs) {
  198. char id_buf[1024];
  199. char name_buf[1024];
  200. snprintfz(id_buf, 1024, "anomaly_detection_on_%s", localhost->machine_guid);
  201. snprintfz(name_buf, 1024, "anomaly_detection_on_%s", rrdhost_hostname(localhost));
  202. host->detector_events_rs = rrdset_create(
  203. host->rh,
  204. "anomaly_detection", // type
  205. id_buf, // id
  206. name_buf, // name
  207. "anomaly_detection", // family
  208. "anomaly_detection.detector_events", // ctx
  209. "Anomaly detection events", // title
  210. "percentage", // units
  211. NETDATA_ML_PLUGIN, // plugin
  212. NETDATA_ML_MODULE_DETECTION, // module
  213. ML_CHART_PRIO_DETECTOR_EVENTS, // priority
  214. localhost->rrd_update_every, // update_every
  215. RRDSET_TYPE_LINE // chart_type
  216. );
  217. rrdset_flag_set(host->detector_events_rs, RRDSET_FLAG_ANOMALY_DETECTION);
  218. host->detector_events_above_threshold_rd =
  219. rrddim_add(host->detector_events_rs, "above_threshold", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
  220. host->detector_events_new_anomaly_event_rd =
  221. rrddim_add(host->detector_events_rs, "new_anomaly_event", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
  222. }
  223. /*
  224. * Compute the values of the dimensions based on the host rate chart
  225. */
  226. ONEWAYALLOC *OWA = onewayalloc_create(0);
  227. time_t Now = now_realtime_sec();
  228. time_t Before = Now - host->rh->rrd_update_every;
  229. time_t After = Before - Cfg.anomaly_detection_query_duration;
  230. RRDR_OPTIONS Options = static_cast<RRDR_OPTIONS>(0x00000000);
  231. RRDR *R = rrd2rrdr_legacy(
  232. OWA,
  233. host->anomaly_rate_rs,
  234. 1 /* points wanted */,
  235. After,
  236. Before,
  237. Cfg.anomaly_detection_grouping_method,
  238. 0 /* resampling time */,
  239. Options, "anomaly_rate",
  240. NULL /* group options */,
  241. 0, /* timeout */
  242. 0, /* tier */
  243. QUERY_SOURCE_ML,
  244. STORAGE_PRIORITY_SYNCHRONOUS
  245. );
  246. if (R) {
  247. if (R->d == 1 && R->n == 1 && R->rows == 1) {
  248. static thread_local bool prev_above_threshold = false;
  249. bool above_threshold = R->v[0] >= Cfg.host_anomaly_rate_threshold;
  250. bool new_anomaly_event = above_threshold && !prev_above_threshold;
  251. prev_above_threshold = above_threshold;
  252. rrddim_set_by_pointer(host->detector_events_rs,
  253. host->detector_events_above_threshold_rd, above_threshold);
  254. rrddim_set_by_pointer(host->detector_events_rs,
  255. host->detector_events_new_anomaly_event_rd, new_anomaly_event);
  256. rrdset_done(host->detector_events_rs);
  257. }
  258. rrdr_free(OWA, R);
  259. }
  260. onewayalloc_destroy(OWA);
  261. }
  262. }
  263. void ml_update_training_statistics_chart(ml_training_thread_t *training_thread, const ml_training_stats_t &ts) {
  264. /*
  265. * queue stats
  266. */
  267. {
  268. if (!training_thread->queue_stats_rs) {
  269. char id_buf[1024];
  270. char name_buf[1024];
  271. snprintfz(id_buf, 1024, "training_queue_%zu_stats", training_thread->id);
  272. snprintfz(name_buf, 1024, "training_queue_%zu_stats", training_thread->id);
  273. training_thread->queue_stats_rs = rrdset_create(
  274. localhost,
  275. "netdata", // type
  276. id_buf, // id
  277. name_buf, // name
  278. NETDATA_ML_CHART_FAMILY, // family
  279. "netdata.queue_stats", // ctx
  280. "Training queue stats", // title
  281. "items", // units
  282. NETDATA_ML_PLUGIN, // plugin
  283. NETDATA_ML_MODULE_TRAINING, // module
  284. NETDATA_ML_CHART_PRIO_QUEUE_STATS, // priority
  285. localhost->rrd_update_every, // update_every
  286. RRDSET_TYPE_LINE// chart_type
  287. );
  288. rrdset_flag_set(training_thread->queue_stats_rs, RRDSET_FLAG_ANOMALY_DETECTION);
  289. training_thread->queue_stats_queue_size_rd =
  290. rrddim_add(training_thread->queue_stats_rs, "queue_size", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
  291. training_thread->queue_stats_popped_items_rd =
  292. rrddim_add(training_thread->queue_stats_rs, "popped_items", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
  293. }
  294. rrddim_set_by_pointer(training_thread->queue_stats_rs,
  295. training_thread->queue_stats_queue_size_rd, ts.queue_size);
  296. rrddim_set_by_pointer(training_thread->queue_stats_rs,
  297. training_thread->queue_stats_popped_items_rd, ts.num_popped_items);
  298. rrdset_done(training_thread->queue_stats_rs);
  299. }
  300. /*
  301. * training stats
  302. */
  303. {
  304. if (!training_thread->training_time_stats_rs) {
  305. char id_buf[1024];
  306. char name_buf[1024];
  307. snprintfz(id_buf, 1024, "training_queue_%zu_time_stats", training_thread->id);
  308. snprintfz(name_buf, 1024, "training_queue_%zu_time_stats", training_thread->id);
  309. training_thread->training_time_stats_rs = rrdset_create(
  310. localhost,
  311. "netdata", // type
  312. id_buf, // id
  313. name_buf, // name
  314. NETDATA_ML_CHART_FAMILY, // family
  315. "netdata.training_time_stats", // ctx
  316. "Training time stats", // title
  317. "milliseconds", // units
  318. NETDATA_ML_PLUGIN, // plugin
  319. NETDATA_ML_MODULE_TRAINING, // module
  320. NETDATA_ML_CHART_PRIO_TRAINING_TIME_STATS, // priority
  321. localhost->rrd_update_every, // update_every
  322. RRDSET_TYPE_LINE// chart_type
  323. );
  324. rrdset_flag_set(training_thread->training_time_stats_rs, RRDSET_FLAG_ANOMALY_DETECTION);
  325. training_thread->training_time_stats_allotted_rd =
  326. rrddim_add(training_thread->training_time_stats_rs, "allotted", NULL, 1, 1000, RRD_ALGORITHM_ABSOLUTE);
  327. training_thread->training_time_stats_consumed_rd =
  328. rrddim_add(training_thread->training_time_stats_rs, "consumed", NULL, 1, 1000, RRD_ALGORITHM_ABSOLUTE);
  329. training_thread->training_time_stats_remaining_rd =
  330. rrddim_add(training_thread->training_time_stats_rs, "remaining", NULL, 1, 1000, RRD_ALGORITHM_ABSOLUTE);
  331. }
  332. rrddim_set_by_pointer(training_thread->training_time_stats_rs,
  333. training_thread->training_time_stats_allotted_rd, ts.allotted_ut);
  334. rrddim_set_by_pointer(training_thread->training_time_stats_rs,
  335. training_thread->training_time_stats_consumed_rd, ts.consumed_ut);
  336. rrddim_set_by_pointer(training_thread->training_time_stats_rs,
  337. training_thread->training_time_stats_remaining_rd, ts.remaining_ut);
  338. rrdset_done(training_thread->training_time_stats_rs);
  339. }
  340. /*
  341. * training result stats
  342. */
  343. {
  344. if (!training_thread->training_results_rs) {
  345. char id_buf[1024];
  346. char name_buf[1024];
  347. snprintfz(id_buf, 1024, "training_queue_%zu_results", training_thread->id);
  348. snprintfz(name_buf, 1024, "training_queue_%zu_results", training_thread->id);
  349. training_thread->training_results_rs = rrdset_create(
  350. localhost,
  351. "netdata", // type
  352. id_buf, // id
  353. name_buf, // name
  354. NETDATA_ML_CHART_FAMILY, // family
  355. "netdata.training_results", // ctx
  356. "Training results", // title
  357. "events", // units
  358. NETDATA_ML_PLUGIN, // plugin
  359. NETDATA_ML_MODULE_TRAINING, // module
  360. NETDATA_ML_CHART_PRIO_TRAINING_RESULTS, // priority
  361. localhost->rrd_update_every, // update_every
  362. RRDSET_TYPE_LINE// chart_type
  363. );
  364. rrdset_flag_set(training_thread->training_results_rs, RRDSET_FLAG_ANOMALY_DETECTION);
  365. training_thread->training_results_ok_rd =
  366. rrddim_add(training_thread->training_results_rs, "ok", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
  367. training_thread->training_results_invalid_query_time_range_rd =
  368. rrddim_add(training_thread->training_results_rs, "invalid-queries", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
  369. training_thread->training_results_not_enough_collected_values_rd =
  370. rrddim_add(training_thread->training_results_rs, "not-enough-values", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
  371. training_thread->training_results_null_acquired_dimension_rd =
  372. rrddim_add(training_thread->training_results_rs, "null-acquired-dimensions", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
  373. training_thread->training_results_chart_under_replication_rd =
  374. rrddim_add(training_thread->training_results_rs, "chart-under-replication", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
  375. }
  376. rrddim_set_by_pointer(training_thread->training_results_rs,
  377. training_thread->training_results_ok_rd, ts.training_result_ok);
  378. rrddim_set_by_pointer(training_thread->training_results_rs,
  379. training_thread->training_results_invalid_query_time_range_rd, ts.training_result_invalid_query_time_range);
  380. rrddim_set_by_pointer(training_thread->training_results_rs,
  381. training_thread->training_results_not_enough_collected_values_rd, ts.training_result_not_enough_collected_values);
  382. rrddim_set_by_pointer(training_thread->training_results_rs,
  383. training_thread->training_results_null_acquired_dimension_rd, ts.training_result_null_acquired_dimension);
  384. rrddim_set_by_pointer(training_thread->training_results_rs,
  385. training_thread->training_results_chart_under_replication_rd, ts.training_result_chart_under_replication);
  386. rrdset_done(training_thread->training_results_rs);
  387. }
  388. }
  389. void ml_update_global_statistics_charts(uint64_t models_consulted) {
  390. if (Cfg.enable_statistics_charts) {
  391. static RRDSET *st = NULL;
  392. static RRDDIM *rd = NULL;
  393. if (unlikely(!st)) {
  394. st = rrdset_create_localhost(
  395. "netdata" // type
  396. , "ml_models_consulted" // id
  397. , NULL // name
  398. , NETDATA_ML_CHART_FAMILY // family
  399. , NULL // context
  400. , "KMeans models used for prediction" // title
  401. , "models" // units
  402. , NETDATA_ML_PLUGIN // plugin
  403. , NETDATA_ML_MODULE_DETECTION // module
  404. , NETDATA_ML_CHART_PRIO_MACHINE_LEARNING_STATUS // priority
  405. , localhost->rrd_update_every // update_every
  406. , RRDSET_TYPE_AREA // chart_type
  407. );
  408. rd = rrddim_add(st, "num_models_consulted", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
  409. }
  410. rrddim_set_by_pointer(st, rd, (collected_number) models_consulted);
  411. rrdset_done(st);
  412. }
  413. }