metric_correlations.c 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300
  1. // SPDX-License-Identifier: GPL-3.0-or-later
  2. #include "daemon/common.h"
  3. #include "KolmogorovSmirnovDist.h"
// Upper bound on the number of points handled per query window: it sizes the
// fixed arrays in struct per_dim and caps the max_points request parameter.
#define MAX_POINTS 10000
// Feature switch checked by metric_correlations(); initialised to CONFIG_BOOLEAN_NO.
int enable_metric_correlations = CONFIG_BOOLEAN_NO;
// Version number of this implementation; not read anywhere in the visible part of this file.
int metric_correlations_version = 1;
// Singly linked list node used by metric_correlations() to remember the charts
// it selected while holding the host lock, so they can be processed afterwards.
struct charts {
    RRDSET *st;           // chart referenced by this node; the list unlocks it but never frees it
    struct charts *next;  // next node, or NULL at the end of the list
};
// Working data for a single dimension of one chart.
struct per_dim {
    char *dimension;                          // dimension name, duplicated with strdupz()
    calculated_number baseline[MAX_POINTS];   // values returned by the baseline window query
    calculated_number highlight[MAX_POINTS];  // values returned by the highlight window query
    double baseline_diffs[MAX_POINTS];        // successive differences of baseline[], filled by run_diffs_and_rev()
    double highlight_diffs[MAX_POINTS];       // successive differences of highlight[], filled by run_diffs_and_rev()
};
  18. int find_index(double arr[], long int n, double K, long int start)
  19. {
  20. for (long int i = start; i < n; i++) {
  21. if (K<arr[i]){
  22. return i;
  23. }
  24. }
  25. return n;
  26. }
  27. int compare(const void *left, const void *right) {
  28. double lt = *(double *)left;
  29. double rt = *(double *)right;
  30. if(unlikely(lt < rt)) return -1;
  31. if(unlikely(lt > rt)) return 1;
  32. return 0;
  33. }
  34. void kstwo(double data1[], long int n1, double data2[], long int n2, double *d, double *prob)
  35. {
  36. double en1, en2, en, data_all[MAX_POINTS*2], cdf1[MAX_POINTS], cdf2[MAX_POINTS], cddiffs[MAX_POINTS];
  37. double min = 0.0, max = 0.0;
  38. qsort(data1, n1, sizeof(double), compare);
  39. qsort(data2, n2, sizeof(double), compare);
  40. for (int i = 0; i < n1; i++)
  41. data_all[i] = data1[i];
  42. for (int i = 0; i < n2; i++)
  43. data_all[n1 + i] = data2[i];
  44. en1 = (double)n1;
  45. en2 = (double)n2;
  46. *d = 0.0;
  47. cddiffs[0]=0; //for uninitialized warning
  48. for (int i=0; i<n1+n2;i++)
  49. cdf1[i] = find_index(data1, n1, data_all[i], 0) / en1; //TODO, use the start to reduce loops
  50. for (int i=0; i<n1+n2;i++)
  51. cdf2[i] = find_index(data2, n2, data_all[i], 0) / en2;
  52. for ( int i=0;i<n2+n1;i++)
  53. cddiffs[i] = cdf1[i] - cdf2[i];
  54. min = cddiffs[0];
  55. for ( int i=0;i<n2+n1;i++) {
  56. if (cddiffs[i] < min)
  57. min = cddiffs[i];
  58. }
  59. //clip min
  60. if (fabs(min) < 0) min = 0;
  61. else if (fabs(min) > 1) min = 1;
  62. max = fabs(cddiffs[0]);
  63. for ( int i=0;i<n2+n1;i++)
  64. if (cddiffs[i] >= max) max = cddiffs[i];
  65. if (fabs(min) < max)
  66. *d = max;
  67. else
  68. *d = fabs(min);
  69. en = (en1*en2 / (en1 + en2));
  70. *prob = KSfbar(round(en), *d);
  71. }
  72. void fill_nan (struct per_dim *d, long int hp, long int bp)
  73. {
  74. int k;
  75. for (k = 0; k < bp; k++) {
  76. if (isnan(d->baseline[k])) {
  77. d->baseline[k] = 0.0;
  78. }
  79. }
  80. for (k = 0; k < hp; k++) {
  81. if (isnan(d->highlight[k])) {
  82. d->highlight[k] = 0.0;
  83. }
  84. }
  85. }
  86. //TODO check counters
  87. void run_diffs_and_rev (struct per_dim *d, long int hp, long int bp)
  88. {
  89. int k, j;
  90. for (k = 0, j = bp; k < bp - 1; k++, j--)
  91. d->baseline_diffs[k] = (double)d->baseline[j - 2] - (double)d->baseline[j - 1];
  92. for (k = 0, j = hp; k < hp - 1; k++, j--) {
  93. d->highlight_diffs[k] = (double)d->highlight[j - 2] - (double)d->highlight[j - 1];
  94. }
  95. }
  96. int run_metric_correlations (BUFFER *wb, RRDSET *st, long long baseline_after, long long baseline_before, long long highlight_after, long long highlight_before, long long max_points)
  97. {
  98. uint32_t options = 0x00000000;
  99. int group_method = RRDR_GROUPING_AVERAGE;
  100. long group_time = 0;
  101. struct context_param *context_param_list = NULL;
  102. long c;
  103. int i=0, j=0;
  104. int b_dims = 0;
  105. long int baseline_points = 0, highlight_points = 0;
  106. struct per_dim *pd = NULL;
  107. //TODO get everything in one go, when baseline is right before highlight
  108. //get baseline
  109. ONEWAYALLOC *owa = onewayalloc_create(0);
  110. RRDR *rb = rrd2rrdr(owa, st, max_points, baseline_after, baseline_before, group_method, group_time, options, NULL, context_param_list, 0);
  111. if(!rb) {
  112. info("Cannot generate metric correlations output with these parameters on this chart.");
  113. onewayalloc_destroy(owa);
  114. return 0;
  115. } else {
  116. baseline_points = rrdr_rows(rb);
  117. pd = mallocz(sizeof(struct per_dim) * rb->d);
  118. b_dims = rb->d;
  119. for (c = 0; c != rrdr_rows(rb) ; ++c) {
  120. RRDDIM *d;
  121. for (j = 0, d = rb->st->dimensions ; d && j < rb->d ; ++j, d = d->next) {
  122. calculated_number *cn = &rb->v[ c * rb->d ];
  123. if (!c) {
  124. //TODO use points from query
  125. pd[j].dimension = strdupz (d->name);
  126. pd[j].baseline[c] = cn[j];
  127. } else {
  128. pd[j].baseline[c] = cn[j];
  129. }
  130. }
  131. }
  132. }
  133. rrdr_free(owa, rb);
  134. onewayalloc_destroy(owa);
  135. if (!pd)
  136. return 0;
  137. //get highlight
  138. owa = onewayalloc_create(0);
  139. RRDR *rh = rrd2rrdr(owa, st, max_points, highlight_after, highlight_before, group_method, group_time, options, NULL, context_param_list, 0);
  140. if(!rh) {
  141. info("Cannot generate metric correlations output with these parameters on this chart.");
  142. freez(pd);
  143. onewayalloc_destroy(owa);
  144. return 0;
  145. } else {
  146. if (rh->d != b_dims) {
  147. //TODO handle different dims
  148. rrdr_free(owa, rh);
  149. onewayalloc_destroy(owa);
  150. freez(pd);
  151. return 0;
  152. }
  153. highlight_points = rrdr_rows(rh);
  154. for (c = 0; c != rrdr_rows(rh) ; ++c) {
  155. RRDDIM *d;
  156. for (j = 0, d = rh->st->dimensions ; d && j < rh->d ; ++j, d = d->next) {
  157. calculated_number *cn = &rh->v[ c * rh->d ];
  158. pd[j].highlight[c] = cn[j];
  159. }
  160. }
  161. }
  162. rrdr_free(owa, rh);
  163. onewayalloc_destroy(owa);
  164. for (i = 0; i < b_dims; i++) {
  165. fill_nan(&pd[i], highlight_points, baseline_points);
  166. }
  167. for (i = 0; i < b_dims; i++) {
  168. run_diffs_and_rev(&pd[i], highlight_points, baseline_points);
  169. }
  170. double d=0, prob=0;
  171. for (i=0;i < j ;i++) {
  172. if (baseline_points && highlight_points) {
  173. kstwo(pd[i].baseline_diffs, baseline_points-1, pd[i].highlight_diffs, highlight_points-1, &d, &prob);
  174. buffer_sprintf(wb, "\t\t\t\t\"%s\": %f", pd[i].dimension, prob);
  175. if (i != j-1)
  176. buffer_sprintf(wb, ",\n");
  177. else
  178. buffer_sprintf(wb, "\n");
  179. }
  180. }
  181. freez(pd);
  182. return j;
  183. }
  184. void metric_correlations (RRDHOST *host, BUFFER *wb, long long baseline_after, long long baseline_before, long long highlight_after, long long highlight_before, long long max_points)
  185. {
  186. info ("Running metric correlations, highlight_after: %lld, highlight_before: %lld, baseline_after: %lld, baseline_before: %lld, max_points: %lld", highlight_after, highlight_before, baseline_after, baseline_before, max_points);
  187. if (!enable_metric_correlations) {
  188. error("Metric correlations functionality is not enabled.");
  189. buffer_strcat(wb, "{\"error\": \"Metric correlations functionality is not enabled.\" }");
  190. return;
  191. }
  192. if (highlight_before <= highlight_after || baseline_before <= baseline_after) {
  193. error("Invalid baseline or highlight ranges.");
  194. buffer_strcat(wb, "{\"error\": \"Invalid baseline or highlight ranges.\" }");
  195. return;
  196. }
  197. long long dims = 0, total_dims = 0;
  198. RRDSET *st;
  199. size_t c = 0;
  200. BUFFER *wdims = buffer_create(1000);
  201. if (!max_points || max_points > MAX_POINTS)
  202. max_points = MAX_POINTS;
  203. //dont lock here and wait for results
  204. //get the charts and run mc after
  205. //should not be a problem for the query
  206. struct charts *charts = NULL;
  207. rrdhost_rdlock(host);
  208. rrdset_foreach_read(st, host) {
  209. if (rrdset_is_available_for_viewers(st)) {
  210. rrdset_rdlock(st);
  211. struct charts *chart = callocz(1, sizeof(struct charts));
  212. chart->st = st;
  213. chart->next = NULL;
  214. if (charts) {
  215. chart->next = charts;
  216. }
  217. charts = chart;
  218. }
  219. }
  220. rrdhost_unlock(host);
  221. buffer_strcat(wb, "{\n\t\"correlated_charts\": {");
  222. for (struct charts *ch = charts; ch; ch = ch->next) {
  223. buffer_flush(wdims);
  224. dims = run_metric_correlations(wdims, ch->st, baseline_after, baseline_before, highlight_after, highlight_before, max_points);
  225. if (dims) {
  226. if (c)
  227. buffer_strcat(wb, "\t\t},");
  228. buffer_strcat(wb, "\n\t\t\"");
  229. buffer_strcat(wb, ch->st->id);
  230. buffer_strcat(wb, "\": {\n");
  231. buffer_strcat(wb, "\t\t\t\"context\": \"");
  232. buffer_strcat(wb, ch->st->context);
  233. buffer_strcat(wb, "\",\n\t\t\t\"dimensions\": {\n");
  234. buffer_sprintf(wb, "%s", buffer_tostring(wdims));
  235. buffer_strcat(wb, "\t\t\t}\n");
  236. total_dims += dims;
  237. c++;
  238. }
  239. }
  240. buffer_strcat(wb, "\t\t}\n");
  241. buffer_sprintf(wb, "\t},\n\t\"total_dimensions_count\": %lld\n}", total_dims);
  242. if (!total_dims) {
  243. buffer_flush(wb);
  244. buffer_strcat(wb, "{\"error\": \"No results from metric correlations.\" }");
  245. }
  246. struct charts* ch;
  247. while(charts){
  248. ch = charts;
  249. charts = charts->next;
  250. rrdset_unlock(ch->st);
  251. free(ch);
  252. }
  253. buffer_free(wdims);
  254. info ("Done running metric correlations");
  255. }