changefinder.chart.py 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185
  1. # -*- coding: utf-8 -*-
  2. # Description: changefinder netdata python.d module
  3. # Author: andrewm4894
  4. # SPDX-License-Identifier: GPL-3.0-or-later
  5. from json import loads
  6. import re
  7. from bases.FrameworkServices.UrlService import UrlService
  8. import numpy as np
  9. import changefinder
  10. from scipy.stats import percentileofscore
  11. update_every = 5
  12. disabled_by_default = True
  13. ORDER = [
  14. 'scores',
  15. 'flags'
  16. ]
  17. CHARTS = {
  18. 'scores': {
  19. 'options': [None, 'ChangeFinder', 'score', 'Scores', 'changefinder.scores', 'line'],
  20. 'lines': []
  21. },
  22. 'flags': {
  23. 'options': [None, 'ChangeFinder', 'flag', 'Flags', 'changefinder.flags', 'stacked'],
  24. 'lines': []
  25. }
  26. }
  27. DEFAULT_PROTOCOL = 'http'
  28. DEFAULT_HOST = '127.0.0.1:19999'
  29. DEFAULT_CHARTS_REGEX = 'system.*'
  30. DEFAULT_MODE = 'per_chart'
  31. DEFAULT_CF_R = 0.5
  32. DEFAULT_CF_ORDER = 1
  33. DEFAULT_CF_SMOOTH = 15
  34. DEFAULT_CF_DIFF = False
  35. DEFAULT_CF_THRESHOLD = 99
  36. DEFAULT_N_SCORE_SAMPLES = 14400
  37. DEFAULT_SHOW_SCORES = False
  38. class Service(UrlService):
  39. def __init__(self, configuration=None, name=None):
  40. UrlService.__init__(self, configuration=configuration, name=name)
  41. self.order = ORDER
  42. self.definitions = CHARTS
  43. self.protocol = self.configuration.get('protocol', DEFAULT_PROTOCOL)
  44. self.host = self.configuration.get('host', DEFAULT_HOST)
  45. self.url = '{}://{}/api/v1/allmetrics?format=json'.format(self.protocol, self.host)
  46. self.charts_regex = re.compile(self.configuration.get('charts_regex', DEFAULT_CHARTS_REGEX))
  47. self.charts_to_exclude = self.configuration.get('charts_to_exclude', '').split(',')
  48. self.mode = self.configuration.get('mode', DEFAULT_MODE)
  49. self.n_score_samples = int(self.configuration.get('n_score_samples', DEFAULT_N_SCORE_SAMPLES))
  50. self.show_scores = int(self.configuration.get('show_scores', DEFAULT_SHOW_SCORES))
  51. self.cf_r = float(self.configuration.get('cf_r', DEFAULT_CF_R))
  52. self.cf_order = int(self.configuration.get('cf_order', DEFAULT_CF_ORDER))
  53. self.cf_smooth = int(self.configuration.get('cf_smooth', DEFAULT_CF_SMOOTH))
  54. self.cf_diff = bool(self.configuration.get('cf_diff', DEFAULT_CF_DIFF))
  55. self.cf_threshold = float(self.configuration.get('cf_threshold', DEFAULT_CF_THRESHOLD))
  56. self.collected_dims = {'scores': set(), 'flags': set()}
  57. self.models = {}
  58. self.x_latest = {}
  59. self.scores_latest = {}
  60. self.scores_samples = {}
  61. def get_score(self, x, model):
  62. """Update the score for the model based on most recent data, flag if it's percentile passes self.cf_threshold.
  63. """
  64. # get score
  65. if model not in self.models:
  66. # initialise empty model if needed
  67. self.models[model] = changefinder.ChangeFinder(r=self.cf_r, order=self.cf_order, smooth=self.cf_smooth)
  68. # if the update for this step fails then just fallback to last known score
  69. try:
  70. score = self.models[model].update(x)
  71. self.scores_latest[model] = score
  72. except Exception as _:
  73. score = self.scores_latest.get(model, 0)
  74. score = 0 if np.isnan(score) else score
  75. # update sample scores used to calculate percentiles
  76. if model in self.scores_samples:
  77. self.scores_samples[model].append(score)
  78. else:
  79. self.scores_samples[model] = [score]
  80. self.scores_samples[model] = self.scores_samples[model][-self.n_score_samples:]
  81. # convert score to percentile
  82. score = percentileofscore(self.scores_samples[model], score)
  83. # flag based on score percentile
  84. flag = 1 if score >= self.cf_threshold else 0
  85. return score, flag
  86. def validate_charts(self, chart, data, algorithm='absolute', multiplier=1, divisor=1):
  87. """If dimension not in chart then add it.
  88. """
  89. if not self.charts:
  90. return
  91. for dim in data:
  92. if dim not in self.collected_dims[chart]:
  93. self.collected_dims[chart].add(dim)
  94. self.charts[chart].add_dimension([dim, dim, algorithm, multiplier, divisor])
  95. for dim in list(self.collected_dims[chart]):
  96. if dim not in data:
  97. self.collected_dims[chart].remove(dim)
  98. self.charts[chart].del_dimension(dim, hide=False)
  99. def diff(self, x, model):
  100. """Take difference of data.
  101. """
  102. x_diff = x - self.x_latest.get(model, 0)
  103. self.x_latest[model] = x
  104. x = x_diff
  105. return x
  106. def _get_data(self):
  107. # pull data from self.url
  108. raw_data = self._get_raw_data()
  109. if raw_data is None:
  110. return None
  111. raw_data = loads(raw_data)
  112. # filter to just the data for the charts specified
  113. charts_in_scope = list(filter(self.charts_regex.match, raw_data.keys()))
  114. charts_in_scope = [c for c in charts_in_scope if c not in self.charts_to_exclude]
  115. data_score = {}
  116. data_flag = {}
  117. # process each chart
  118. for chart in charts_in_scope:
  119. if self.mode == 'per_chart':
  120. # average dims on chart and run changefinder on that average
  121. x = [raw_data[chart]['dimensions'][dim]['value'] for dim in raw_data[chart]['dimensions']]
  122. x = [x for x in x if x is not None]
  123. if len(x) > 0:
  124. x = sum(x) / len(x)
  125. x = self.diff(x, chart) if self.cf_diff else x
  126. score, flag = self.get_score(x, chart)
  127. if self.show_scores:
  128. data_score['{}_score'.format(chart)] = score * 100
  129. data_flag[chart] = flag
  130. else:
  131. # run changefinder on each individual dim
  132. for dim in raw_data[chart]['dimensions']:
  133. chart_dim = '{}|{}'.format(chart, dim)
  134. x = raw_data[chart]['dimensions'][dim]['value']
  135. x = x if x else 0
  136. x = self.diff(x, chart_dim) if self.cf_diff else x
  137. score, flag = self.get_score(x, chart_dim)
  138. if self.show_scores:
  139. data_score['{}_score'.format(chart_dim)] = score * 100
  140. data_flag[chart_dim] = flag
  141. self.validate_charts('flags', data_flag)
  142. if self.show_scores & len(data_score) > 0:
  143. data_score['average_score'] = sum(data_score.values()) / len(data_score)
  144. self.validate_charts('scores', data_score, divisor=100)
  145. data = {**data_score, **data_flag}
  146. return data