// Dimension.h (4.7 KB)
  1. // SPDX-License-Identifier: GPL-3.0-or-later
  2. #ifndef ML_DIMENSION_H
  3. #define ML_DIMENSION_H
  4. #include "Mutex.h"
  5. #include "Stats.h"
  6. #include "Query.h"
  7. #include "Config.h"
  8. #include "ml-private.h"
  9. namespace ml {
  10. static inline std::string getMLDimensionID(RRDDIM *RD) {
  11. RRDSET *RS = RD->rrdset;
  12. std::stringstream SS;
  13. SS << rrdset_context(RS) << "|" << rrdset_id(RS) << "|" << rrddim_name(RD);
  14. return SS.str();
  15. }
  // Whether machine learning (training and prediction) runs for a dimension.
  enum class MachineLearningStatus {
      // Training/prediction is enabled.
      Enabled,

      // Disabled because the dimension's update every differs from the host's.
      DisabledDueToUniqueUpdateEvery,

      // Disabled because the chart matches the configured charts-to-skip pattern.
      DisabledDueToExcludedChart,
  };
  // Where a dimension currently stands with respect to model training.
  enum class TrainingStatus {
      // No model exists for this dimension.
      Untrained,

      // A training request has been sent, but no model exists yet.
      PendingWithoutModel,

      // A request to update the existing models has been sent.
      PendingWithModel,

      // A valid, up-to-date model exists.
      Trained,
  };
  // Classification of a dimension by how its values behave.
  enum class MetricType {
      // The values are constant, so there is no need to train.
      Constant,

      // The values fluctuate, so a model has to be generated.
      Variable,
  };
  40. struct TrainingRequest {
  41. // Chart/dimension we want to train
  42. STRING *ChartId;
  43. STRING *DimensionId;
  44. // Creation time of request
  45. time_t RequestTime;
  46. // First/last entry of this dimension in DB
  47. // at the point the request was made
  48. time_t FirstEntryOnRequest;
  49. time_t LastEntryOnRequest;
  50. };
  51. void dumpTrainingRequest(const TrainingRequest &TrainingReq, const char *Prefix);
  // Outcome of an attempt to train a dimension.
  //
  // This is deliberately left as an unscoped enum: its enumerators are also
  // visible unqualified in the enclosing scope and convert implicitly to
  // integers, and existing callers may rely on either.
  enum TrainingResult {
      // A KMeans model was created.
      Ok,

      // The DB could not be queried with a correct time range.
      InvalidQueryTimeRange,

      // Not enough data was gathered from the DB to run KMeans.
      NotEnoughCollectedValues,

      // The acquired dimension was null.
      NullAcquiredDimension,

      // The chart is under replication.
      ChartUnderReplication,
  };
  64. struct TrainingResponse {
  65. // Time when the request for this response was made
  66. time_t RequestTime;
  67. // First/last entry of the dimension in DB when generating the request
  68. time_t FirstEntryOnRequest;
  69. time_t LastEntryOnRequest;
  70. // First/last entry of the dimension in DB when generating the response
  71. time_t FirstEntryOnResponse;
  72. time_t LastEntryOnResponse;
  73. // After/Before timestamps of our DB query
  74. time_t QueryAfterT;
  75. time_t QueryBeforeT;
  76. // Actual after/before returned by the DB query ops
  77. time_t DbAfterT;
  78. time_t DbBeforeT;
  79. // Number of doubles returned by the DB query
  80. size_t CollectedValues;
  81. // Number of values we return to the caller
  82. size_t TotalValues;
  83. // Result of training response
  84. TrainingResult Result;
  85. };
  86. void dumpTrainingResponse(const TrainingResponse &TrainingResp, const char *Prefix);
  87. class Dimension {
  88. public:
  89. Dimension(RRDDIM *RD) :
  90. RD(RD),
  91. MT(MetricType::Constant),
  92. TS(TrainingStatus::Untrained),
  93. TR(),
  94. LastTrainingTime(0)
  95. {
  96. if (simple_pattern_matches(Cfg.SP_ChartsToSkip, rrdset_name(RD->rrdset)))
  97. MLS = MachineLearningStatus::DisabledDueToExcludedChart;
  98. else if (RD->update_every != RD->rrdset->rrdhost->rrd_update_every)
  99. MLS = MachineLearningStatus::DisabledDueToUniqueUpdateEvery;
  100. else
  101. MLS = MachineLearningStatus::Enabled;
  102. Models.reserve(Cfg.NumModelsToUse);
  103. }
  104. RRDDIM *getRD() const {
  105. return RD;
  106. }
  107. unsigned updateEvery() const {
  108. return RD->update_every;
  109. }
  110. MetricType getMT() const {
  111. return MT;
  112. }
  113. TrainingStatus getTS() const {
  114. return TS;
  115. }
  116. MachineLearningStatus getMLS() const {
  117. return MLS;
  118. }
  119. TrainingResult trainModel(const TrainingRequest &TR);
  120. void scheduleForTraining(time_t CurrT);
  121. bool predict(time_t CurrT, CalculatedNumber Value, bool Exists);
  122. std::vector<KMeans> getModels();
  123. void dump() const;
  124. private:
  125. TrainingRequest getTrainingRequest(time_t CurrT) const {
  126. return TrainingRequest {
  127. string_dup(RD->rrdset->id),
  128. string_dup(RD->id),
  129. CurrT,
  130. rrddim_first_entry_s(RD),
  131. rrddim_last_entry_s(RD)
  132. };
  133. }
  134. private:
  135. std::pair<CalculatedNumber *, TrainingResponse> getCalculatedNumbers(const TrainingRequest &TrainingReq);
  136. public:
  137. RRDDIM *RD;
  138. MetricType MT;
  139. TrainingStatus TS;
  140. TrainingResponse TR;
  141. time_t LastTrainingTime;
  142. MachineLearningStatus MLS;
  143. std::vector<CalculatedNumber> CNs;
  144. DSample Feature;
  145. std::vector<KMeans> Models;
  146. Mutex M;
  147. };
  148. } // namespace ml
  149. #endif /* ML_DIMENSION_H */