SamplesBuffer.cc 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183
  1. // SPDX-License-Identifier: GPL-3.0-or-later
  2. //
  3. #include "SamplesBuffer.h"
  4. #include <fstream>
  5. #include <sstream>
  6. #include <string>
  7. void Sample::print(std::ostream &OS) const {
  8. for (size_t Idx = 0; Idx != NumDims - 1; Idx++)
  9. OS << CNs[Idx] << ", ";
  10. OS << CNs[NumDims - 1];
  11. }
  12. void SamplesBuffer::print(std::ostream &OS) const {
  13. for (size_t Idx = Preprocessed ? (DiffN + (SmoothN - 1) + (LagN)) : 0;
  14. Idx != NumSamples; Idx++) {
  15. Sample S = Preprocessed ? getPreprocessedSample(Idx) : getSample(Idx);
  16. OS << S << std::endl;
  17. }
  18. }
  19. std::vector<Sample> SamplesBuffer::getPreprocessedSamples() const {
  20. std::vector<Sample> V;
  21. for (size_t Idx = Preprocessed ? (DiffN + (SmoothN - 1) + (LagN)) : 0;
  22. Idx != NumSamples; Idx++) {
  23. Sample S = Preprocessed ? getPreprocessedSample(Idx) : getSample(Idx);
  24. V.push_back(S);
  25. }
  26. return V;
  27. }
  28. void SamplesBuffer::diffSamples() {
  29. // Panda's DataFrame default behaviour is to subtract each element from
  30. // itself. For us `DiffN = 0` means "disable diff-ing" when preprocessing
  31. // the samples buffer. This deviation will make it easier for us to test
  32. // the KMeans implementation.
  33. if (DiffN == 0)
  34. return;
  35. for (size_t Idx = 0; Idx != (NumSamples - DiffN); Idx++) {
  36. size_t High = (NumSamples - 1) - Idx;
  37. size_t Low = High - DiffN;
  38. Sample LHS = getSample(High);
  39. Sample RHS = getSample(Low);
  40. LHS.diff(RHS);
  41. }
  42. }
  43. void SamplesBuffer::smoothSamples() {
  44. // Holds the mean value of each window
  45. CalculatedNumber AccCNs[1] = { 0 };
  46. Sample Acc(AccCNs, 1);
  47. // Used to avoid clobbering the accumulator when moving the window
  48. CalculatedNumber TmpCNs[1] = { 0 };
  49. Sample Tmp(TmpCNs, 1);
  50. CalculatedNumber Factor = (CalculatedNumber) 1 / SmoothN;
  51. // Calculate the value of the 1st window
  52. for (size_t Idx = 0; Idx != std::min(SmoothN, NumSamples); Idx++) {
  53. Tmp.add(getSample(NumSamples - (Idx + 1)));
  54. }
  55. Acc.add(Tmp);
  56. Acc.scale(Factor);
  57. // Move the window and update the samples
  58. for (size_t Idx = NumSamples; Idx != (DiffN + SmoothN - 1); Idx--) {
  59. Sample S = getSample(Idx - 1);
  60. // Tmp <- Next window (if any)
  61. if (Idx >= (SmoothN + 1)) {
  62. Tmp.diff(S);
  63. Tmp.add(getSample(Idx - (SmoothN + 1)));
  64. }
  65. // S <- Acc
  66. S.copy(Acc);
  67. // Acc <- Tmp
  68. Acc.copy(Tmp);
  69. Acc.scale(Factor);
  70. }
  71. }
  72. void SamplesBuffer::lagSamples() {
  73. if (LagN == 0)
  74. return;
  75. for (size_t Idx = NumSamples; Idx != LagN; Idx--) {
  76. Sample PS = getPreprocessedSample(Idx - 1);
  77. PS.lag(getSample(Idx - 1), LagN);
  78. }
  79. }
  80. void SamplesBuffer::preprocess(std::vector<DSample> &Samples) {
  81. assert(Preprocessed == false);
  82. size_t OutN = NumSamples;
  83. // Diff
  84. if (DiffN >= OutN)
  85. return;
  86. OutN -= DiffN;
  87. diffSamples();
  88. // Smooth
  89. if (SmoothN == 0 || SmoothN > OutN)
  90. return;
  91. OutN -= (SmoothN - 1);
  92. smoothSamples();
  93. // Lag
  94. if (LagN >= OutN)
  95. return;
  96. OutN -= LagN;
  97. lagSamples();
  98. Samples.reserve(OutN);
  99. Preprocessed = true;
  100. uint32_t MaxMT = std::numeric_limits<uint32_t>::max();
  101. uint32_t CutOff = static_cast<double>(MaxMT) * SamplingRatio;
  102. for (size_t Idx = NumSamples - OutN; Idx != NumSamples; Idx++) {
  103. if (RandNums[Idx] > CutOff)
  104. continue;
  105. DSample DS;
  106. DS.set_size(NumDimsPerSample * (LagN + 1));
  107. const Sample PS = getPreprocessedSample(Idx);
  108. PS.initDSample(DS);
  109. Samples.push_back(std::move(DS));
  110. }
  111. }
  112. void SamplesBuffer::preprocess(DSample &Feature) {
  113. assert(Preprocessed == false);
  114. size_t OutN = NumSamples;
  115. // Diff
  116. if (DiffN >= OutN)
  117. return;
  118. OutN -= DiffN;
  119. diffSamples();
  120. // Smooth
  121. if (SmoothN == 0 || SmoothN > OutN)
  122. return;
  123. OutN -= (SmoothN - 1);
  124. smoothSamples();
  125. // Lag
  126. if (LagN >= OutN)
  127. return;
  128. OutN -= LagN;
  129. lagSamples();
  130. Preprocessed = true;
  131. uint32_t MaxMT = std::numeric_limits<uint32_t>::max();
  132. uint32_t CutOff = static_cast<double>(MaxMT) * SamplingRatio;
  133. for (size_t Idx = NumSamples - OutN; Idx != NumSamples; Idx++) {
  134. if (RandNums[Idx] > CutOff)
  135. continue;
  136. Feature.set_size(NumDimsPerSample * (LagN + 1));
  137. const Sample PS = getPreprocessedSample(Idx);
  138. PS.initDSample(Feature);
  139. }
  140. }