123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183 |
- // SPDX-License-Identifier: GPL-3.0-or-later
- //
- #include "SamplesBuffer.h"
- #include <fstream>
- #include <sstream>
- #include <string>
- void Sample::print(std::ostream &OS) const {
- for (size_t Idx = 0; Idx != NumDims - 1; Idx++)
- OS << CNs[Idx] << ", ";
- OS << CNs[NumDims - 1];
- }
- void SamplesBuffer::print(std::ostream &OS) const {
- for (size_t Idx = Preprocessed ? (DiffN + (SmoothN - 1) + (LagN)) : 0;
- Idx != NumSamples; Idx++) {
- Sample S = Preprocessed ? getPreprocessedSample(Idx) : getSample(Idx);
- OS << S << std::endl;
- }
- }
- std::vector<Sample> SamplesBuffer::getPreprocessedSamples() const {
- std::vector<Sample> V;
- for (size_t Idx = Preprocessed ? (DiffN + (SmoothN - 1) + (LagN)) : 0;
- Idx != NumSamples; Idx++) {
- Sample S = Preprocessed ? getPreprocessedSample(Idx) : getSample(Idx);
- V.push_back(S);
- }
- return V;
- }
- void SamplesBuffer::diffSamples() {
- // Panda's DataFrame default behaviour is to subtract each element from
- // itself. For us `DiffN = 0` means "disable diff-ing" when preprocessing
- // the samples buffer. This deviation will make it easier for us to test
- // the KMeans implementation.
- if (DiffN == 0)
- return;
- for (size_t Idx = 0; Idx != (NumSamples - DiffN); Idx++) {
- size_t High = (NumSamples - 1) - Idx;
- size_t Low = High - DiffN;
- Sample LHS = getSample(High);
- Sample RHS = getSample(Low);
- LHS.diff(RHS);
- }
- }
- void SamplesBuffer::smoothSamples() {
- // Holds the mean value of each window
- CalculatedNumber AccCNs[1] = { 0 };
- Sample Acc(AccCNs, 1);
- // Used to avoid clobbering the accumulator when moving the window
- CalculatedNumber TmpCNs[1] = { 0 };
- Sample Tmp(TmpCNs, 1);
- CalculatedNumber Factor = (CalculatedNumber) 1 / SmoothN;
- // Calculate the value of the 1st window
- for (size_t Idx = 0; Idx != std::min(SmoothN, NumSamples); Idx++) {
- Tmp.add(getSample(NumSamples - (Idx + 1)));
- }
- Acc.add(Tmp);
- Acc.scale(Factor);
- // Move the window and update the samples
- for (size_t Idx = NumSamples; Idx != (DiffN + SmoothN - 1); Idx--) {
- Sample S = getSample(Idx - 1);
- // Tmp <- Next window (if any)
- if (Idx >= (SmoothN + 1)) {
- Tmp.diff(S);
- Tmp.add(getSample(Idx - (SmoothN + 1)));
- }
- // S <- Acc
- S.copy(Acc);
- // Acc <- Tmp
- Acc.copy(Tmp);
- Acc.scale(Factor);
- }
- }
- void SamplesBuffer::lagSamples() {
- if (LagN == 0)
- return;
- for (size_t Idx = NumSamples; Idx != LagN; Idx--) {
- Sample PS = getPreprocessedSample(Idx - 1);
- PS.lag(getSample(Idx - 1), LagN);
- }
- }
- void SamplesBuffer::preprocess(std::vector<DSample> &Samples) {
- assert(Preprocessed == false);
- size_t OutN = NumSamples;
- // Diff
- if (DiffN >= OutN)
- return;
- OutN -= DiffN;
- diffSamples();
- // Smooth
- if (SmoothN == 0 || SmoothN > OutN)
- return;
- OutN -= (SmoothN - 1);
- smoothSamples();
- // Lag
- if (LagN >= OutN)
- return;
- OutN -= LagN;
- lagSamples();
- Samples.reserve(OutN);
- Preprocessed = true;
- uint32_t MaxMT = std::numeric_limits<uint32_t>::max();
- uint32_t CutOff = static_cast<double>(MaxMT) * SamplingRatio;
- for (size_t Idx = NumSamples - OutN; Idx != NumSamples; Idx++) {
- if (RandNums[Idx] > CutOff)
- continue;
- DSample DS;
- DS.set_size(NumDimsPerSample * (LagN + 1));
- const Sample PS = getPreprocessedSample(Idx);
- PS.initDSample(DS);
- Samples.push_back(std::move(DS));
- }
- }
- void SamplesBuffer::preprocess(DSample &Feature) {
- assert(Preprocessed == false);
- size_t OutN = NumSamples;
- // Diff
- if (DiffN >= OutN)
- return;
- OutN -= DiffN;
- diffSamples();
- // Smooth
- if (SmoothN == 0 || SmoothN > OutN)
- return;
- OutN -= (SmoothN - 1);
- smoothSamples();
- // Lag
- if (LagN >= OutN)
- return;
- OutN -= LagN;
- lagSamples();
- Preprocessed = true;
- uint32_t MaxMT = std::numeric_limits<uint32_t>::max();
- uint32_t CutOff = static_cast<double>(MaxMT) * SamplingRatio;
- for (size_t Idx = NumSamples - OutN; Idx != NumSamples; Idx++) {
- if (RandNums[Idx] > CutOff)
- continue;
- Feature.set_size(NumDimsPerSample * (LagN + 1));
- const Sample PS = getPreprocessedSample(Idx);
- PS.initDSample(Feature);
- }
- }
|