// SPDX-License-Identifier: GPL-3.0-or-later
// Source: SMusatov/netdata, mirror of https://github.com/netdata/netdata.git
//
#include "SamplesBuffer.h"

#include <fstream>
#include <sstream>
#include <string>

// Writes the sample's values to OS as a comma-separated list ("a, b, c").
// No trailing separator and no newline are emitted.
//
// An empty sample (NumDims == 0) prints nothing: the loop never forms
// `NumDims - 1`, so no out-of-range index into CNs can be produced.
void Sample::print(std::ostream &OS) const {
    for (size_t Idx = 0; Idx != NumDims; Idx++) {
        // The separator precedes every value except the first one, which
        // yields the same output as "all but last, then last" for
        // NumDims >= 1 while staying well-defined for NumDims == 0.
        if (Idx != 0)
            OS << ", ";

        OS << CNs[Idx];
    }
}

void SamplesBuffer::print(std::ostream &OS) const {
    for (size_t Idx = Preprocessed ? (DiffN + (SmoothN - 1) + (LagN)) : 0;
         Idx != NumSamples; Idx++) {
        Sample S = Preprocessed ? getPreprocessedSample(Idx) : getSample(Idx);
        OS << S << std::endl;
    }
}

std::vector<Sample> SamplesBuffer::getPreprocessedSamples() const {
    std::vector<Sample> V;

    for (size_t Idx = Preprocessed ? (DiffN + (SmoothN - 1) + (LagN)) : 0;
         Idx != NumSamples; Idx++) {
        Sample S = Preprocessed ? getPreprocessedSample(Idx) : getSample(Idx);
        V.push_back(S);
    }

    return V;
}

void SamplesBuffer::diffSamples() {
    // Panda's DataFrame default behaviour is to subtract each element from
    // itself. For us `DiffN = 0` means "disable diff-ing" when preprocessing
    // the samples buffer. This deviation will make it easier for us to test
    // the KMeans implementation.
    if (DiffN == 0)
        return;

    for (size_t Idx = 0; Idx != (NumSamples - DiffN); Idx++) {
        size_t High = (NumSamples - 1) - Idx;
        size_t Low = High - DiffN;

        Sample LHS = getSample(High);
        Sample RHS = getSample(Low);

        LHS.diff(RHS);
    }
}

// Smooths the buffer in place with a sliding window of SmoothN samples.
//
// The buffer is walked from its last sample towards the front. The sample at
// position `Idx - 1` is overwritten with the scaled (by 1 / SmoothN) sum of
// the window of SmoothN samples that ends at that position. The walk stops
// once `Idx` reaches `DiffN + SmoothN - 1`, so positions below that index are
// left untouched.
//
// Both preprocess() overloads in this file check `SmoothN != 0` and
// `SmoothN <= NumSamples - DiffN` before calling this function; it performs
// no such validation itself.
//
// NOTE(review): the two accumulators below are constructed with exactly one
// dimension, whereas other code in this file sizes features with
// NumDimsPerSample. Presumably samples are always one-dimensional here --
// confirm against the Sample/SamplesBuffer declarations.
void SamplesBuffer::smoothSamples() {
    // Holds the mean value of each window
    CalculatedNumber AccCNs[1] = { 0 };
    Sample Acc(AccCNs, 1);

    // Running (unscaled) sum of the current window. Kept separate from Acc
    // to avoid clobbering the accumulator when moving the window
    CalculatedNumber TmpCNs[1] = { 0 };
    Sample Tmp(TmpCNs, 1);

    // Multiplying a window sum by this factor yields the window mean
    CalculatedNumber Factor = (CalculatedNumber) 1 / SmoothN;

    // Calculate the value of the 1st window: accumulate the last
    // min(SmoothN, NumSamples) samples of the buffer into Tmp
    for (size_t Idx = 0; Idx != std::min(SmoothN, NumSamples); Idx++) {
        Tmp.add(getSample(NumSamples - (Idx + 1)));
    }

    Acc.add(Tmp);
    Acc.scale(Factor);

    // Move the window and update the samples
    for (size_t Idx = NumSamples; Idx != (DiffN + SmoothN - 1); Idx--) {
        Sample S = getSample(Idx - 1);

        // Tmp <- Next window (if any): drop the sample leaving the window
        // (S, still holding its pre-smoothing value at this point) and take
        // in the sample that enters it at the front
        if (Idx >= (SmoothN + 1)) {
            Tmp.diff(S);
            Tmp.add(getSample(Idx - (SmoothN + 1)));
        }

        // S <- Acc (must happen after S has been removed from Tmp above)
        S.copy(Acc);

        // Acc <- Tmp, scaled so that it is ready for the next position
        Acc.copy(Tmp);
        Acc.scale(Factor);
    }
}

void SamplesBuffer::lagSamples() {
    if (LagN == 0)
        return;

    for (size_t Idx = NumSamples; Idx != LagN; Idx--) {
        Sample PS = getPreprocessedSample(Idx - 1);
        PS.lag(getSample(Idx - 1), LagN);
    }
}

void SamplesBuffer::preprocess(std::vector<DSample> &Samples) {
    assert(Preprocessed == false);

    size_t OutN = NumSamples;

    // Diff
    if (DiffN >= OutN)
        return;
    OutN -= DiffN;
    diffSamples();

    // Smooth
    if (SmoothN == 0 || SmoothN > OutN)
        return;
    OutN -= (SmoothN - 1);
    smoothSamples();

    // Lag
    if (LagN >= OutN)
        return;
    OutN -= LagN;
    lagSamples();

    Samples.reserve(OutN);
    Preprocessed = true;

    uint32_t MaxMT = std::numeric_limits<uint32_t>::max();
    uint32_t CutOff = static_cast<double>(MaxMT) * SamplingRatio;

    for (size_t Idx = NumSamples - OutN; Idx != NumSamples; Idx++) {
        if (RandNums[Idx] > CutOff)
            continue;

        DSample DS;
        DS.set_size(NumDimsPerSample * (LagN + 1));

        const Sample PS = getPreprocessedSample(Idx);
        PS.initDSample(DS);

        Samples.push_back(std::move(DS));
    }
}

// Runs the diff -> smooth -> lag pipeline over the buffer and writes a
// feature vector into Feature.
//
// Each stage shrinks the number of usable samples: DiffN for diff-ing,
// SmoothN - 1 for smoothing and LagN for lagging. If a stage would leave no
// usable sample (or SmoothN is 0) the function returns early; stages that
// already ran are not undone, Preprocessed stays false and Feature is left
// untouched.
//
// Every usable position whose entry in RandNums does not exceed
// `max(uint32_t) * SamplingRatio` resizes and re-initialises Feature, so
// after the call Feature reflects the last such position (if any).
void SamplesBuffer::preprocess(DSample &Feature) {
    assert(Preprocessed == false);

    // Number of samples still usable after the stages applied so far
    size_t Remaining = NumSamples;

    // Stage 1: diff
    if (Remaining <= DiffN)
        return;
    Remaining -= DiffN;
    diffSamples();

    // Stage 2: smooth
    if (SmoothN == 0 || Remaining < SmoothN)
        return;
    Remaining -= (SmoothN - 1);
    smoothSamples();

    // Stage 3: lag
    if (Remaining <= LagN)
        return;
    Remaining -= LagN;
    lagSamples();

    Preprocessed = true;

    // Threshold used to sub-sample positions via their random numbers
    const uint32_t MaxMT = std::numeric_limits<uint32_t>::max();
    const uint32_t CutOff = static_cast<double>(MaxMT) * SamplingRatio;

    const size_t FirstUsable = NumSamples - Remaining;
    for (size_t Pos = FirstUsable; Pos != NumSamples; ++Pos) {
        // Position was not selected by the sampling threshold
        if (RandNums[Pos] > CutOff)
            continue;

        Feature.set_size(NumDimsPerSample * (LagN + 1));

        const Sample Lagged = getPreprocessedSample(Pos);
        Lagged.initDSample(Feature);
    }
}