// SMusatov/netdata - mirror of https://github.com/netdata/netdata.git
							// SPDX-License-Identifier: GPL-3.0-or-later

#include "Config.h"
#include "Dimension.h"
#include "Query.h"

using namespace ml;

/*
 * Copy of unpack_storage_number which converts a packed storage_number
 * into a CalculatedNumber.
 *
 * Fields decoded here (bit 0 is the least significant bit):
 *   bit  31     sign: 0 = positive, 1 = negative
 *   bit  30     scaling direction: 0 = divide, 1 = multiply
 *   bits 29-27  number of scaling steps to apply (0-7)
 *   bit  26     SN_EXISTS_100: multiply by 100 per step instead of 10
 *   bits 25-24  SN_EXISTS_RESET / SN_ANOMALY_BIT, not used by the decoding
 *   bits 23-0   the unsigned magnitude
 *
 * A storage_number equal to 0 decodes to 0.
 */
static CalculatedNumber unpack_storage_number_dbl(storage_number value) {
    if(!value)
        return 0;

    // Build the masks in the storage type itself: shifting a signed 1 into
    // bit 31 is not portable.
    const storage_number One = 1;
    const storage_number SignBit = One << 31;
    const storage_number MultiplyBit = One << 30;
    const storage_number Factor100Bit = One << 26;
    const storage_number MagnitudeMask = (One << 24) - 1;

    const bool Negative = (value & SignBit) != 0;
    const bool Multiply = (value & MultiplyBit) != 0;
    const int Factor = (value & Factor100Bit) ? 100 : 10;

    // Three-bit step count stored in bits 29-27.
    int Steps = (value >> 27) & 0x7;

    // Keep only the magnitude bits.
    value &= MagnitudeMask;
    CalculatedNumber CN = value;

    // Apply the scaling one step at a time. Only the multiply direction
    // honours the 100 factor; the divide direction always divides by 10.
    for(; Steps > 0; Steps--) {
        if(Multiply)
            CN *= Factor;
        else
            CN /= 10;
    }

    return Negative ? -CN : CN;
}

/*
 * Collect the most recent values of this dimension for training.
 *
 * The query window ends one second before now and covers at most
 * Cfg.MaxTrainSamples * updateEvery() seconds. Both ends are aligned down
 * to a multiple of updateEvery() and clamped to [oldestTime(), latestTime()].
 *
 * Slots whose storage number does not exist are filled with the last value
 * seen; slots before the first existing value are NaN and are trimmed from
 * the front of the buffer before returning.
 *
 * Returns { nullptr, 0 } when the window is empty or fewer than
 * Cfg.MinTrainSamples existing values were found. Otherwise returns a buffer
 * allocated with new[] (the caller must delete[] it) together with the
 * number of usable values at its start. That count can be 0 only when
 * Cfg.MinTrainSamples is 0.
 */
std::pair<CalculatedNumber *, size_t>
TrainableDimension::getCalculatedNumbers() {
    size_t MinN = Cfg.MinTrainSamples;
    size_t MaxN = Cfg.MaxTrainSamples;

    // Figure out what our time window should be.
    time_t BeforeT = now_realtime_sec() - 1;
    time_t AfterT = BeforeT - (MaxN * updateEvery());

    // Align both ends down to a multiple of the update interval.
    BeforeT -= (BeforeT % updateEvery());
    AfterT -= (AfterT % updateEvery());

    // Do not ask for data outside of what is actually stored.
    BeforeT = std::min(BeforeT, latestTime());
    AfterT = std::max(AfterT, oldestTime());

    if (AfterT >= BeforeT)
        return { nullptr, 0 };

    // Zero-initialized. NOTE(review): the buffer holds (LagN + 1) times more
    // slots than are filled here - presumably headroom for the consumer of
    // the buffer; confirm against SamplesBuffer.
    CalculatedNumber *CNs = new CalculatedNumber[MaxN * (Cfg.LagN + 1)]();

    // Start the query.
    unsigned Idx = 0;
    unsigned CollectedValues = 0;
    unsigned TotalValues = 0;

    // Until the first existing value shows up, gaps are recorded as NaN.
    CalculatedNumber LastValue = std::numeric_limits<CalculatedNumber>::quiet_NaN();
    Query Q = Query(getRD());

    Q.init(AfterT, BeforeT);
    while (!Q.isFinished()) {
        if (Idx == MaxN)
            break;

        auto P = Q.nextMetric();
        storage_number SN = P.second;

        if (does_storage_number_exist(SN)) {
            CNs[Idx] = unpack_storage_number_dbl(SN);
            LastValue = CNs[Idx];
            CollectedValues++;
        } else
            CNs[Idx] = LastValue;

        Idx++;
    }
    TotalValues = Idx;

    if (CollectedValues < MinN) {
        delete[] CNs;
        return { nullptr, 0 };
    }

    // Find first non-NaN value. The scan is bounded by the number of filled
    // slots so that an all-NaN buffer can neither run past the filled region
    // nor underflow the value count.
    unsigned FirstValid = 0;
    while (FirstValid < TotalValues && std::isnan(CNs[FirstValid]))
        FirstValid++;
    TotalValues -= FirstValid;

    // Overwrite NaN values.
    if (FirstValid != 0)
        memmove(CNs, &CNs[FirstValid], sizeof(CalculatedNumber) * TotalValues);

    return { CNs, TotalValues };
}

/*
 * Train the k-means model of this dimension on its recent values.
 *
 * Returns MLResult::MissingData when getCalculatedNumbers() provides no
 * buffer or an empty one, and MLResult::Success after training. On success
 * the dimension is flagged as Trained and ConstantModel is reset to true.
 */
MLResult TrainableDimension::trainModel() {
    auto P = getCalculatedNumbers();
    CalculatedNumber *CNs = P.first;
    unsigned N = P.second;

    if (!CNs)
        return MLResult::MissingData;

    // An empty buffer would make the sampling ratio below a division by
    // zero and would mark the model as trained on no data at all.
    if (N == 0) {
        delete[] CNs;
        return MLResult::MissingData;
    }

    // Cap the fraction of samples used for training at 100%.
    unsigned TargetNumSamples = Cfg.MaxTrainSamples * Cfg.RandomSamplingRatio;
    double SamplingRatio = std::min(static_cast<double>(TargetNumSamples) / N, 1.0);

    SamplesBuffer SB = SamplesBuffer(CNs, N, 1, Cfg.DiffN, Cfg.SmoothN, Cfg.LagN,
                                     SamplingRatio, Cfg.RandomNums);
    KM.train(SB, Cfg.MaxKMeansIters);

    Trained = true;
    ConstantModel = true;

    // The buffer returned by getCalculatedNumbers() is owned by us.
    delete[] CNs;
    return MLResult::Success;
}

/*
 * Feed one collected value into the window used for prediction.
 *
 * A value that does not exist empties the window. Otherwise the value is
 * appended until the window holds DiffN + SmoothN + LagN entries; from then
 * on the window is shifted by one position and the value is written into
 * its last slot. ConstantModel is cleared when the value differs from the
 * entry it replaces.
 */
void PredictableDimension::addValue(CalculatedNumber Value, bool Exists) {
    // A gap in the data discards everything gathered so far.
    if (!Exists) {
        CNs.clear();
        return;
    }

    const unsigned WindowSize = Cfg.DiffN + Cfg.SmoothN + Cfg.LagN;

    // Window not full yet: just append.
    if (CNs.size() < WindowSize) {
        CNs.push_back(Value);
        return;
    }

    // Shift everything one position to the left; the former first entry
    // wraps around into the last slot.
    std::rotate(CNs.begin(), CNs.begin() + 1, CNs.end());

    const unsigned LastSlot = WindowSize - 1;
    const bool Differs = (CNs[LastSlot] != Value);

    CNs[LastSlot] = Value;

    if (Differs)
        ConstantModel = false;
}

/*
 * Compute the anomaly score of the current window and derive the anomaly bit.
 *
 * Returns a pair of the result code and the anomaly bit:
 *   - MLResult::MissingData, false  when the window does not hold exactly
 *     DiffN + SmoothN + LagN values;
 *   - MLResult::NaN, false          when the computed score is NaN;
 *   - MLResult::Success, bit        otherwise, where bit is set when the
 *     score is at least 100 * Cfg.DimensionAnomalyScoreThreshold.
 *
 * AnomalyScore and AnomalyBit are updated as a side effect.
 */
std::pair<MLResult, bool> PredictableDimension::predict() {
    unsigned N = Cfg.DiffN + Cfg.SmoothN + Cfg.LagN;
    if (CNs.size() != N) {
        AnomalyBit = false;
        return { MLResult::MissingData, AnomalyBit };
    }

    // Work on a zero-initialized scratch copy of the window, so that the
    // window itself stays untouched.
    CalculatedNumber *TmpCNs = new CalculatedNumber[N * (Cfg.LagN + 1)]();
    std::memcpy(TmpCNs, CNs.data(), N * sizeof(CalculatedNumber));

    SamplesBuffer SB = SamplesBuffer(TmpCNs, N, 1, Cfg.DiffN, Cfg.SmoothN, Cfg.LagN,
                                     1.0, Cfg.RandomNums);
    AnomalyScore = computeAnomalyScore(SB);
    delete[] TmpCNs;

    // NaN never compares equal to anything, itself included, so it has to
    // be detected with std::isnan rather than with operator==.
    if (std::isnan(AnomalyScore)) {
        AnomalyBit = false;
        return { MLResult::NaN, AnomalyBit };
    }

    AnomalyBit = AnomalyScore >= (100 * Cfg.DimensionAnomalyScoreThreshold);
    return { MLResult::Success, AnomalyBit };
}