Browse Source

Refactor ML code. (#14659)

* Refactor ML code.

This commit introduces only non-functional changes.

Originally, the C++ code exposed C functions to be called
from the rest of the agent. When we migrated from C++ to C,
we deliberately kept these wrapper functions in place, to make
that PR easier to understand and to keep its total LOC low.

This commit removes the wrapper functions and "reclaims" the
`ml_` prefix that we used for the public API of the old
implementation.

Also, the nlohmann JSON library has been removed, and its
functionality has been replaced with the equivalent JSON functionality
that we added in libnetdata's BUFFERs.

* Remove headers that no longer exist from build systems.

* Fix CMake build.

* rrddim_free is outside of rrd "internals" now.
vkalintiris 2 years ago
parent
commit
37a06960f9
10 changed files with 39 additions and 51 deletions
  1. 0 4
      .gitmodules
  2. 15 21
      CMakeLists.txt
  3. 1 3
      Makefile.am
  4. 2 2
      collectors/plugins.d/pluginsd_parser.c
  5. 1 1
      configure.ac
  6. 8 8
      database/rrd.h
  7. 3 3
      database/rrdhost.c
  8. 3 3
      database/rrdset.c
  9. 3 3
      ml/Config.cc
  10. 3 3
      ml/ad_charts.cc

+ 0 - 4
.gitmodules

@@ -9,7 +9,3 @@
 	url = https://github.com/davisking/dlib.git
 	url = https://github.com/davisking/dlib.git
 	shallow = true
 	shallow = true
 	ignore = dirty
 	ignore = dirty
-[submodule "ml/json"]
-	path = ml/json
-	url = https://github.com/nlohmann/json.git
-	shallow = true

+ 15 - 21
CMakeLists.txt

@@ -352,8 +352,7 @@ ENDIF()
 # Detect ml dependencies
 # Detect ml dependencies
 file(STRINGS "${CMAKE_SOURCE_DIR}/config.h" DEFINE_ENABLE_ML REGEX "^#define ENABLE_ML 1$")
 file(STRINGS "${CMAKE_SOURCE_DIR}/config.h" DEFINE_ENABLE_ML REGEX "^#define ENABLE_ML 1$")
 IF(DEFINE_ENABLE_ML MATCHES ".+" AND
 IF(DEFINE_ENABLE_ML MATCHES ".+" AND
-   EXISTS "${CMAKE_SOURCE_DIR}/ml/dlib/dlib/all/source.cpp" AND
-   EXISTS "${CMAKE_SOURCE_DIR}/ml/json/single_include/nlohmann/json.hpp")
+   EXISTS "${CMAKE_SOURCE_DIR}/ml/dlib/dlib/all/source.cpp")
     set(ENABLE_ML True)
     set(ENABLE_ML True)
     list(APPEND NETDATA_COMMON_CFLAGS "-DDLIB_NO_GUI_SUPPORT")
     list(APPEND NETDATA_COMMON_CFLAGS "-DDLIB_NO_GUI_SUPPORT")
     list(APPEND NETDATA_COMMON_INCLUDE_DIRS "ml/dlib")
     list(APPEND NETDATA_COMMON_INCLUDE_DIRS "ml/dlib")
@@ -718,12 +717,21 @@ set(STATSD_PLUGIN_FILES
         )
         )
 
 
 set(RRD_PLUGIN_FILES
 set(RRD_PLUGIN_FILES
+        database/contexts/api_v1.c
+        database/contexts/api_v2.c
+        database/contexts/context.c
+        database/contexts/instance.c
+        database/contexts/internal.h
+        database/contexts/metric.c
+        database/contexts/query_scope.c
+        database/contexts/query_target.c
+        database/contexts/rrdcontext.c
+        database/contexts/rrdcontext.h
+        database/contexts/worker.c
         database/rrdcalc.c
         database/rrdcalc.c
         database/rrdcalc.h
         database/rrdcalc.h
         database/rrdcalctemplate.c
         database/rrdcalctemplate.c
         database/rrdcalctemplate.h
         database/rrdcalctemplate.h
-        database/rrdcontext.c
-        database/rrdcontext.h
         database/rrddim.c
         database/rrddim.c
         database/rrddimvar.c
         database/rrddimvar.c
         database/rrddimvar.h
         database/rrddimvar.h
@@ -747,7 +755,7 @@ set(RRD_PLUGIN_FILES
         database/sqlite/sqlite_metadata.h
         database/sqlite/sqlite_metadata.h
         database/sqlite/sqlite_functions.c
         database/sqlite/sqlite_functions.c
         database/sqlite/sqlite_functions.h
         database/sqlite/sqlite_functions.h
-		database/sqlite/sqlite_context.c
+        database/sqlite/sqlite_context.c
         database/sqlite/sqlite_context.h
         database/sqlite/sqlite_context.h
         database/sqlite/sqlite_db_migration.c
         database/sqlite/sqlite_db_migration.c
         database/sqlite/sqlite_db_migration.h
         database/sqlite/sqlite_db_migration.h
@@ -1037,24 +1045,10 @@ set(ML_FILES
 IF(ENABLE_ML)
 IF(ENABLE_ML)
     message(STATUS "ML: enabled")
     message(STATUS "ML: enabled")
     list(APPEND ML_FILES
     list(APPEND ML_FILES
-            ml/ADCharts.h
-            ml/ADCharts.cc
-            ml/Chart.h
-            ml/Chart.cc
-            ml/Config.h
+            ml/ad_charts.h
+            ml/ad_charts.cc
             ml/Config.cc
             ml/Config.cc
-            ml/Dimension.h
-            ml/Dimension.cc
-            ml/Host.h
-            ml/Host.cc
-            ml/Mutex.h
-            ml/Query.h
-            ml/KMeans.h
-            ml/KMeans.cc
-            ml/SamplesBuffer.h
-            ml/SamplesBuffer.cc
             ml/dlib/dlib/all/source.cpp
             ml/dlib/dlib/all/source.cpp
-            ml/json/single_include/nlohmann/json.hpp
             ml/ml.cc
             ml/ml.cc
             ml/ml-private.h
             ml/ml-private.h
     )
     )

+ 1 - 3
Makefile.am

@@ -239,9 +239,7 @@ ML_FILES += \
     ml/ad_charts.cc \
     ml/ad_charts.cc \
     ml/Config.cc \
     ml/Config.cc \
     ml/dlib/dlib/all/source.cpp \
     ml/dlib/dlib/all/source.cpp \
-    ml/json/single_include/nlohmann/json.hpp \
-    ml/nml.h \
-    ml/nml.cc \
+    ml/ml-private.h \
     ml/ml.cc \
     ml/ml.cc \
     $(NULL)
     $(NULL)
 
 

+ 2 - 2
collectors/plugins.d/pluginsd_parser.c

@@ -1693,10 +1693,10 @@ PARSER_RC pluginsd_set_v2(char **words, size_t num_words, void *user) {
         flags = SN_EMPTY_SLOT;
         flags = SN_EMPTY_SLOT;
 
 
         if(u->v2.ml_locked)
         if(u->v2.ml_locked)
-            ml_is_anomalous(rd, u->v2.end_time, 0, false);
+            ml_dimension_is_anomalous(rd, u->v2.end_time, 0, false);
     }
     }
     else if(u->v2.ml_locked) {
     else if(u->v2.ml_locked) {
-        if (ml_is_anomalous(rd, u->v2.end_time, value, true)) {
+        if (ml_dimension_is_anomalous(rd, u->v2.end_time, value, true)) {
             // clear anomaly bit: 0 -> is anomalous, 1 -> not anomalous
             // clear anomaly bit: 0 -> is anomalous, 1 -> not anomalous
             flags &= ~((storage_number) SN_FLAG_NOT_ANOMALOUS);
             flags &= ~((storage_number) SN_FLAG_NOT_ANOMALOUS);
         }
         }

+ 1 - 1
configure.ac

@@ -1140,7 +1140,7 @@ fi
 
 
 # Check if submodules have not been fetched. Fail if ML was explicitly requested.
 # Check if submodules have not been fetched. Fail if ML was explicitly requested.
 AC_MSG_CHECKING([if git submodules are present for machine learning functionality])
 AC_MSG_CHECKING([if git submodules are present for machine learning functionality])
-if test -f "ml/dlib/dlib/all/source.cpp" -a -f "ml/json/single_include/nlohmann/json.hpp"; then
+if test -f "ml/dlib/dlib/all/source.cpp"; then
     AC_MSG_RESULT([yes])
     AC_MSG_RESULT([yes])
     have_ml_submodules="yes"
     have_ml_submodules="yes"
 else
 else

+ 8 - 8
database/rrd.h

@@ -30,9 +30,9 @@ typedef struct rrdhost_acquired RRDHOST_ACQUIRED;
 typedef struct rrdset_acquired RRDSET_ACQUIRED;
 typedef struct rrdset_acquired RRDSET_ACQUIRED;
 typedef struct rrddim_acquired RRDDIM_ACQUIRED;
 typedef struct rrddim_acquired RRDDIM_ACQUIRED;
 
 
-typedef struct ml_host ml_host_t;
-typedef struct ml_chart ml_chart_t;
-typedef struct ml_dimension ml_dimension_t;
+typedef struct ml_host rrd_ml_host_t;
+typedef struct ml_chart rrd_ml_chart_t;
+typedef struct ml_dimension rrd_ml_dimension_t;
 
 
 typedef enum __attribute__ ((__packed__)) {
 typedef enum __attribute__ ((__packed__)) {
     QUERY_SOURCE_UNKNOWN = 0,
     QUERY_SOURCE_UNKNOWN = 0,
@@ -363,7 +363,7 @@ struct rrddim {
     // ------------------------------------------------------------------------
     // ------------------------------------------------------------------------
     // operational state members
     // operational state members
 
 
-    ml_dimension_t *ml_dimension;                   // machine learning data about this dimension
+    rrd_ml_dimension_t *ml_dimension;                   // machine learning data about this dimension
 
 
     // ------------------------------------------------------------------------
     // ------------------------------------------------------------------------
     // linking to siblings and parents
     // linking to siblings and parents
@@ -626,7 +626,7 @@ struct rrdset {
     DICTIONARY *rrddimvar_root_index;               // dimension variables
     DICTIONARY *rrddimvar_root_index;               // dimension variables
                                                     // we use this dictionary to manage their allocation
                                                     // we use this dictionary to manage their allocation
 
 
-    ml_chart_t *ml_chart;
+    rrd_ml_chart_t *ml_chart;
 
 
     // ------------------------------------------------------------------------
     // ------------------------------------------------------------------------
     // operational state members
     // operational state members
@@ -1067,7 +1067,7 @@ struct rrdhost {
 
 
     // ------------------------------------------------------------------------
     // ------------------------------------------------------------------------
     // ML handle
     // ML handle
-    ml_host_t *ml_host;
+    rrd_ml_host_t *ml_host;
 
 
     // ------------------------------------------------------------------------
     // ------------------------------------------------------------------------
     // Support for host-level labels
     // Support for host-level labels
@@ -1358,13 +1358,13 @@ void rrdset_delete_files(RRDSET *st);
 void rrdset_save(RRDSET *st);
 void rrdset_save(RRDSET *st);
 void rrdset_free(RRDSET *st);
 void rrdset_free(RRDSET *st);
 
 
+void rrddim_free(RRDSET *st, RRDDIM *rd);
+
 #ifdef NETDATA_RRD_INTERNALS
 #ifdef NETDATA_RRD_INTERNALS
 
 
 char *rrdhost_cache_dir_for_rrdset_alloc(RRDHOST *host, const char *id);
 char *rrdhost_cache_dir_for_rrdset_alloc(RRDHOST *host, const char *id);
 const char *rrdset_cache_dir(RRDSET *st);
 const char *rrdset_cache_dir(RRDSET *st);
 
 
-void rrddim_free(RRDSET *st, RRDDIM *rd);
-
 void rrdset_reset(RRDSET *st);
 void rrdset_reset(RRDSET *st);
 void rrdset_delete_obsolete_dimensions(RRDSET *st);
 void rrdset_delete_obsolete_dimensions(RRDSET *st);
 
 

+ 3 - 3
database/rrdhost.c

@@ -525,7 +525,7 @@ int is_legacy = 1;
     rrdhost_load_rrdcontext_data(host);
     rrdhost_load_rrdcontext_data(host);
     if (!archived) {
     if (!archived) {
         ml_host_new(host);
         ml_host_new(host);
-        ml_start_training_thread(host);
+        ml_host_start_training_thread(host);
     } else
     } else
         rrdhost_flag_set(host, RRDHOST_FLAG_ARCHIVED | RRDHOST_FLAG_ORPHAN);
         rrdhost_flag_set(host, RRDHOST_FLAG_ARCHIVED | RRDHOST_FLAG_ORPHAN);
 
 
@@ -642,7 +642,7 @@ static void rrdhost_update(RRDHOST *host
         host->rrdpush_replication_step = rrdpush_replication_step;
         host->rrdpush_replication_step = rrdpush_replication_step;
 
 
         ml_host_new(host);
         ml_host_new(host);
-        ml_start_training_thread(host);
+        ml_host_start_training_thread(host);
         
         
         rrdhost_load_rrdcontext_data(host);
         rrdhost_load_rrdcontext_data(host);
         info("Host %s is not in archived mode anymore", rrdhost_hostname(host));
         info("Host %s is not in archived mode anymore", rrdhost_hostname(host));
@@ -1145,7 +1145,7 @@ void rrdhost_free___while_having_rrd_wrlock(RRDHOST *host, bool force) {
     rrdcalctemplate_index_destroy(host);
     rrdcalctemplate_index_destroy(host);
 
 
     // cleanup ML resources
     // cleanup ML resources
-    ml_stop_training_thread(host);
+    ml_host_stop_training_thread(host);
     ml_host_delete(host);
     ml_host_delete(host);
 
 
     freez(host->exporting_flags);
     freez(host->exporting_flags);

+ 3 - 3
database/rrdset.c

@@ -1406,7 +1406,7 @@ static inline size_t rrdset_done_interpolate(
             time_t current_time_s = (time_t) (next_store_ut / USEC_PER_SEC);
             time_t current_time_s = (time_t) (next_store_ut / USEC_PER_SEC);
 
 
             if(unlikely(!store_this_entry)) {
             if(unlikely(!store_this_entry)) {
-                (void) ml_is_anomalous(rd, current_time_s, 0, false);
+                (void) ml_dimension_is_anomalous(rd, current_time_s, 0, false);
 
 
                 if(rsb->wb && rsb->v2)
                 if(rsb->wb && rsb->v2)
                     rrddim_push_metrics_v2(rsb, rd, next_store_ut, NAN, SN_FLAG_NONE);
                     rrddim_push_metrics_v2(rsb, rd, next_store_ut, NAN, SN_FLAG_NONE);
@@ -1418,7 +1418,7 @@ static inline size_t rrdset_done_interpolate(
             if(likely(rd->updated && rd->collections_counter > 1 && iterations < gap_when_lost_iterations_above)) {
             if(likely(rd->updated && rd->collections_counter > 1 && iterations < gap_when_lost_iterations_above)) {
                 uint32_t dim_storage_flags = storage_flags;
                 uint32_t dim_storage_flags = storage_flags;
 
 
-                if (ml_is_anomalous(rd, current_time_s, new_value, true)) {
+                if (ml_dimension_is_anomalous(rd, current_time_s, new_value, true)) {
                     // clear anomaly bit: 0 -> is anomalous, 1 -> not anomalous
                     // clear anomaly bit: 0 -> is anomalous, 1 -> not anomalous
                     dim_storage_flags &= ~((storage_number)SN_FLAG_NOT_ANOMALOUS);
                     dim_storage_flags &= ~((storage_number)SN_FLAG_NOT_ANOMALOUS);
                 }
                 }
@@ -1430,7 +1430,7 @@ static inline size_t rrdset_done_interpolate(
                 rd->last_stored_value = new_value;
                 rd->last_stored_value = new_value;
             }
             }
             else {
             else {
-                (void) ml_is_anomalous(rd, current_time_s, 0, false);
+                (void) ml_dimension_is_anomalous(rd, current_time_s, 0, false);
 
 
                 rrdset_debug(st, "%s: STORE[%ld] = NON EXISTING ", rrddim_name(rd), current_entry);
                 rrdset_debug(st, "%s: STORE[%ld] = NON EXISTING ", rrddim_name(rd), current_entry);
 
 

+ 3 - 3
ml/Config.cc

@@ -1,12 +1,12 @@
 // SPDX-License-Identifier: GPL-3.0-or-later
 // SPDX-License-Identifier: GPL-3.0-or-later
 
 
-#include "nml.h"
+#include "ml-private.h"
 
 
 /*
 /*
  * Global configuration instance to be shared between training and
  * Global configuration instance to be shared between training and
  * prediction threads.
  * prediction threads.
  */
  */
-nml_config_t Cfg;
+ml_config_t Cfg;
 
 
 template <typename T>
 template <typename T>
 static T clamp(const T& Value, const T& Min, const T& Max) {
 static T clamp(const T& Value, const T& Min, const T& Max) {
@@ -16,7 +16,7 @@ static T clamp(const T& Value, const T& Min, const T& Max) {
 /*
 /*
  * Initialize global configuration variable.
  * Initialize global configuration variable.
  */
  */
-void nml_config_load(nml_config_t *cfg) {
+void ml_config_load(ml_config_t *cfg) {
     const char *config_section_ml = CONFIG_SECTION_ML;
     const char *config_section_ml = CONFIG_SECTION_ML;
 
 
     bool enable_anomaly_detection = config_get_boolean(config_section_ml, "enabled", true);
     bool enable_anomaly_detection = config_get_boolean(config_section_ml, "enabled", true);

+ 3 - 3
ml/ad_charts.cc

@@ -2,7 +2,7 @@
 
 
 #include "ad_charts.h"
 #include "ad_charts.h"
 
 
-void nml_update_dimensions_chart(nml_host_t *host, const nml_machine_learning_stats_t &mls) {
+void ml_update_dimensions_chart(ml_host_t *host, const ml_machine_learning_stats_t &mls) {
     /*
     /*
      * Machine learning status
      * Machine learning status
     */
     */
@@ -182,7 +182,7 @@ void nml_update_dimensions_chart(nml_host_t *host, const nml_machine_learning_st
 
 
 }
 }
 
 
-void nml_update_host_and_detection_rate_charts(nml_host_t *host, collected_number AnomalyRate) {
+void ml_update_host_and_detection_rate_charts(ml_host_t *host, collected_number AnomalyRate) {
     /*
     /*
      * Anomaly rate
      * Anomaly rate
     */
     */
@@ -301,7 +301,7 @@ void nml_update_host_and_detection_rate_charts(nml_host_t *host, collected_numbe
     }
     }
 }
 }
 
 
-void nml_update_training_statistics_chart(nml_host_t *host, const nml_training_stats_t &ts) {
+void ml_update_training_statistics_chart(ml_host_t *host, const ml_training_stats_t &ts) {
     /*
     /*
      * queue stats
      * queue stats
     */
     */

Some files were not shown because too many files changed in this diff