rrdhost.c 61 KB


  1. // SPDX-License-Identifier: GPL-3.0-or-later
  2. #define NETDATA_RRD_INTERNALS
  3. #include "rrd.h"
  // Forward declaration of a file-local helper; it is called by
  // rrdhost_initialize_rrdpush_sender() before its definition appears.
  4. static void rrdhost_streaming_sender_structures_init(RRDHOST *host);
  5. bool dbengine_enabled = false; // will become true if and when dbengine is initialized
  // Number of storage tiers in use. dbengine_init() re-reads it from the
  // configuration and clamps it to the range 1..RRD_STORAGE_TIERS.
  6. size_t storage_tiers = 3;
  // Default for the "dbengine use direct io" setting read by dbengine_init().
  7. bool use_direct_io = true;
  // Per-tier grouping multiplier: get_tier_grouping() multiplies entries 1..tier
  // of this array; entry 0 is not used by that product.
  8. size_t storage_tiers_grouping_iterations[RRD_STORAGE_TIERS] = { 1, 60, 60, 60, 60 };
  // Per-tier default backfill policy; dbengine_init() may override it from the
  // configuration for tiers above 0.
  9. RRD_BACKFILL storage_tiers_backfill[RRD_STORAGE_TIERS] = { RRD_BACKFILL_NEW, RRD_BACKFILL_NEW, RRD_BACKFILL_NEW, RRD_BACKFILL_NEW, RRD_BACKFILL_NEW };
  // The two initializers above list exactly 5 values; fail the build if the
  // number of tiers changes without updating them.
  10. #if RRD_STORAGE_TIERS != 5
  11. #error RRD_STORAGE_TIERS is not 5 - you need to update the grouping iterations per tier
  12. #endif
  13. size_t get_tier_grouping(size_t tier) {
  14. if(unlikely(tier >= storage_tiers)) tier = storage_tiers - 1;
  15. size_t grouping = 1;
  16. // first tier is always 1 iteration of whatever update every the chart has
  17. for(size_t i = 1; i <= tier ;i++)
  18. grouping *= storage_tiers_grouping_iterations[i];
  19. return grouping;
  20. }
  // Head of the list of hosts: rrdhost_create() prepends the local host to it
  // and appends every other host.
  21. RRDHOST *localhost = NULL;
  // NOTE(review): presumably the lock taken by rrd_wrlock()/rrd_unlock() - confirm in rrd.h
  22. netdata_rwlock_t rrd_rwlock = NETDATA_RWLOCK_INITIALIZER;
  // NOTE(review): not referenced in this part of the file; by its name a timeout in seconds
  23. time_t rrdset_free_obsolete_time_s = 3600;
  // Seconds that must pass after host->child_disconnected_time before
  // rrdhost_should_be_removed() reports an orphan host as removable.
  24. time_t rrdhost_free_orphan_time_s = 3600;
  25. bool is_storage_engine_shared(STORAGE_INSTANCE *engine __maybe_unused) {
  26. #ifdef ENABLE_DBENGINE
  27. if(!rrdeng_is_legacy(engine))
  28. return true;
  29. #endif
  30. return false;
  31. }
  32. // ----------------------------------------------------------------------------
  33. // RRDHOST indexes management
  // Index of hosts keyed by machine guid (see rrdhost_index_add_by_guid()).
  // Created lazily by rrdhost_init().
  34. DICTIONARY *rrdhost_root_index = NULL;
  // Index of hosts keyed by hostname (see rrdhost_index_add_hostname()).
  // Created lazily by rrdhost_init().
  35. static DICTIONARY *rrdhost_root_index_hostname = NULL;
  36. static inline void rrdhost_init() {
  37. if(unlikely(!rrdhost_root_index)) {
  38. rrdhost_root_index = dictionary_create_advanced(
  39. DICT_OPTION_NAME_LINK_DONT_CLONE | DICT_OPTION_VALUE_LINK_DONT_CLONE | DICT_OPTION_DONT_OVERWRITE_VALUE,
  40. &dictionary_stats_category_rrdhost, 0);
  41. }
  42. if(unlikely(!rrdhost_root_index_hostname)) {
  43. rrdhost_root_index_hostname = dictionary_create_advanced(
  44. DICT_OPTION_NAME_LINK_DONT_CLONE | DICT_OPTION_VALUE_LINK_DONT_CLONE | DICT_OPTION_DONT_OVERWRITE_VALUE,
  45. &dictionary_stats_category_rrdhost, 0);
  46. }
  47. }
  48. // ----------------------------------------------------------------------------
  49. // RRDHOST index by UUID
  50. inline size_t rrdhost_hosts_available(void) {
  51. return dictionary_entries(rrdhost_root_index);
  52. }
  53. inline RRDHOST *rrdhost_find_by_guid(const char *guid) {
  54. return dictionary_get(rrdhost_root_index, guid);
  55. }
  56. static inline RRDHOST *rrdhost_index_add_by_guid(RRDHOST *host) {
  57. RRDHOST *ret_machine_guid = dictionary_set(rrdhost_root_index, host->machine_guid, host, sizeof(RRDHOST));
  58. if(ret_machine_guid == host)
  59. rrdhost_option_set(host, RRDHOST_OPTION_INDEXED_MACHINE_GUID);
  60. else {
  61. rrdhost_option_clear(host, RRDHOST_OPTION_INDEXED_MACHINE_GUID);
  62. error("RRDHOST: %s() host with machine guid '%s' is already indexed", __FUNCTION__, host->machine_guid);
  63. }
  64. return host;
  65. }
  66. static void rrdhost_index_del_by_guid(RRDHOST *host) {
  67. if(rrdhost_option_check(host, RRDHOST_OPTION_INDEXED_MACHINE_GUID)) {
  68. if(!dictionary_del(rrdhost_root_index, host->machine_guid))
  69. error("RRDHOST: %s() failed to delete machine guid '%s' from index", __FUNCTION__, host->machine_guid);
  70. rrdhost_option_clear(host, RRDHOST_OPTION_INDEXED_MACHINE_GUID);
  71. }
  72. }
  73. // ----------------------------------------------------------------------------
  74. // RRDHOST index by hostname
  75. inline RRDHOST *rrdhost_find_by_hostname(const char *hostname) {
  76. if(unlikely(!strcmp(hostname, "localhost")))
  77. return localhost;
  78. return dictionary_get(rrdhost_root_index_hostname, hostname);
  79. }
  80. static inline RRDHOST *rrdhost_index_add_hostname(RRDHOST *host) {
  81. if(!host->hostname) return host;
  82. RRDHOST *ret_hostname = dictionary_set(rrdhost_root_index_hostname, rrdhost_hostname(host), host, sizeof(RRDHOST));
  83. if(ret_hostname == host)
  84. rrdhost_option_set(host, RRDHOST_OPTION_INDEXED_HOSTNAME);
  85. else {
  86. rrdhost_option_clear(host, RRDHOST_OPTION_INDEXED_HOSTNAME);
  87. error("RRDHOST: %s() host with hostname '%s' is already indexed", __FUNCTION__, rrdhost_hostname(host));
  88. }
  89. return host;
  90. }
  91. static inline void rrdhost_index_del_hostname(RRDHOST *host) {
  92. if(unlikely(!host->hostname)) return;
  93. if(rrdhost_option_check(host, RRDHOST_OPTION_INDEXED_HOSTNAME)) {
  94. if(!dictionary_del(rrdhost_root_index_hostname, rrdhost_hostname(host)))
  95. error("RRDHOST: %s() failed to delete hostname '%s' from index", __FUNCTION__, rrdhost_hostname(host));
  96. rrdhost_option_clear(host, RRDHOST_OPTION_INDEXED_HOSTNAME);
  97. }
  98. }
  99. // ----------------------------------------------------------------------------
  100. // RRDHOST - internal helpers
  101. static inline void rrdhost_init_tags(RRDHOST *host, const char *tags) {
  102. if(host->tags && tags && !strcmp(rrdhost_tags(host), tags))
  103. return;
  104. STRING *old = host->tags;
  105. host->tags = string_strdupz((tags && *tags)?tags:NULL);
  106. string_freez(old);
  107. }
  108. static inline void rrdhost_init_hostname(RRDHOST *host, const char *hostname, bool add_to_index) {
  109. if(unlikely(hostname && !*hostname)) hostname = NULL;
  110. if(host->hostname && hostname && !strcmp(rrdhost_hostname(host), hostname))
  111. return;
  112. rrdhost_index_del_hostname(host);
  113. STRING *old = host->hostname;
  114. host->hostname = string_strdupz(hostname?hostname:"localhost");
  115. string_freez(old);
  116. if(add_to_index)
  117. rrdhost_index_add_hostname(host);
  118. }
  119. static inline void rrdhost_init_os(RRDHOST *host, const char *os) {
  120. if(host->os && os && !strcmp(rrdhost_os(host), os))
  121. return;
  122. STRING *old = host->os;
  123. host->os = string_strdupz(os?os:"unknown");
  124. string_freez(old);
  125. }
  126. static inline void rrdhost_init_timezone(RRDHOST *host, const char *timezone, const char *abbrev_timezone, int32_t utc_offset) {
  127. if (host->timezone && timezone && !strcmp(rrdhost_timezone(host), timezone) && host->abbrev_timezone && abbrev_timezone &&
  128. !strcmp(rrdhost_abbrev_timezone(host), abbrev_timezone) && host->utc_offset == utc_offset)
  129. return;
  130. STRING *old = host->timezone;
  131. host->timezone = string_strdupz((timezone && *timezone)?timezone:"unknown");
  132. string_freez(old);
  133. old = (void *)host->abbrev_timezone;
  134. host->abbrev_timezone = string_strdupz((abbrev_timezone && *abbrev_timezone) ? abbrev_timezone : "UTC");
  135. string_freez(old);
  136. host->utc_offset = utc_offset;
  137. }
  138. void set_host_properties(RRDHOST *host, int update_every, RRD_MEMORY_MODE memory_mode,
  139. const char *registry_hostname, const char *os, const char *tags,
  140. const char *tzone, const char *abbrev_tzone, int32_t utc_offset, const char *program_name,
  141. const char *program_version)
  142. {
  143. host->rrd_update_every = update_every;
  144. host->rrd_memory_mode = memory_mode;
  145. rrdhost_init_os(host, os);
  146. rrdhost_init_timezone(host, tzone, abbrev_tzone, utc_offset);
  147. rrdhost_init_tags(host, tags);
  148. host->program_name = string_strdupz((program_name && *program_name) ? program_name : "unknown");
  149. host->program_version = string_strdupz((program_version && *program_version) ? program_version : "unknown");
  150. host->registry_hostname = string_strdupz((registry_hostname && *registry_hostname) ? registry_hostname : rrdhost_hostname(host));
  151. }
  152. // ----------------------------------------------------------------------------
  153. // RRDHOST - add a host
  154. static void rrdhost_initialize_rrdpush_sender(RRDHOST *host,
  155. unsigned int rrdpush_enabled,
  156. char *rrdpush_destination,
  157. char *rrdpush_api_key,
  158. char *rrdpush_send_charts_matching
  159. ) {
  160. if(rrdhost_flag_check(host, RRDHOST_FLAG_RRDPUSH_SENDER_INITIALIZED)) return;
  161. if(rrdpush_enabled && rrdpush_destination && *rrdpush_destination && rrdpush_api_key && *rrdpush_api_key) {
  162. rrdhost_flag_set(host, RRDHOST_FLAG_RRDPUSH_SENDER_INITIALIZED);
  163. rrdhost_streaming_sender_structures_init(host);
  164. #ifdef ENABLE_HTTPS
  165. host->sender->ssl.conn = NULL;
  166. host->sender->ssl.flags = NETDATA_SSL_START;
  167. #endif
  168. host->rrdpush_send_destination = strdupz(rrdpush_destination);
  169. rrdpush_destinations_init(host);
  170. host->rrdpush_send_api_key = strdupz(rrdpush_api_key);
  171. host->rrdpush_send_charts_matching = simple_pattern_create(rrdpush_send_charts_matching, NULL, SIMPLE_PATTERN_EXACT);
  172. rrdhost_option_set(host, RRDHOST_OPTION_SENDER_ENABLED);
  173. }
  174. else
  175. rrdhost_option_clear(host, RRDHOST_OPTION_SENDER_ENABLED);
  176. }
  177. static RRDHOST *rrdhost_create(
  178. const char *hostname,
  179. const char *registry_hostname,
  180. const char *guid,
  181. const char *os,
  182. const char *timezone,
  183. const char *abbrev_timezone,
  184. int32_t utc_offset,
  185. const char *tags,
  186. const char *program_name,
  187. const char *program_version,
  188. int update_every,
  189. long entries,
  190. RRD_MEMORY_MODE memory_mode,
  191. unsigned int health_enabled,
  192. unsigned int rrdpush_enabled,
  193. char *rrdpush_destination,
  194. char *rrdpush_api_key,
  195. char *rrdpush_send_charts_matching,
  196. bool rrdpush_enable_replication,
  197. time_t rrdpush_seconds_to_replicate,
  198. time_t rrdpush_replication_step,
  199. struct rrdhost_system_info *system_info,
  200. int is_localhost,
  201. bool archived
  202. ) {
  203. debug(D_RRDHOST, "Host '%s': adding with guid '%s'", hostname, guid);
  204. if(memory_mode == RRD_MEMORY_MODE_DBENGINE && !dbengine_enabled) {
  205. error("memory mode 'dbengine' is not enabled, but host '%s' is configured for it. Falling back to 'alloc'", hostname);
  206. memory_mode = RRD_MEMORY_MODE_ALLOC;
  207. }
  208. #ifdef ENABLE_DBENGINE
  209. int is_legacy = (memory_mode == RRD_MEMORY_MODE_DBENGINE) && is_legacy_child(guid);
  210. #else
  211. int is_legacy = 1;
  212. #endif
  213. int is_in_multihost = (memory_mode == RRD_MEMORY_MODE_DBENGINE && !is_legacy);
  214. RRDHOST *host = callocz(1, sizeof(RRDHOST));
  215. __atomic_add_fetch(&netdata_buffers_statistics.rrdhost_allocations_size, sizeof(RRDHOST), __ATOMIC_RELAXED);
  216. strncpyz(host->machine_guid, guid, GUID_LEN + 1);
  217. set_host_properties(host, (update_every > 0)?update_every:1, memory_mode, registry_hostname, os,
  218. tags, timezone, abbrev_timezone, utc_offset, program_name, program_version);
  219. rrdhost_init_hostname(host, hostname, false);
  220. host->rrd_history_entries = align_entries_to_pagesize(memory_mode, entries);
  221. host->health.health_enabled = ((memory_mode == RRD_MEMORY_MODE_NONE)) ? 0 : health_enabled;
  222. if (likely(!archived)) {
  223. rrdfunctions_init(host);
  224. host->rrdlabels = rrdlabels_create();
  225. rrdhost_initialize_rrdpush_sender(
  226. host, rrdpush_enabled, rrdpush_destination, rrdpush_api_key, rrdpush_send_charts_matching);
  227. }
  228. if(rrdpush_enable_replication)
  229. rrdhost_option_set(host, RRDHOST_OPTION_REPLICATION);
  230. else
  231. rrdhost_option_clear(host, RRDHOST_OPTION_REPLICATION);
  232. host->rrdpush_seconds_to_replicate = rrdpush_seconds_to_replicate;
  233. host->rrdpush_replication_step = rrdpush_replication_step;
  234. switch(memory_mode) {
  235. default:
  236. case RRD_MEMORY_MODE_ALLOC:
  237. case RRD_MEMORY_MODE_MAP:
  238. case RRD_MEMORY_MODE_SAVE:
  239. case RRD_MEMORY_MODE_RAM:
  240. if(host->rrdpush_seconds_to_replicate > host->rrd_history_entries * host->rrd_update_every)
  241. host->rrdpush_seconds_to_replicate = host->rrd_history_entries * host->rrd_update_every;
  242. break;
  243. case RRD_MEMORY_MODE_DBENGINE:
  244. break;
  245. }
  246. netdata_mutex_init(&host->aclk_state_lock);
  247. netdata_mutex_init(&host->receiver_lock);
  248. host->system_info = system_info;
  249. rrdset_index_init(host);
  250. if(config_get_boolean(CONFIG_SECTION_DB, "delete obsolete charts files", 1))
  251. rrdhost_option_set(host, RRDHOST_OPTION_DELETE_OBSOLETE_CHARTS);
  252. if(config_get_boolean(CONFIG_SECTION_DB, "delete orphan hosts files", 1) && !is_localhost)
  253. rrdhost_option_set(host, RRDHOST_OPTION_DELETE_ORPHAN_HOST);
  254. char filename[FILENAME_MAX + 1];
  255. if(is_localhost) {
  256. host->cache_dir = strdupz(netdata_configured_cache_dir);
  257. host->varlib_dir = strdupz(netdata_configured_varlib_dir);
  258. }
  259. else {
  260. // this is not localhost - append our GUID to localhost path
  261. if (is_in_multihost) { // don't append to cache dir in multihost
  262. host->cache_dir = strdupz(netdata_configured_cache_dir);
  263. }
  264. else {
  265. snprintfz(filename, FILENAME_MAX, "%s/%s", netdata_configured_cache_dir, host->machine_guid);
  266. host->cache_dir = strdupz(filename);
  267. }
  268. if((host->rrd_memory_mode == RRD_MEMORY_MODE_MAP || host->rrd_memory_mode == RRD_MEMORY_MODE_SAVE ||
  269. (host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE && is_legacy))) {
  270. int r = mkdir(host->cache_dir, 0775);
  271. if(r != 0 && errno != EEXIST)
  272. error("Host '%s': cannot create directory '%s'", rrdhost_hostname(host), host->cache_dir);
  273. }
  274. snprintfz(filename, FILENAME_MAX, "%s/%s", netdata_configured_varlib_dir, host->machine_guid);
  275. host->varlib_dir = strdupz(filename);
  276. }
  277. // this is also needed for custom host variables - not only health
  278. if(!host->rrdvars)
  279. host->rrdvars = rrdvariables_create();
  280. if (likely(!uuid_parse(host->machine_guid, host->host_uuid)))
  281. sql_load_node_id(host);
  282. else
  283. error_report("Host machine GUID %s is not valid", host->machine_guid);
  284. rrdfamily_index_init(host);
  285. rrdcalctemplate_index_init(host);
  286. rrdcalc_rrdhost_index_init(host);
  287. metaqueue_host_update_info(host);
  288. if (host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) {
  289. #ifdef ENABLE_DBENGINE
  290. char dbenginepath[FILENAME_MAX + 1];
  291. int ret;
  292. snprintfz(dbenginepath, FILENAME_MAX, "%s/dbengine", host->cache_dir);
  293. ret = mkdir(dbenginepath, 0775);
  294. if (ret != 0 && errno != EEXIST)
  295. error("Host '%s': cannot create directory '%s'", rrdhost_hostname(host), dbenginepath);
  296. else
  297. ret = 0; // succeed
  298. if (is_legacy) {
  299. // initialize legacy dbengine instance as needed
  300. host->db[0].mode = RRD_MEMORY_MODE_DBENGINE;
  301. host->db[0].eng = storage_engine_get(host->db[0].mode);
  302. host->db[0].tier_grouping = get_tier_grouping(0);
  303. ret = rrdeng_init(
  304. (struct rrdengine_instance **)&host->db[0].instance,
  305. dbenginepath,
  306. default_rrdeng_disk_quota_mb,
  307. 0); // may fail here for legacy dbengine initialization
  308. if(ret == 0) {
  309. rrdeng_readiness_wait((struct rrdengine_instance *)host->db[0].instance);
  310. // assign the rest of the shared storage instances to it
  311. // to allow them collect its metrics too
  312. for(size_t tier = 1; tier < storage_tiers ; tier++) {
  313. host->db[tier].mode = RRD_MEMORY_MODE_DBENGINE;
  314. host->db[tier].eng = storage_engine_get(host->db[tier].mode);
  315. host->db[tier].instance = (STORAGE_INSTANCE *) multidb_ctx[tier];
  316. host->db[tier].tier_grouping = get_tier_grouping(tier);
  317. }
  318. }
  319. }
  320. else {
  321. for(size_t tier = 0; tier < storage_tiers ; tier++) {
  322. host->db[tier].mode = RRD_MEMORY_MODE_DBENGINE;
  323. host->db[tier].eng = storage_engine_get(host->db[tier].mode);
  324. host->db[tier].instance = (STORAGE_INSTANCE *)multidb_ctx[tier];
  325. host->db[tier].tier_grouping = get_tier_grouping(tier);
  326. }
  327. }
  328. if (ret) { // check legacy or multihost initialization success
  329. error(
  330. "Host '%s': cannot initialize host with machine guid '%s'. Failed to initialize DB engine at '%s'.",
  331. rrdhost_hostname(host), host->machine_guid, host->cache_dir);
  332. rrd_wrlock();
  333. rrdhost_free___while_having_rrd_wrlock(host, true);
  334. rrd_unlock();
  335. return NULL;
  336. }
  337. #else
  338. fatal("RRD_MEMORY_MODE_DBENGINE is not supported in this platform.");
  339. #endif
  340. }
  341. else {
  342. host->db[0].mode = host->rrd_memory_mode;
  343. host->db[0].eng = storage_engine_get(host->db[0].mode);
  344. host->db[0].instance = NULL;
  345. host->db[0].tier_grouping = get_tier_grouping(0);
  346. #ifdef ENABLE_DBENGINE
  347. // the first tier is reserved for the non-dbengine modes
  348. for(size_t tier = 1; tier < storage_tiers ; tier++) {
  349. host->db[tier].mode = RRD_MEMORY_MODE_DBENGINE;
  350. host->db[tier].eng = storage_engine_get(host->db[tier].mode);
  351. host->db[tier].instance = (STORAGE_INSTANCE *) multidb_ctx[tier];
  352. host->db[tier].tier_grouping = get_tier_grouping(tier);
  353. }
  354. #endif
  355. }
  356. // ------------------------------------------------------------------------
  357. // init new ML host and update system_info to let upstreams know
  358. // about ML functionality
  359. //
  360. if (is_localhost && host->system_info) {
  361. host->system_info->ml_capable = ml_capable();
  362. host->system_info->ml_enabled = ml_enabled(host);
  363. host->system_info->mc_version = enable_metric_correlations ? metric_correlations_version : 0;
  364. }
  365. // ------------------------------------------------------------------------
  366. // link it and add it to the index
  367. rrd_wrlock();
  368. RRDHOST *t = rrdhost_index_add_by_guid(host);
  369. if(t != host) {
  370. error("Host '%s': cannot add host with machine guid '%s' to index. It already exists as host '%s' with machine guid '%s'.", rrdhost_hostname(host), host->machine_guid, rrdhost_hostname(t), t->machine_guid);
  371. rrdhost_free___while_having_rrd_wrlock(host, true);
  372. rrd_unlock();
  373. return NULL;
  374. }
  375. rrdhost_index_add_hostname(host);
  376. if(is_localhost)
  377. DOUBLE_LINKED_LIST_PREPEND_ITEM_UNSAFE(localhost, host, prev, next);
  378. else
  379. DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(localhost, host, prev, next);
  380. rrd_unlock();
  381. // ------------------------------------------------------------------------
  382. info("Host '%s' (at registry as '%s') with guid '%s' initialized"
  383. ", os '%s'"
  384. ", timezone '%s'"
  385. ", tags '%s'"
  386. ", program_name '%s'"
  387. ", program_version '%s'"
  388. ", update every %d"
  389. ", memory mode %s"
  390. ", history entries %ld"
  391. ", streaming %s"
  392. " (to '%s' with api key '%s')"
  393. ", health %s"
  394. ", cache_dir '%s'"
  395. ", varlib_dir '%s'"
  396. ", alarms default handler '%s'"
  397. ", alarms default recipient '%s'"
  398. , rrdhost_hostname(host)
  399. , rrdhost_registry_hostname(host)
  400. , host->machine_guid
  401. , rrdhost_os(host)
  402. , rrdhost_timezone(host)
  403. , rrdhost_tags(host)
  404. , rrdhost_program_name(host)
  405. , rrdhost_program_version(host)
  406. , host->rrd_update_every
  407. , rrd_memory_mode_name(host->rrd_memory_mode)
  408. , host->rrd_history_entries
  409. , rrdhost_has_rrdpush_sender_enabled(host)?"enabled":"disabled"
  410. , host->rrdpush_send_destination?host->rrdpush_send_destination:""
  411. , host->rrdpush_send_api_key?host->rrdpush_send_api_key:""
  412. , host->health.health_enabled?"enabled":"disabled"
  413. , host->cache_dir
  414. , host->varlib_dir
  415. , string2str(host->health.health_default_exec)
  416. , string2str(host->health.health_default_recipient)
  417. );
  418. if(!archived)
  419. rrdhost_flag_set(host,RRDHOST_FLAG_METADATA_INFO | RRDHOST_FLAG_METADATA_UPDATE);
  420. rrdhost_load_rrdcontext_data(host);
  421. if (!archived) {
  422. ml_host_new(host);
  423. ml_start_anomaly_detection_threads(host);
  424. } else
  425. rrdhost_flag_set(host, RRDHOST_FLAG_ARCHIVED | RRDHOST_FLAG_ORPHAN);
  426. return host;
  427. }
  428. static void rrdhost_update(RRDHOST *host
  429. , const char *hostname
  430. , const char *registry_hostname
  431. , const char *guid
  432. , const char *os
  433. , const char *timezone
  434. , const char *abbrev_timezone
  435. , int32_t utc_offset
  436. , const char *tags
  437. , const char *program_name
  438. , const char *program_version
  439. , int update_every
  440. , long history
  441. , RRD_MEMORY_MODE mode
  442. , unsigned int health_enabled
  443. , unsigned int rrdpush_enabled
  444. , char *rrdpush_destination
  445. , char *rrdpush_api_key
  446. , char *rrdpush_send_charts_matching
  447. , bool rrdpush_enable_replication
  448. , time_t rrdpush_seconds_to_replicate
  449. , time_t rrdpush_replication_step
  450. , struct rrdhost_system_info *system_info
  451. )
  452. {
  453. UNUSED(guid);
  454. netdata_spinlock_lock(&host->rrdhost_update_lock);
  455. host->health.health_enabled = (mode == RRD_MEMORY_MODE_NONE) ? 0 : health_enabled;
  456. {
  457. struct rrdhost_system_info *old = host->system_info;
  458. host->system_info = system_info;
  459. rrdhost_flag_set(host, RRDHOST_FLAG_METADATA_INFO | RRDHOST_FLAG_METADATA_CLAIMID | RRDHOST_FLAG_METADATA_UPDATE);
  460. rrdhost_system_info_free(old);
  461. }
  462. rrdhost_init_os(host, os);
  463. rrdhost_init_timezone(host, timezone, abbrev_timezone, utc_offset);
  464. string_freez(host->registry_hostname);
  465. host->registry_hostname = string_strdupz((registry_hostname && *registry_hostname)?registry_hostname:hostname);
  466. if(strcmp(rrdhost_hostname(host), hostname) != 0) {
  467. info("Host '%s' has been renamed to '%s'. If this is not intentional it may mean multiple hosts are using the same machine_guid.", rrdhost_hostname(host), hostname);
  468. rrdhost_init_hostname(host, hostname, true);
  469. }
  470. if(strcmp(rrdhost_program_name(host), program_name) != 0) {
  471. info("Host '%s' switched program name from '%s' to '%s'", rrdhost_hostname(host), rrdhost_program_name(host), program_name);
  472. STRING *t = host->program_name;
  473. host->program_name = string_strdupz(program_name);
  474. string_freez(t);
  475. }
  476. if(strcmp(rrdhost_program_version(host), program_version) != 0) {
  477. info("Host '%s' switched program version from '%s' to '%s'", rrdhost_hostname(host), rrdhost_program_version(host), program_version);
  478. STRING *t = host->program_version;
  479. host->program_version = string_strdupz(program_version);
  480. string_freez(t);
  481. }
  482. if(host->rrd_update_every != update_every)
  483. error("Host '%s' has an update frequency of %d seconds, but the wanted one is %d seconds. Restart netdata here to apply the new settings.", rrdhost_hostname(host), host->rrd_update_every, update_every);
  484. if(host->rrd_memory_mode != mode)
  485. error("Host '%s' has memory mode '%s', but the wanted one is '%s'. Restart netdata here to apply the new settings.", rrdhost_hostname(host), rrd_memory_mode_name(host->rrd_memory_mode), rrd_memory_mode_name(mode));
  486. else if(host->rrd_memory_mode != RRD_MEMORY_MODE_DBENGINE && host->rrd_history_entries < history)
  487. error("Host '%s' has history of %ld entries, but the wanted one is %ld entries. Restart netdata here to apply the new settings.", rrdhost_hostname(host), host->rrd_history_entries, history);
  488. // update host tags
  489. rrdhost_init_tags(host, tags);
  490. if(!host->rrdvars)
  491. host->rrdvars = rrdvariables_create();
  492. if (rrdhost_flag_check(host, RRDHOST_FLAG_ARCHIVED)) {
  493. rrdhost_flag_clear(host, RRDHOST_FLAG_ARCHIVED);
  494. rrdfunctions_init(host);
  495. if(!host->rrdlabels)
  496. host->rrdlabels = rrdlabels_create();
  497. if (!host->rrdset_root_index)
  498. rrdset_index_init(host);
  499. rrdhost_initialize_rrdpush_sender(host,
  500. rrdpush_enabled,
  501. rrdpush_destination,
  502. rrdpush_api_key,
  503. rrdpush_send_charts_matching);
  504. rrdfamily_index_init(host);
  505. rrdcalctemplate_index_init(host);
  506. rrdcalc_rrdhost_index_init(host);
  507. if(rrdpush_enable_replication)
  508. rrdhost_option_set(host, RRDHOST_OPTION_REPLICATION);
  509. else
  510. rrdhost_option_clear(host, RRDHOST_OPTION_REPLICATION);
  511. host->rrdpush_seconds_to_replicate = rrdpush_seconds_to_replicate;
  512. host->rrdpush_replication_step = rrdpush_replication_step;
  513. ml_host_new(host);
  514. ml_start_anomaly_detection_threads(host);
  515. rrdhost_load_rrdcontext_data(host);
  516. info("Host %s is not in archived mode anymore", rrdhost_hostname(host));
  517. }
  518. netdata_spinlock_unlock(&host->rrdhost_update_lock);
  519. }
  520. RRDHOST *rrdhost_find_or_create(
  521. const char *hostname
  522. , const char *registry_hostname
  523. , const char *guid
  524. , const char *os
  525. , const char *timezone
  526. , const char *abbrev_timezone
  527. , int32_t utc_offset
  528. , const char *tags
  529. , const char *program_name
  530. , const char *program_version
  531. , int update_every
  532. , long history
  533. , RRD_MEMORY_MODE mode
  534. , unsigned int health_enabled
  535. , unsigned int rrdpush_enabled
  536. , char *rrdpush_destination
  537. , char *rrdpush_api_key
  538. , char *rrdpush_send_charts_matching
  539. , bool rrdpush_enable_replication
  540. , time_t rrdpush_seconds_to_replicate
  541. , time_t rrdpush_replication_step
  542. , struct rrdhost_system_info *system_info
  543. , bool archived
  544. ) {
  545. debug(D_RRDHOST, "Searching for host '%s' with guid '%s'", hostname, guid);
  546. RRDHOST *host = rrdhost_find_by_guid(guid);
  547. if (unlikely(host && host->rrd_memory_mode != mode && rrdhost_flag_check(host, RRDHOST_FLAG_ARCHIVED))) {
  548. /* If a legacy memory mode instantiates all dbengine state must be discarded to avoid inconsistencies */
  549. error("Archived host '%s' has memory mode '%s', but the wanted one is '%s'. Discarding archived state.",
  550. rrdhost_hostname(host), rrd_memory_mode_name(host->rrd_memory_mode), rrd_memory_mode_name(mode));
  551. rrd_wrlock();
  552. rrdhost_free___while_having_rrd_wrlock(host, true);
  553. host = NULL;
  554. rrd_unlock();
  555. }
  556. if(!host) {
  557. host = rrdhost_create(
  558. hostname
  559. , registry_hostname
  560. , guid
  561. , os
  562. , timezone
  563. , abbrev_timezone
  564. , utc_offset
  565. , tags
  566. , program_name
  567. , program_version
  568. , update_every
  569. , history
  570. , mode
  571. , health_enabled
  572. , rrdpush_enabled
  573. , rrdpush_destination
  574. , rrdpush_api_key
  575. , rrdpush_send_charts_matching
  576. , rrdpush_enable_replication
  577. , rrdpush_seconds_to_replicate
  578. , rrdpush_replication_step
  579. , system_info
  580. , 0
  581. , archived
  582. );
  583. }
  584. else {
  585. rrdhost_update(host
  586. , hostname
  587. , registry_hostname
  588. , guid
  589. , os
  590. , timezone
  591. , abbrev_timezone
  592. , utc_offset
  593. , tags
  594. , program_name
  595. , program_version
  596. , update_every
  597. , history
  598. , mode
  599. , health_enabled
  600. , rrdpush_enabled
  601. , rrdpush_destination
  602. , rrdpush_api_key
  603. , rrdpush_send_charts_matching
  604. , rrdpush_enable_replication
  605. , rrdpush_seconds_to_replicate
  606. , rrdpush_replication_step
  607. , system_info);
  608. }
  609. return host;
  610. }
  611. inline int rrdhost_should_be_removed(RRDHOST *host, RRDHOST *protected_host, time_t now_s) {
  612. if(host != protected_host
  613. && host != localhost
  614. && rrdhost_receiver_replicating_charts(host) == 0
  615. && rrdhost_sender_replicating_charts(host) == 0
  616. && rrdhost_flag_check(host, RRDHOST_FLAG_ORPHAN)
  617. && !rrdhost_flag_check(host, RRDHOST_FLAG_ARCHIVED)
  618. && !host->receiver
  619. && host->child_disconnected_time
  620. && host->child_disconnected_time + rrdhost_free_orphan_time_s < now_s)
  621. return 1;
  622. return 0;
  623. }
  624. // ----------------------------------------------------------------------------
  625. // RRDHOST global / startup initialization
  #ifdef ENABLE_DBENGINE
  /* Input and output of the initialization of one dbengine tier. */
  struct dbengine_initialization {
      netdata_thread_t thread;        // thread handle; not touched by dbengine_tier_init()
      char path[FILENAME_MAX + 1];    // directory given to rrdeng_init()
      int disk_space_mb;              // disk space given to rrdeng_init()
      size_t tier;                    // tier number given to rrdeng_init()
      int ret;                        // what rrdeng_init() returned
  };

  /*
   * Initializes one dbengine tier.
   *
   * `ptr` is a struct dbengine_initialization; the result of rrdeng_init()
   * is stored in its `ret` member. The signature (void * in, void * out)
   * fits a thread start routine. Returns `ptr`.
   */
  void *dbengine_tier_init(void *ptr) {
      struct dbengine_initialization *init = ptr;

      int rc = rrdeng_init(NULL, init->path, init->disk_space_mb, init->tier);
      init->ret = rc;

      return ptr;
  }
  #endif
  640. void dbengine_init(char *hostname) {
  641. #ifdef ENABLE_DBENGINE
  642. use_direct_io = config_get_boolean(CONFIG_SECTION_DB, "dbengine use direct io", use_direct_io);
  643. unsigned read_num = (unsigned)config_get_number(CONFIG_SECTION_DB, "dbengine pages per extent", MAX_PAGES_PER_EXTENT);
  644. if (read_num > 0 && read_num <= MAX_PAGES_PER_EXTENT)
  645. rrdeng_pages_per_extent = read_num;
  646. else {
  647. error("Invalid dbengine pages per extent %u given. Using %u.", read_num, rrdeng_pages_per_extent);
  648. config_set_number(CONFIG_SECTION_DB, "dbengine pages per extent", rrdeng_pages_per_extent);
  649. }
  650. storage_tiers = config_get_number(CONFIG_SECTION_DB, "storage tiers", storage_tiers);
  651. if(storage_tiers < 1) {
  652. error("At least 1 storage tier is required. Assuming 1.");
  653. storage_tiers = 1;
  654. config_set_number(CONFIG_SECTION_DB, "storage tiers", storage_tiers);
  655. }
  656. if(storage_tiers > RRD_STORAGE_TIERS) {
  657. error("Up to %d storage tier are supported. Assuming %d.", RRD_STORAGE_TIERS, RRD_STORAGE_TIERS);
  658. storage_tiers = RRD_STORAGE_TIERS;
  659. config_set_number(CONFIG_SECTION_DB, "storage tiers", storage_tiers);
  660. }
  661. bool parallel_initialization = (storage_tiers <= (size_t)get_netdata_cpus()) ? true : false;
  662. parallel_initialization = config_get_boolean(CONFIG_SECTION_DB, "dbengine parallel initialization", parallel_initialization);
  663. default_rrdeng_page_fetch_timeout = (int) config_get_number(CONFIG_SECTION_DB, "dbengine page fetch timeout secs", PAGE_CACHE_FETCH_WAIT_TIMEOUT);
  664. if (default_rrdeng_page_fetch_timeout < 1) {
  665. info("'dbengine page fetch timeout secs' cannot be %d, using 1", default_rrdeng_page_fetch_timeout);
  666. default_rrdeng_page_fetch_timeout = 1;
  667. config_set_number(CONFIG_SECTION_DB, "dbengine page fetch timeout secs", default_rrdeng_page_fetch_timeout);
  668. }
  669. default_rrdeng_page_fetch_retries = (int) config_get_number(CONFIG_SECTION_DB, "dbengine page fetch retries", MAX_PAGE_CACHE_FETCH_RETRIES);
  670. if (default_rrdeng_page_fetch_retries < 1) {
  671. info("\"dbengine page fetch retries\" found in netdata.conf cannot be %d, using 1", default_rrdeng_page_fetch_retries);
  672. default_rrdeng_page_fetch_retries = 1;
  673. config_set_number(CONFIG_SECTION_DB, "dbengine page fetch retries", default_rrdeng_page_fetch_retries);
  674. }
  675. struct dbengine_initialization tiers_init[RRD_STORAGE_TIERS] = {};
  676. size_t created_tiers = 0;
  677. char dbenginepath[FILENAME_MAX + 1];
  678. char dbengineconfig[200 + 1];
  679. int divisor = 1;
  680. for(size_t tier = 0; tier < storage_tiers ;tier++) {
  681. if(tier == 0)
  682. snprintfz(dbenginepath, FILENAME_MAX, "%s/dbengine", netdata_configured_cache_dir);
  683. else
  684. snprintfz(dbenginepath, FILENAME_MAX, "%s/dbengine-tier%zu", netdata_configured_cache_dir, tier);
  685. int ret = mkdir(dbenginepath, 0775);
  686. if (ret != 0 && errno != EEXIST) {
  687. error("DBENGINE on '%s': cannot create directory '%s'", hostname, dbenginepath);
  688. break;
  689. }
  690. if(tier > 0)
  691. divisor *= 2;
  692. int disk_space_mb = default_multidb_disk_quota_mb / divisor;
  693. size_t grouping_iterations = storage_tiers_grouping_iterations[tier];
  694. RRD_BACKFILL backfill = storage_tiers_backfill[tier];
  695. if(tier > 0) {
  696. snprintfz(dbengineconfig, 200, "dbengine tier %zu multihost disk space MB", tier);
  697. disk_space_mb = config_get_number(CONFIG_SECTION_DB, dbengineconfig, disk_space_mb);
  698. snprintfz(dbengineconfig, 200, "dbengine tier %zu update every iterations", tier);
  699. grouping_iterations = config_get_number(CONFIG_SECTION_DB, dbengineconfig, grouping_iterations);
  700. if(grouping_iterations < 2) {
  701. grouping_iterations = 2;
  702. config_set_number(CONFIG_SECTION_DB, dbengineconfig, grouping_iterations);
  703. error("DBENGINE on '%s': 'dbegnine tier %zu update every iterations' cannot be less than 2. Assuming 2.", hostname, tier);
  704. }
  705. snprintfz(dbengineconfig, 200, "dbengine tier %zu backfill", tier);
  706. const char *bf = config_get(CONFIG_SECTION_DB, dbengineconfig, backfill == RRD_BACKFILL_NEW ? "new" : backfill == RRD_BACKFILL_FULL ? "full" : "none");
  707. if(strcmp(bf, "new") == 0) backfill = RRD_BACKFILL_NEW;
  708. else if(strcmp(bf, "full") == 0) backfill = RRD_BACKFILL_FULL;
  709. else if(strcmp(bf, "none") == 0) backfill = RRD_BACKFILL_NONE;
  710. else {
  711. error("DBENGINE: unknown backfill value '%s', assuming 'new'", bf);
  712. config_set(CONFIG_SECTION_DB, dbengineconfig, "new");
  713. backfill = RRD_BACKFILL_NEW;
  714. }
  715. }
  716. storage_tiers_grouping_iterations[tier] = grouping_iterations;
  717. storage_tiers_backfill[tier] = backfill;
  718. if(tier > 0 && get_tier_grouping(tier) > 65535) {
  719. storage_tiers_grouping_iterations[tier] = 1;
  720. error("DBENGINE on '%s': dbengine tier %zu gives aggregation of more than 65535 points of tier 0. Disabling tiers above %zu", hostname, tier, tier);
  721. break;
  722. }
  723. internal_error(true, "DBENGINE tier %zu grouping iterations is set to %zu", tier, storage_tiers_grouping_iterations[tier]);
  724. tiers_init[tier].disk_space_mb = disk_space_mb;
  725. tiers_init[tier].tier = tier;
  726. strncpyz(tiers_init[tier].path, dbenginepath, FILENAME_MAX);
  727. tiers_init[tier].ret = 0;
  728. if(parallel_initialization)
  729. netdata_thread_create(&tiers_init[tier].thread, "DBENGINE_INIT", NETDATA_THREAD_OPTION_JOINABLE,
  730. dbengine_tier_init, &tiers_init[tier]);
  731. else
  732. dbengine_tier_init(&tiers_init[tier]);
  733. }
  734. for(size_t tier = 0; tier < storage_tiers ;tier++) {
  735. void *ptr;
  736. if(parallel_initialization)
  737. netdata_thread_join(tiers_init[tier].thread, &ptr);
  738. if(tiers_init[tier].ret != 0) {
  739. error("DBENGINE on '%s': Failed to initialize multi-host database tier %zu on path '%s'",
  740. hostname, tiers_init[tier].tier, tiers_init[tier].path);
  741. }
  742. else if(created_tiers == tier)
  743. created_tiers++;
  744. }
  745. if(created_tiers && created_tiers < storage_tiers) {
  746. error("DBENGINE on '%s': Managed to create %zu tiers instead of %zu. Continuing with %zu available.",
  747. hostname, created_tiers, storage_tiers, created_tiers);
  748. storage_tiers = created_tiers;
  749. }
  750. else if(!created_tiers)
  751. fatal("DBENGINE on '%s', failed to initialize databases at '%s'.", hostname, netdata_configured_cache_dir);
  752. for(size_t tier = 0; tier < storage_tiers ;tier++)
  753. rrdeng_readiness_wait(multidb_ctx[tier]);
  754. dbengine_enabled = true;
  755. #else
  756. storage_tiers = config_get_number(CONFIG_SECTION_DB, "storage tiers", 1);
  757. if(storage_tiers != 1) {
  758. error("DBENGINE is not available on '%s', so only 1 database tier can be supported.", hostname);
  759. storage_tiers = 1;
  760. config_set_number(CONFIG_SECTION_DB, "storage tiers", storage_tiers);
  761. }
  762. dbengine_enabled = false;
  763. #endif
  764. }
  765. int rrd_init(char *hostname, struct rrdhost_system_info *system_info, bool unittest) {
  766. rrdhost_init();
  767. if (unlikely(sql_init_database(DB_CHECK_NONE, system_info ? 0 : 1))) {
  768. if (default_rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE)
  769. fatal("Failed to initialize SQLite");
  770. info("Skipping SQLITE metadata initialization since memory mode is not dbengine");
  771. }
  772. if (unlikely(sql_init_context_database(system_info ? 0 : 1))) {
  773. error_report("Failed to initialize context metadata database");
  774. }
  775. if (unlikely(unittest)) {
  776. dbengine_enabled = true;
  777. }
  778. else {
  779. health_init();
  780. rrdpush_init();
  781. if (default_rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE || rrdpush_receiver_needs_dbengine()) {
  782. info("DBENGINE: Initializing ...");
  783. dbengine_init(hostname);
  784. }
  785. else {
  786. info("DBENGINE: Not initializing ...");
  787. storage_tiers = 1;
  788. }
  789. if (!dbengine_enabled) {
  790. if (storage_tiers > 1) {
  791. error("dbengine is not enabled, but %zu tiers have been requested. Resetting tiers to 1",
  792. storage_tiers);
  793. storage_tiers = 1;
  794. }
  795. if (default_rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) {
  796. error("dbengine is not enabled, but it has been given as the default db mode. Resetting db mode to alloc");
  797. default_rrd_memory_mode = RRD_MEMORY_MODE_ALLOC;
  798. }
  799. }
  800. }
  801. if(!unittest)
  802. metadata_sync_init();
  803. debug(D_RRDHOST, "Initializing localhost with hostname '%s'", hostname);
  804. localhost = rrdhost_create(
  805. hostname
  806. , registry_get_this_machine_hostname()
  807. , registry_get_this_machine_guid()
  808. , os_type
  809. , netdata_configured_timezone
  810. , netdata_configured_abbrev_timezone
  811. , netdata_configured_utc_offset
  812. , ""
  813. , program_name
  814. , program_version
  815. , default_rrd_update_every
  816. , default_rrd_history_entries
  817. , default_rrd_memory_mode
  818. , default_health_enabled
  819. , default_rrdpush_enabled
  820. , default_rrdpush_destination
  821. , default_rrdpush_api_key
  822. , default_rrdpush_send_charts_matching
  823. , default_rrdpush_enable_replication
  824. , default_rrdpush_seconds_to_replicate
  825. , default_rrdpush_replication_step
  826. , system_info
  827. , 1
  828. , 0
  829. );
  830. if (unlikely(!localhost)) {
  831. return 1;
  832. }
  833. if (likely(system_info)) {
  834. migrate_localhost(&localhost->host_uuid);
  835. sql_aclk_sync_init();
  836. web_client_api_v1_management_init();
  837. }
  838. return localhost==NULL;
  839. }
  840. // ----------------------------------------------------------------------------
  841. // RRDHOST - free
  842. void rrdhost_system_info_free(struct rrdhost_system_info *system_info) {
  843. if(likely(system_info)) {
  844. __atomic_sub_fetch(&netdata_buffers_statistics.rrdhost_allocations_size, sizeof(struct rrdhost_system_info), __ATOMIC_RELAXED);
  845. freez(system_info->cloud_provider_type);
  846. freez(system_info->cloud_instance_type);
  847. freez(system_info->cloud_instance_region);
  848. freez(system_info->host_os_name);
  849. freez(system_info->host_os_id);
  850. freez(system_info->host_os_id_like);
  851. freez(system_info->host_os_version);
  852. freez(system_info->host_os_version_id);
  853. freez(system_info->host_os_detection);
  854. freez(system_info->host_cores);
  855. freez(system_info->host_cpu_freq);
  856. freez(system_info->host_ram_total);
  857. freez(system_info->host_disk_space);
  858. freez(system_info->container_os_name);
  859. freez(system_info->container_os_id);
  860. freez(system_info->container_os_id_like);
  861. freez(system_info->container_os_version);
  862. freez(system_info->container_os_version_id);
  863. freez(system_info->container_os_detection);
  864. freez(system_info->kernel_name);
  865. freez(system_info->kernel_version);
  866. freez(system_info->architecture);
  867. freez(system_info->virtualization);
  868. freez(system_info->virt_detection);
  869. freez(system_info->container);
  870. freez(system_info->container_detection);
  871. freez(system_info->is_k8s_node);
  872. freez(system_info->install_type);
  873. freez(system_info->prebuilt_arch);
  874. freez(system_info->prebuilt_dist);
  875. freez(system_info);
  876. }
  877. }
  878. static void rrdhost_streaming_sender_structures_init(RRDHOST *host)
  879. {
  880. if (host->sender)
  881. return;
  882. host->sender = callocz(1, sizeof(*host->sender));
  883. __atomic_add_fetch(&netdata_buffers_statistics.rrdhost_senders, sizeof(*host->sender), __ATOMIC_RELAXED);
  884. host->sender->host = host;
  885. host->sender->buffer = cbuffer_new(CBUFFER_INITIAL_SIZE, 1024 * 1024, &netdata_buffers_statistics.cbuffers_streaming);
  886. host->sender->capabilities = STREAM_OUR_CAPABILITIES;
  887. host->sender->rrdpush_sender_pipe[PIPE_READ] = -1;
  888. host->sender->rrdpush_sender_pipe[PIPE_WRITE] = -1;
  889. host->sender->rrdpush_sender_socket = -1;
  890. #ifdef ENABLE_COMPRESSION
  891. if(default_compression_enabled) {
  892. host->sender->flags |= SENDER_FLAG_COMPRESSION;
  893. host->sender->compressor = create_compressor();
  894. }
  895. else
  896. host->sender->flags &= ~SENDER_FLAG_COMPRESSION;
  897. #endif
  898. netdata_mutex_init(&host->sender->mutex);
  899. replication_init_sender(host->sender);
  900. }
  901. static void rrdhost_streaming_sender_structures_free(RRDHOST *host)
  902. {
  903. rrdhost_option_clear(host, RRDHOST_OPTION_SENDER_ENABLED);
  904. if (unlikely(!host->sender))
  905. return;
  906. rrdpush_sender_thread_stop(host, "HOST CLEANUP", true); // stop a possibly running thread
  907. cbuffer_free(host->sender->buffer);
  908. #ifdef ENABLE_COMPRESSION
  909. if (host->sender->compressor)
  910. host->sender->compressor->destroy(&host->sender->compressor);
  911. #endif
  912. replication_cleanup_sender(host->sender);
  913. __atomic_sub_fetch(&netdata_buffers_statistics.rrdhost_senders, sizeof(*host->sender), __ATOMIC_RELAXED);
  914. freez(host->sender);
  915. host->sender = NULL;
  916. rrdhost_flag_clear(host, RRDHOST_FLAG_RRDPUSH_SENDER_INITIALIZED);
  917. }
  918. void rrdhost_free___while_having_rrd_wrlock(RRDHOST *host, bool force) {
  919. if(!host) return;
  920. if (netdata_exit || force) {
  921. info("RRD: 'host:%s' freeing memory...", rrdhost_hostname(host));
  922. // ------------------------------------------------------------------------
  923. // first remove it from the indexes, so that it will not be discoverable
  924. rrdhost_index_del_hostname(host);
  925. rrdhost_index_del_by_guid(host);
  926. if (host->prev)
  927. DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(localhost, host, prev, next);
  928. }
  929. // ------------------------------------------------------------------------
  930. // clean up streaming
  931. rrdhost_streaming_sender_structures_free(host);
  932. if (netdata_exit || force)
  933. stop_streaming_receiver(host, "HOST CLEANUP");
  934. // ------------------------------------------------------------------------
  935. // clean up alarms
  936. rrdcalc_delete_all(host);
  937. // ------------------------------------------------------------------------
  938. // release its children resources
  939. #ifdef ENABLE_DBENGINE
  940. for(size_t tier = 0; tier < storage_tiers ;tier++) {
  941. if(host->db[tier].mode == RRD_MEMORY_MODE_DBENGINE
  942. && host->db[tier].instance
  943. && !is_storage_engine_shared(host->db[tier].instance))
  944. rrdeng_prepare_exit((struct rrdengine_instance *)host->db[tier].instance);
  945. }
  946. #endif
  947. // delete all the RRDSETs of the host
  948. rrdset_index_destroy(host);
  949. rrdcalc_rrdhost_index_destroy(host);
  950. rrdcalctemplate_index_destroy(host);
  951. // cleanup ML resources
  952. ml_stop_anomaly_detection_threads(host);
  953. ml_host_delete(host);
  954. freez(host->exporting_flags);
  955. health_alarm_log_free(host);
  956. #ifdef ENABLE_DBENGINE
  957. for(size_t tier = 0; tier < storage_tiers ;tier++) {
  958. if(host->db[tier].mode == RRD_MEMORY_MODE_DBENGINE
  959. && host->db[tier].instance
  960. && !is_storage_engine_shared(host->db[tier].instance))
  961. rrdeng_exit((struct rrdengine_instance *)host->db[tier].instance);
  962. }
  963. #endif
  964. if (!netdata_exit && !force) {
  965. info("RRD: 'host:%s' is now in archive mode...", rrdhost_hostname(host));
  966. rrdhost_flag_set(host, RRDHOST_FLAG_ARCHIVED | RRDHOST_FLAG_ORPHAN);
  967. return;
  968. }
  969. #ifdef ENABLE_ACLK
  970. struct aclk_database_worker_config *wc = host->dbsync_worker;
  971. if (wc && !netdata_exit) {
  972. struct aclk_database_cmd cmd;
  973. memset(&cmd, 0, sizeof(cmd));
  974. cmd.opcode = ACLK_DATABASE_ORPHAN_HOST;
  975. struct aclk_completion compl ;
  976. init_aclk_completion(&compl );
  977. cmd.completion = &compl ;
  978. aclk_database_enq_cmd(wc, &cmd);
  979. wait_for_aclk_completion(&compl );
  980. destroy_aclk_completion(&compl );
  981. }
  982. #endif
  983. // ------------------------------------------------------------------------
  984. // free it
  985. pthread_mutex_destroy(&host->aclk_state_lock);
  986. freez(host->aclk_state.claimed_id);
  987. freez(host->aclk_state.prev_claimed_id);
  988. string_freez(host->tags);
  989. rrdlabels_destroy(host->rrdlabels);
  990. string_freez(host->os);
  991. string_freez(host->timezone);
  992. string_freez(host->abbrev_timezone);
  993. string_freez(host->program_name);
  994. string_freez(host->program_version);
  995. rrdhost_system_info_free(host->system_info);
  996. freez(host->cache_dir);
  997. freez(host->varlib_dir);
  998. freez(host->rrdpush_send_api_key);
  999. freez(host->rrdpush_send_destination);
  1000. rrdpush_destinations_free(host);
  1001. string_freez(host->health.health_default_exec);
  1002. string_freez(host->health.health_default_recipient);
  1003. string_freez(host->registry_hostname);
  1004. simple_pattern_free(host->rrdpush_send_charts_matching);
  1005. netdata_rwlock_destroy(&host->health_log.alarm_log_rwlock);
  1006. freez(host->node_id);
  1007. rrdfamily_index_destroy(host);
  1008. rrdfunctions_destroy(host);
  1009. rrdvariables_destroy(host->rrdvars);
  1010. rrdhost_destroy_rrdcontexts(host);
  1011. string_freez(host->hostname);
  1012. __atomic_sub_fetch(&netdata_buffers_statistics.rrdhost_allocations_size, sizeof(RRDHOST), __ATOMIC_RELAXED);
  1013. freez(host);
  1014. #ifdef ENABLE_ACLK
  1015. if (wc)
  1016. wc->is_orphan = 0;
  1017. #endif
  1018. }
  1019. void rrdhost_free_all(void) {
  1020. rrd_wrlock();
  1021. /* Make sure child-hosts are released before the localhost. */
  1022. while(localhost && localhost->next)
  1023. rrdhost_free___while_having_rrd_wrlock(localhost->next, true);
  1024. if(localhost)
  1025. rrdhost_free___while_having_rrd_wrlock(localhost, true);
  1026. rrd_unlock();
  1027. }
  1028. void rrd_finalize_collection_for_all_hosts(void) {
  1029. RRDHOST *host;
  1030. rrd_wrlock();
  1031. rrdhost_foreach_read(host) {
  1032. rrdhost_finalize_collection(host);
  1033. }
  1034. rrd_unlock();
  1035. }
  1036. // ----------------------------------------------------------------------------
  1037. // RRDHOST - save host files
  1038. void rrdhost_save_charts(RRDHOST *host) {
  1039. if(!host) return;
  1040. info("RRD: 'host:%s' saving / closing database...", rrdhost_hostname(host));
  1041. RRDSET *st;
  1042. // we get a write lock
  1043. // to ensure only one thread is saving the database
  1044. rrdset_foreach_write(st, host) {
  1045. rrdset_save(st);
  1046. }
  1047. rrdset_foreach_done(st);
  1048. }
  1049. static void rrdhost_load_auto_labels(void) {
  1050. DICTIONARY *labels = localhost->rrdlabels;
  1051. if (localhost->system_info->cloud_provider_type)
  1052. rrdlabels_add(labels, "_cloud_provider_type", localhost->system_info->cloud_provider_type, RRDLABEL_SRC_AUTO);
  1053. if (localhost->system_info->cloud_instance_type)
  1054. rrdlabels_add(labels, "_cloud_instance_type", localhost->system_info->cloud_instance_type, RRDLABEL_SRC_AUTO);
  1055. if (localhost->system_info->cloud_instance_region)
  1056. rrdlabels_add(
  1057. labels, "_cloud_instance_region", localhost->system_info->cloud_instance_region, RRDLABEL_SRC_AUTO);
  1058. if (localhost->system_info->host_os_name)
  1059. rrdlabels_add(labels, "_os_name", localhost->system_info->host_os_name, RRDLABEL_SRC_AUTO);
  1060. if (localhost->system_info->host_os_version)
  1061. rrdlabels_add(labels, "_os_version", localhost->system_info->host_os_version, RRDLABEL_SRC_AUTO);
  1062. if (localhost->system_info->kernel_version)
  1063. rrdlabels_add(labels, "_kernel_version", localhost->system_info->kernel_version, RRDLABEL_SRC_AUTO);
  1064. if (localhost->system_info->host_cores)
  1065. rrdlabels_add(labels, "_system_cores", localhost->system_info->host_cores, RRDLABEL_SRC_AUTO);
  1066. if (localhost->system_info->host_cpu_freq)
  1067. rrdlabels_add(labels, "_system_cpu_freq", localhost->system_info->host_cpu_freq, RRDLABEL_SRC_AUTO);
  1068. if (localhost->system_info->host_ram_total)
  1069. rrdlabels_add(labels, "_system_ram_total", localhost->system_info->host_ram_total, RRDLABEL_SRC_AUTO);
  1070. if (localhost->system_info->host_disk_space)
  1071. rrdlabels_add(labels, "_system_disk_space", localhost->system_info->host_disk_space, RRDLABEL_SRC_AUTO);
  1072. if (localhost->system_info->architecture)
  1073. rrdlabels_add(labels, "_architecture", localhost->system_info->architecture, RRDLABEL_SRC_AUTO);
  1074. if (localhost->system_info->virtualization)
  1075. rrdlabels_add(labels, "_virtualization", localhost->system_info->virtualization, RRDLABEL_SRC_AUTO);
  1076. if (localhost->system_info->container)
  1077. rrdlabels_add(labels, "_container", localhost->system_info->container, RRDLABEL_SRC_AUTO);
  1078. if (localhost->system_info->container_detection)
  1079. rrdlabels_add(labels, "_container_detection", localhost->system_info->container_detection, RRDLABEL_SRC_AUTO);
  1080. if (localhost->system_info->virt_detection)
  1081. rrdlabels_add(labels, "_virt_detection", localhost->system_info->virt_detection, RRDLABEL_SRC_AUTO);
  1082. if (localhost->system_info->is_k8s_node)
  1083. rrdlabels_add(labels, "_is_k8s_node", localhost->system_info->is_k8s_node, RRDLABEL_SRC_AUTO);
  1084. if (localhost->system_info->install_type)
  1085. rrdlabels_add(labels, "_install_type", localhost->system_info->install_type, RRDLABEL_SRC_AUTO);
  1086. if (localhost->system_info->prebuilt_arch)
  1087. rrdlabels_add(labels, "_prebuilt_arch", localhost->system_info->prebuilt_arch, RRDLABEL_SRC_AUTO);
  1088. if (localhost->system_info->prebuilt_dist)
  1089. rrdlabels_add(labels, "_prebuilt_dist", localhost->system_info->prebuilt_dist, RRDLABEL_SRC_AUTO);
  1090. add_aclk_host_labels();
  1091. health_add_host_labels();
  1092. rrdlabels_add(labels, "_is_parent", (localhost->connected_children_count > 0) ? "true" : "false", RRDLABEL_SRC_AUTO);
  1093. if (localhost->rrdpush_send_destination)
  1094. rrdlabels_add(labels, "_streams_to", localhost->rrdpush_send_destination, RRDLABEL_SRC_AUTO);
  1095. }
  1096. void rrdhost_set_is_parent_label(int count) {
  1097. DICTIONARY *labels = localhost->rrdlabels;
  1098. if (count == 0 || count == 1) {
  1099. rrdlabels_add(
  1100. labels, "_is_parent", (count) ? "true" : "false", RRDLABEL_SRC_AUTO);
  1101. //queue a node info
  1102. #ifdef ENABLE_ACLK
  1103. if (netdata_cloud_setting) {
  1104. aclk_queue_node_info(localhost);
  1105. }
  1106. #endif
  1107. }
  1108. }
  1109. static void rrdhost_load_config_labels(void) {
  1110. int status = config_load(NULL, 1, CONFIG_SECTION_HOST_LABEL);
  1111. if(!status) {
  1112. char *filename = CONFIG_DIR "/" CONFIG_FILENAME;
  1113. error("RRDLABEL: Cannot reload the configuration file '%s', using labels in memory", filename);
  1114. }
  1115. struct section *co = appconfig_get_section(&netdata_config, CONFIG_SECTION_HOST_LABEL);
  1116. if(co) {
  1117. config_section_wrlock(co);
  1118. struct config_option *cv;
  1119. for(cv = co->values; cv ; cv = cv->next) {
  1120. rrdlabels_add(localhost->rrdlabels, cv->name, cv->value, RRDLABEL_SRC_CONFIG);
  1121. cv->flags |= CONFIG_VALUE_USED;
  1122. }
  1123. config_section_unlock(co);
  1124. }
  1125. }
  1126. static void rrdhost_load_kubernetes_labels(void) {
  1127. char label_script[sizeof(char) * (strlen(netdata_configured_primary_plugins_dir) + strlen("get-kubernetes-labels.sh") + 2)];
  1128. sprintf(label_script, "%s/%s", netdata_configured_primary_plugins_dir, "get-kubernetes-labels.sh");
  1129. if (unlikely(access(label_script, R_OK) != 0)) {
  1130. error("Kubernetes pod label fetching script %s not found.",label_script);
  1131. return;
  1132. }
  1133. debug(D_RRDHOST, "Attempting to fetch external labels via %s", label_script);
  1134. pid_t pid;
  1135. FILE *fp_child_input;
  1136. FILE *fp_child_output = netdata_popen(label_script, &pid, &fp_child_input);
  1137. if(!fp_child_output) return;
  1138. char buffer[1000 + 1];
  1139. while (fgets(buffer, 1000, fp_child_output) != NULL)
  1140. rrdlabels_add_pair(localhost->rrdlabels, buffer, RRDLABEL_SRC_AUTO|RRDLABEL_SRC_K8S);
  1141. // Non-zero exit code means that all the script output is error messages. We've shown already any message that didn't include a ':'
  1142. // Here we'll inform with an ERROR that the script failed, show whatever (if anything) was added to the list of labels, free the memory and set the return to null
  1143. int rc = netdata_pclose(fp_child_input, fp_child_output, pid);
  1144. if(rc) error("%s exited abnormally. Failed to get kubernetes labels.", label_script);
  1145. }
  1146. void reload_host_labels(void) {
  1147. if(!localhost->rrdlabels)
  1148. localhost->rrdlabels = rrdlabels_create();
  1149. rrdlabels_unmark_all(localhost->rrdlabels);
  1150. // priority is important here
  1151. rrdhost_load_config_labels();
  1152. rrdhost_load_kubernetes_labels();
  1153. rrdhost_load_auto_labels();
  1154. rrdhost_flag_set(localhost,RRDHOST_FLAG_METADATA_LABELS | RRDHOST_FLAG_METADATA_UPDATE);
  1155. rrdpush_send_host_labels(localhost);
  1156. }
  1157. void rrdhost_finalize_collection(RRDHOST *host) {
  1158. info("RRD: 'host:%s' stopping data collection...", rrdhost_hostname(host));
  1159. RRDSET *st;
  1160. rrdset_foreach_write(st, host)
  1161. rrdset_finalize_collection(st, true);
  1162. rrdset_foreach_done(st);
  1163. }
  1164. // ----------------------------------------------------------------------------
  1165. // RRDHOST - delete host files
  1166. void rrdhost_delete_charts(RRDHOST *host) {
  1167. if(!host) return;
  1168. info("RRD: 'host:%s' deleting disk files...", rrdhost_hostname(host));
  1169. RRDSET *st;
  1170. if(host->rrd_memory_mode == RRD_MEMORY_MODE_SAVE || host->rrd_memory_mode == RRD_MEMORY_MODE_MAP) {
  1171. // we get a write lock
  1172. // to ensure only one thread is saving the database
  1173. rrdset_foreach_write(st, host){
  1174. rrdset_delete_files(st);
  1175. }
  1176. rrdset_foreach_done(st);
  1177. }
  1178. recursively_delete_dir(host->cache_dir, "left over host");
  1179. }
  1180. // ----------------------------------------------------------------------------
  1181. // RRDHOST - cleanup host files
  1182. void rrdhost_cleanup_charts(RRDHOST *host) {
  1183. if(!host) return;
  1184. info("RRD: 'host:%s' cleaning up disk files...", rrdhost_hostname(host));
  1185. RRDSET *st;
  1186. uint32_t rrdhost_delete_obsolete_charts = rrdhost_option_check(host, RRDHOST_OPTION_DELETE_OBSOLETE_CHARTS);
  1187. // we get a write lock
  1188. // to ensure only one thread is saving the database
  1189. rrdset_foreach_write(st, host) {
  1190. if(rrdhost_delete_obsolete_charts && rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE))
  1191. rrdset_delete_files(st);
  1192. else if(rrdhost_delete_obsolete_charts && rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE_DIMENSIONS))
  1193. rrdset_delete_obsolete_dimensions(st);
  1194. else
  1195. rrdset_save(st);
  1196. }
  1197. rrdset_foreach_done(st);
  1198. }
  1199. // ----------------------------------------------------------------------------
  1200. // RRDHOST - save all hosts to disk
  1201. void rrdhost_save_all(void) {
  1202. info("RRD: saving databases [%zu hosts(s)]...", rrdhost_hosts_available());
  1203. rrd_rdlock();
  1204. RRDHOST *host;
  1205. rrdhost_foreach_read(host)
  1206. rrdhost_save_charts(host);
  1207. rrd_unlock();
  1208. }
  1209. // ----------------------------------------------------------------------------
  1210. // RRDHOST - save or delete all hosts from disk
  1211. void rrdhost_cleanup_all(void) {
  1212. info("RRD: cleaning up database [%zu hosts(s)]...", rrdhost_hosts_available());
  1213. rrd_rdlock();
  1214. RRDHOST *host;
  1215. rrdhost_foreach_read(host) {
  1216. if (host != localhost && rrdhost_option_check(host, RRDHOST_OPTION_DELETE_ORPHAN_HOST) && !host->receiver
  1217. /* don't delete multi-host DB host files */
  1218. && !(host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE && is_storage_engine_shared(host->db[0].instance))
  1219. )
  1220. rrdhost_delete_charts(host);
  1221. else
  1222. rrdhost_cleanup_charts(host);
  1223. }
  1224. rrd_unlock();
  1225. }
  1226. // ----------------------------------------------------------------------------
  1227. // RRDHOST - set system info from environment variables
  1228. // system_info fields must be heap allocated or NULL
  1229. int rrdhost_set_system_info_variable(struct rrdhost_system_info *system_info, char *name, char *value) {
  1230. int res = 0;
  1231. if (!strcmp(name, "NETDATA_PROTOCOL_VERSION"))
  1232. return res;
  1233. else if(!strcmp(name, "NETDATA_INSTANCE_CLOUD_TYPE")){
  1234. freez(system_info->cloud_provider_type);
  1235. system_info->cloud_provider_type = strdupz(value);
  1236. }
  1237. else if(!strcmp(name, "NETDATA_INSTANCE_CLOUD_INSTANCE_TYPE")){
  1238. freez(system_info->cloud_instance_type);
  1239. system_info->cloud_instance_type = strdupz(value);
  1240. }
  1241. else if(!strcmp(name, "NETDATA_INSTANCE_CLOUD_INSTANCE_REGION")){
  1242. freez(system_info->cloud_instance_region);
  1243. system_info->cloud_instance_region = strdupz(value);
  1244. }
  1245. else if(!strcmp(name, "NETDATA_CONTAINER_OS_NAME")){
  1246. freez(system_info->container_os_name);
  1247. system_info->container_os_name = strdupz(value);
  1248. }
  1249. else if(!strcmp(name, "NETDATA_CONTAINER_OS_ID")){
  1250. freez(system_info->container_os_id);
  1251. system_info->container_os_id = strdupz(value);
  1252. }
  1253. else if(!strcmp(name, "NETDATA_CONTAINER_OS_ID_LIKE")){
  1254. freez(system_info->container_os_id_like);
  1255. system_info->container_os_id_like = strdupz(value);
  1256. }
  1257. else if(!strcmp(name, "NETDATA_CONTAINER_OS_VERSION")){
  1258. freez(system_info->container_os_version);
  1259. system_info->container_os_version = strdupz(value);
  1260. }
  1261. else if(!strcmp(name, "NETDATA_CONTAINER_OS_VERSION_ID")){
  1262. freez(system_info->container_os_version_id);
  1263. system_info->container_os_version_id = strdupz(value);
  1264. }
  1265. else if(!strcmp(name, "NETDATA_CONTAINER_OS_DETECTION")){
  1266. freez(system_info->container_os_detection);
  1267. system_info->container_os_detection = strdupz(value);
  1268. }
  1269. else if(!strcmp(name, "NETDATA_HOST_OS_NAME")){
  1270. freez(system_info->host_os_name);
  1271. system_info->host_os_name = strdupz(value);
  1272. json_fix_string(system_info->host_os_name);
  1273. }
  1274. else if(!strcmp(name, "NETDATA_HOST_OS_ID")){
  1275. freez(system_info->host_os_id);
  1276. system_info->host_os_id = strdupz(value);
  1277. }
  1278. else if(!strcmp(name, "NETDATA_HOST_OS_ID_LIKE")){
  1279. freez(system_info->host_os_id_like);
  1280. system_info->host_os_id_like = strdupz(value);
  1281. }
  1282. else if(!strcmp(name, "NETDATA_HOST_OS_VERSION")){
  1283. freez(system_info->host_os_version);
  1284. system_info->host_os_version = strdupz(value);
  1285. }
  1286. else if(!strcmp(name, "NETDATA_HOST_OS_VERSION_ID")){
  1287. freez(system_info->host_os_version_id);
  1288. system_info->host_os_version_id = strdupz(value);
  1289. }
  1290. else if(!strcmp(name, "NETDATA_HOST_OS_DETECTION")){
  1291. freez(system_info->host_os_detection);
  1292. system_info->host_os_detection = strdupz(value);
  1293. }
  1294. else if(!strcmp(name, "NETDATA_SYSTEM_KERNEL_NAME")){
  1295. freez(system_info->kernel_name);
  1296. system_info->kernel_name = strdupz(value);
  1297. }
  1298. else if(!strcmp(name, "NETDATA_SYSTEM_CPU_LOGICAL_CPU_COUNT")){
  1299. freez(system_info->host_cores);
  1300. system_info->host_cores = strdupz(value);
  1301. }
  1302. else if(!strcmp(name, "NETDATA_SYSTEM_CPU_FREQ")){
  1303. freez(system_info->host_cpu_freq);
  1304. system_info->host_cpu_freq = strdupz(value);
  1305. }
  1306. else if(!strcmp(name, "NETDATA_SYSTEM_TOTAL_RAM")){
  1307. freez(system_info->host_ram_total);
  1308. system_info->host_ram_total = strdupz(value);
  1309. }
  1310. else if(!strcmp(name, "NETDATA_SYSTEM_TOTAL_DISK_SIZE")){
  1311. freez(system_info->host_disk_space);
  1312. system_info->host_disk_space = strdupz(value);
  1313. }
  1314. else if(!strcmp(name, "NETDATA_SYSTEM_KERNEL_VERSION")){
  1315. freez(system_info->kernel_version);
  1316. system_info->kernel_version = strdupz(value);
  1317. }
  1318. else if(!strcmp(name, "NETDATA_SYSTEM_ARCHITECTURE")){
  1319. freez(system_info->architecture);
  1320. system_info->architecture = strdupz(value);
  1321. }
  1322. else if(!strcmp(name, "NETDATA_SYSTEM_VIRTUALIZATION")){
  1323. freez(system_info->virtualization);
  1324. system_info->virtualization = strdupz(value);
  1325. }
  1326. else if(!strcmp(name, "NETDATA_SYSTEM_VIRT_DETECTION")){
  1327. freez(system_info->virt_detection);
  1328. system_info->virt_detection = strdupz(value);
  1329. }
  1330. else if(!strcmp(name, "NETDATA_SYSTEM_CONTAINER")){
  1331. freez(system_info->container);
  1332. system_info->container = strdupz(value);
  1333. }
  1334. else if(!strcmp(name, "NETDATA_SYSTEM_CONTAINER_DETECTION")){
  1335. freez(system_info->container_detection);
  1336. system_info->container_detection = strdupz(value);
  1337. }
  1338. else if(!strcmp(name, "NETDATA_HOST_IS_K8S_NODE")){
  1339. freez(system_info->is_k8s_node);
  1340. system_info->is_k8s_node = strdupz(value);
  1341. }
  1342. else if (!strcmp(name, "NETDATA_SYSTEM_CPU_VENDOR"))
  1343. return res;
  1344. else if (!strcmp(name, "NETDATA_SYSTEM_CPU_MODEL"))
  1345. return res;
  1346. else if (!strcmp(name, "NETDATA_SYSTEM_CPU_DETECTION"))
  1347. return res;
  1348. else if (!strcmp(name, "NETDATA_SYSTEM_RAM_DETECTION"))
  1349. return res;
  1350. else if (!strcmp(name, "NETDATA_SYSTEM_DISK_DETECTION"))
  1351. return res;
  1352. else if (!strcmp(name, "NETDATA_CONTAINER_IS_OFFICIAL_IMAGE"))
  1353. return res;
  1354. else {
  1355. res = 1;
  1356. }
  1357. return res;
  1358. }