rrdhost.c 64 KB


  1. // SPDX-License-Identifier: GPL-3.0-or-later
  2. #define NETDATA_RRD_INTERNALS
  3. #include "rrd.h"
  4. RRDHOST *localhost = NULL;
  5. size_t rrd_hosts_available = 0;
  6. netdata_rwlock_t rrd_rwlock = NETDATA_RWLOCK_INITIALIZER;
  7. time_t rrdset_free_obsolete_time = 3600;
  8. time_t rrdhost_free_orphan_time = 3600;
  9. // ----------------------------------------------------------------------------
  10. // RRDHOST index
  11. int rrdhost_compare(void* a, void* b) {
  12. if(((RRDHOST *)a)->hash_machine_guid < ((RRDHOST *)b)->hash_machine_guid) return -1;
  13. else if(((RRDHOST *)a)->hash_machine_guid > ((RRDHOST *)b)->hash_machine_guid) return 1;
  14. else return strcmp(((RRDHOST *)a)->machine_guid, ((RRDHOST *)b)->machine_guid);
  15. }
  16. avl_tree_lock rrdhost_root_index = {
  17. .avl_tree = { NULL, rrdhost_compare },
  18. .rwlock = AVL_LOCK_INITIALIZER
  19. };
  20. RRDHOST *rrdhost_find_by_guid(const char *guid, uint32_t hash) {
  21. debug(D_RRDHOST, "Searching in index for host with guid '%s'", guid);
  22. RRDHOST tmp;
  23. strncpyz(tmp.machine_guid, guid, GUID_LEN);
  24. tmp.hash_machine_guid = (hash)?hash:simple_hash(tmp.machine_guid);
  25. return (RRDHOST *)avl_search_lock(&(rrdhost_root_index), (avl_t *) &tmp);
  26. }
  27. RRDHOST *rrdhost_find_by_hostname(const char *hostname, uint32_t hash) {
  28. if(unlikely(!strcmp(hostname, "localhost")))
  29. return localhost;
  30. if(unlikely(!hash)) hash = simple_hash(hostname);
  31. rrd_rdlock();
  32. RRDHOST *host;
  33. rrdhost_foreach_read(host) {
  34. if(unlikely((hash == host->hash_hostname && !strcmp(hostname, host->hostname)))) {
  35. rrd_unlock();
  36. return host;
  37. }
  38. }
  39. rrd_unlock();
  40. return NULL;
  41. }
  // Insert / remove a host in rrdhost_root_index. Both expand to the pointer
  // returned by the underlying locked AVL call, cast to RRDHOST *.
  #define rrdhost_index_add(rrdhost) \
      (RRDHOST *)avl_insert_lock(&(rrdhost_root_index), (avl_t *)(rrdhost))

  #define rrdhost_index_del(rrdhost) \
      (RRDHOST *)avl_remove_lock(&(rrdhost_root_index), (avl_t *)(rrdhost))
  44. // ----------------------------------------------------------------------------
  45. // RRDHOST - internal helpers
  46. static inline void rrdhost_init_tags(RRDHOST *host, const char *tags) {
  47. if(host->tags && tags && !strcmp(host->tags, tags))
  48. return;
  49. void *old = (void *)host->tags;
  50. host->tags = (tags && *tags)?strdupz(tags):NULL;
  51. freez(old);
  52. }
  53. static inline void rrdhost_init_hostname(RRDHOST *host, const char *hostname) {
  54. if(host->hostname && hostname && !strcmp(host->hostname, hostname))
  55. return;
  56. void *old = host->hostname;
  57. host->hostname = strdupz(hostname?hostname:"localhost");
  58. host->hash_hostname = simple_hash(host->hostname);
  59. freez(old);
  60. }
  61. static inline void rrdhost_init_os(RRDHOST *host, const char *os) {
  62. if(host->os && os && !strcmp(host->os, os))
  63. return;
  64. void *old = (void *)host->os;
  65. host->os = strdupz(os?os:"unknown");
  66. freez(old);
  67. }
  68. static inline void rrdhost_init_timezone(RRDHOST *host, const char *timezone, const char *abbrev_timezone, int32_t utc_offset) {
  69. if (host->timezone && timezone && !strcmp(host->timezone, timezone) && host->abbrev_timezone && abbrev_timezone &&
  70. !strcmp(host->abbrev_timezone, abbrev_timezone) && host->utc_offset == utc_offset)
  71. return;
  72. void *old = (void *)host->timezone;
  73. host->timezone = strdupz((timezone && *timezone)?timezone:"unknown");
  74. freez(old);
  75. old = (void *)host->abbrev_timezone;
  76. host->abbrev_timezone = strdupz((abbrev_timezone && *abbrev_timezone) ? abbrev_timezone : "UTC");
  77. freez(old);
  78. host->utc_offset = utc_offset;
  79. }
  80. static inline void rrdhost_init_machine_guid(RRDHOST *host, const char *machine_guid) {
  81. strncpy(host->machine_guid, machine_guid, GUID_LEN);
  82. host->machine_guid[GUID_LEN] = '\0';
  83. host->hash_machine_guid = simple_hash(host->machine_guid);
  84. }
  85. void set_host_properties(RRDHOST *host, int update_every, RRD_MEMORY_MODE memory_mode, const char *hostname,
  86. const char *registry_hostname, const char *guid, const char *os, const char *tags,
  87. const char *tzone, const char *abbrev_tzone, int32_t utc_offset, const char *program_name,
  88. const char *program_version)
  89. {
  90. host->rrd_update_every = update_every;
  91. host->rrd_memory_mode = memory_mode;
  92. rrdhost_init_hostname(host, hostname);
  93. rrdhost_init_machine_guid(host, guid);
  94. rrdhost_init_os(host, os);
  95. rrdhost_init_timezone(host, tzone, abbrev_tzone, utc_offset);
  96. rrdhost_init_tags(host, tags);
  97. host->program_name = strdupz((program_name && *program_name) ? program_name : "unknown");
  98. host->program_version = strdupz((program_version && *program_version) ? program_version : "unknown");
  99. host->registry_hostname = strdupz((registry_hostname && *registry_hostname) ? registry_hostname : host->hostname);
  100. }
  101. // ----------------------------------------------------------------------------
  102. // RRDHOST - add a host
  103. RRDHOST *rrdhost_create(const char *hostname,
  104. const char *registry_hostname,
  105. const char *guid,
  106. const char *os,
  107. const char *timezone,
  108. const char *abbrev_timezone,
  109. int32_t utc_offset,
  110. const char *tags,
  111. const char *program_name,
  112. const char *program_version,
  113. int update_every,
  114. long entries,
  115. RRD_MEMORY_MODE memory_mode,
  116. unsigned int health_enabled,
  117. unsigned int rrdpush_enabled,
  118. char *rrdpush_destination,
  119. char *rrdpush_api_key,
  120. char *rrdpush_send_charts_matching,
  121. struct rrdhost_system_info *system_info,
  122. int is_localhost
  123. ) {
  124. debug(D_RRDHOST, "Host '%s': adding with guid '%s'", hostname, guid);
  125. #ifdef ENABLE_DBENGINE
  126. int is_legacy = (memory_mode == RRD_MEMORY_MODE_DBENGINE) && is_legacy_child(guid);
  127. #else
  128. int is_legacy = 1;
  129. #endif
  130. rrd_check_wrlock();
  131. int is_in_multihost = (memory_mode == RRD_MEMORY_MODE_DBENGINE && !is_legacy);
  132. RRDHOST *host = callocz(1, sizeof(RRDHOST));
  133. set_host_properties(host, (update_every > 0)?update_every:1, memory_mode, hostname, registry_hostname, guid, os,
  134. tags, timezone, abbrev_timezone, utc_offset, program_name, program_version);
  135. host->rrd_history_entries = align_entries_to_pagesize(memory_mode, entries);
  136. host->health_enabled = ((memory_mode == RRD_MEMORY_MODE_NONE)) ? 0 : health_enabled;
  137. host->sender = mallocz(sizeof(*host->sender));
  138. sender_init(host->sender, host);
  139. netdata_mutex_init(&host->receiver_lock);
  140. host->rrdpush_send_enabled = (rrdpush_enabled && rrdpush_destination && *rrdpush_destination && rrdpush_api_key && *rrdpush_api_key) ? 1 : 0;
  141. host->rrdpush_send_destination = (host->rrdpush_send_enabled)?strdupz(rrdpush_destination):NULL;
  142. if (host->rrdpush_send_destination)
  143. host->destinations = destinations_init(host->rrdpush_send_destination);
  144. host->rrdpush_send_api_key = (host->rrdpush_send_enabled)?strdupz(rrdpush_api_key):NULL;
  145. host->rrdpush_send_charts_matching = simple_pattern_create(rrdpush_send_charts_matching, NULL, SIMPLE_PATTERN_EXACT);
  146. host->rrdpush_sender_pipe[0] = -1;
  147. host->rrdpush_sender_pipe[1] = -1;
  148. host->rrdpush_sender_socket = -1;
  149. //host->stream_version = STREAMING_PROTOCOL_CURRENT_VERSION; Unused?
  150. #ifdef ENABLE_HTTPS
  151. host->ssl.conn = NULL;
  152. host->ssl.flags = NETDATA_SSL_START;
  153. host->stream_ssl.conn = NULL;
  154. host->stream_ssl.flags = NETDATA_SSL_START;
  155. #endif
  156. netdata_rwlock_init(&host->rrdhost_rwlock);
  157. netdata_rwlock_init(&host->labels.labels_rwlock);
  158. netdata_mutex_init(&host->aclk_state_lock);
  159. host->system_info = system_info;
  160. avl_init_lock(&(host->rrdset_root_index), rrdset_compare);
  161. avl_init_lock(&(host->rrdset_root_index_name), rrdset_compare_name);
  162. avl_init_lock(&(host->rrdfamily_root_index), rrdfamily_compare);
  163. avl_init_lock(&(host->rrdvar_root_index), rrdvar_compare);
  164. if(config_get_boolean(CONFIG_SECTION_GLOBAL, "delete obsolete charts files", 1))
  165. rrdhost_flag_set(host, RRDHOST_FLAG_DELETE_OBSOLETE_CHARTS);
  166. if(config_get_boolean(CONFIG_SECTION_GLOBAL, "delete orphan hosts files", 1) && !is_localhost)
  167. rrdhost_flag_set(host, RRDHOST_FLAG_DELETE_ORPHAN_HOST);
  168. host->health_default_warn_repeat_every = config_get_duration(CONFIG_SECTION_HEALTH, "default repeat warning", "never");
  169. host->health_default_crit_repeat_every = config_get_duration(CONFIG_SECTION_HEALTH, "default repeat critical", "never");
  170. avl_init_lock(&(host->alarms_idx_health_log), alarm_compare_id);
  171. avl_init_lock(&(host->alarms_idx_name), alarm_compare_name);
  172. // ------------------------------------------------------------------------
  173. // initialize health variables
  174. host->health_log.next_log_id = 1;
  175. host->health_log.next_alarm_id = 1;
  176. host->health_log.max = 1000;
  177. host->health_log.next_log_id = (uint32_t)now_realtime_sec();
  178. host->health_log.next_alarm_id = 0;
  179. long n = config_get_number(CONFIG_SECTION_HEALTH, "in memory max health log entries", host->health_log.max);
  180. if(n < 10) {
  181. error("Host '%s': health configuration has invalid max log entries %ld. Using default %u", host->hostname, n, host->health_log.max);
  182. config_set_number(CONFIG_SECTION_HEALTH, "in memory max health log entries", (long)host->health_log.max);
  183. }
  184. else
  185. host->health_log.max = (unsigned int)n;
  186. netdata_rwlock_init(&host->health_log.alarm_log_rwlock);
  187. char filename[FILENAME_MAX + 1];
  188. if(is_localhost) {
  189. host->cache_dir = strdupz(netdata_configured_cache_dir);
  190. host->varlib_dir = strdupz(netdata_configured_varlib_dir);
  191. }
  192. else {
  193. // this is not localhost - append our GUID to localhost path
  194. if (is_in_multihost) { // don't append to cache dir in multihost
  195. host->cache_dir = strdupz(netdata_configured_cache_dir);
  196. } else {
  197. snprintfz(filename, FILENAME_MAX, "%s/%s", netdata_configured_cache_dir, host->machine_guid);
  198. host->cache_dir = strdupz(filename);
  199. }
  200. if((host->rrd_memory_mode == RRD_MEMORY_MODE_MAP || host->rrd_memory_mode == RRD_MEMORY_MODE_SAVE || (
  201. host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE && is_legacy))) {
  202. int r = mkdir(host->cache_dir, 0775);
  203. if(r != 0 && errno != EEXIST)
  204. error("Host '%s': cannot create directory '%s'", host->hostname, host->cache_dir);
  205. }
  206. snprintfz(filename, FILENAME_MAX, "%s/%s", netdata_configured_varlib_dir, host->machine_guid);
  207. host->varlib_dir = strdupz(filename);
  208. if(host->health_enabled) {
  209. int r = mkdir(host->varlib_dir, 0775);
  210. if(r != 0 && errno != EEXIST)
  211. error("Host '%s': cannot create directory '%s'", host->hostname, host->varlib_dir);
  212. }
  213. }
  214. if(host->health_enabled) {
  215. snprintfz(filename, FILENAME_MAX, "%s/health", host->varlib_dir);
  216. int r = mkdir(filename, 0775);
  217. if(r != 0 && errno != EEXIST)
  218. error("Host '%s': cannot create directory '%s'", host->hostname, filename);
  219. }
  220. snprintfz(filename, FILENAME_MAX, "%s/health/health-log.db", host->varlib_dir);
  221. host->health_log_filename = strdupz(filename);
  222. snprintfz(filename, FILENAME_MAX, "%s/alarm-notify.sh", netdata_configured_primary_plugins_dir);
  223. host->health_default_exec = strdupz(config_get(CONFIG_SECTION_HEALTH, "script to execute on alarm", filename));
  224. host->health_default_recipient = strdupz("root");
  225. // ------------------------------------------------------------------------
  226. // load health configuration
  227. if(host->health_enabled) {
  228. rrdhost_wrlock(host);
  229. health_readdir(host, health_user_config_dir(), health_stock_config_dir(), NULL);
  230. rrdhost_unlock(host);
  231. }
  232. RRDHOST *t = rrdhost_index_add(host);
  233. if(t != host) {
  234. error("Host '%s': cannot add host with machine guid '%s' to index. It already exists as host '%s' with machine guid '%s'.", host->hostname, host->machine_guid, t->hostname, t->machine_guid);
  235. rrdhost_free(host);
  236. return NULL;
  237. }
  238. if (likely(!uuid_parse(host->machine_guid, host->host_uuid))) {
  239. int rc = sql_store_host(&host->host_uuid, hostname, registry_hostname, update_every, os, timezone, tags);
  240. if (unlikely(rc))
  241. error_report("Failed to store machine GUID to the database");
  242. sql_load_node_id(host);
  243. if (host->health_enabled) {
  244. if (!file_is_migrated(host->health_log_filename)) {
  245. int rc = sql_create_health_log_table(host);
  246. if (unlikely(rc)) {
  247. error_report("Failed to create health log table in the database");
  248. health_alarm_log_load(host);
  249. health_alarm_log_open(host);
  250. }
  251. else {
  252. health_alarm_log_load(host);
  253. add_migrated_file(host->health_log_filename, 0);
  254. }
  255. } else {
  256. sql_create_health_log_table(host);
  257. sql_health_alarm_log_load(host);
  258. }
  259. }
  260. }
  261. else
  262. error_report("Host machine GUID %s is not valid", host->machine_guid);
  263. if (host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) {
  264. #ifdef ENABLE_DBENGINE
  265. char dbenginepath[FILENAME_MAX + 1];
  266. int ret;
  267. snprintfz(dbenginepath, FILENAME_MAX, "%s/dbengine", host->cache_dir);
  268. ret = mkdir(dbenginepath, 0775);
  269. if (ret != 0 && errno != EEXIST)
  270. error("Host '%s': cannot create directory '%s'", host->hostname, dbenginepath);
  271. else ret = 0; // succeed
  272. if (is_legacy) // initialize legacy dbengine instance as needed
  273. ret = rrdeng_init(host, &host->rrdeng_ctx, dbenginepath, default_rrdeng_page_cache_mb,
  274. default_rrdeng_disk_quota_mb); // may fail here for legacy dbengine initialization
  275. else
  276. host->rrdeng_ctx = &multidb_ctx;
  277. if (ret) { // check legacy or multihost initialization success
  278. error(
  279. "Host '%s': cannot initialize host with machine guid '%s'. Failed to initialize DB engine at '%s'.",
  280. host->hostname, host->machine_guid, host->cache_dir);
  281. rrdhost_free(host);
  282. host = NULL;
  283. //rrd_hosts_available++; //TODO: maybe we want this?
  284. return host;
  285. }
  286. #else
  287. fatal("RRD_MEMORY_MODE_DBENGINE is not supported in this platform.");
  288. #endif
  289. }
  290. else {
  291. #ifdef ENABLE_DBENGINE
  292. host->rrdeng_ctx = &multidb_ctx;
  293. #endif
  294. }
  295. // ------------------------------------------------------------------------
  296. // link it and add it to the index
  297. if(is_localhost) {
  298. host->next = localhost;
  299. localhost = host;
  300. }
  301. else {
  302. if(localhost) {
  303. host->next = localhost->next;
  304. localhost->next = host;
  305. }
  306. else localhost = host;
  307. }
  308. // ------------------------------------------------------------------------
  309. // init new ML host and update system_info to let upstreams know
  310. // about ML functionality
  311. //
  312. if (is_localhost && host->system_info) {
  313. host->system_info->ml_capable = ml_capable();
  314. host->system_info->ml_enabled = ml_enabled(host);
  315. host->system_info->mc_version = enable_metric_correlations ? metric_correlations_version : 0;
  316. }
  317. ml_new_host(host);
  318. info("Host '%s' (at registry as '%s') with guid '%s' initialized"
  319. ", os '%s'"
  320. ", timezone '%s'"
  321. ", tags '%s'"
  322. ", program_name '%s'"
  323. ", program_version '%s'"
  324. ", update every %d"
  325. ", memory mode %s"
  326. ", history entries %ld"
  327. ", streaming %s"
  328. " (to '%s' with api key '%s')"
  329. ", health %s"
  330. ", cache_dir '%s'"
  331. ", varlib_dir '%s'"
  332. ", health_log '%s'"
  333. ", alarms default handler '%s'"
  334. ", alarms default recipient '%s'"
  335. , host->hostname
  336. , host->registry_hostname
  337. , host->machine_guid
  338. , host->os
  339. , host->timezone
  340. , (host->tags)?host->tags:""
  341. , host->program_name
  342. , host->program_version
  343. , host->rrd_update_every
  344. , rrd_memory_mode_name(host->rrd_memory_mode)
  345. , host->rrd_history_entries
  346. , host->rrdpush_send_enabled?"enabled":"disabled"
  347. , host->rrdpush_send_destination?host->rrdpush_send_destination:""
  348. , host->rrdpush_send_api_key?host->rrdpush_send_api_key:""
  349. , host->health_enabled?"enabled":"disabled"
  350. , host->cache_dir
  351. , host->varlib_dir
  352. , host->health_log_filename
  353. , host->health_default_exec
  354. , host->health_default_recipient
  355. );
  356. rrd_hosts_available++;
  357. return host;
  358. }
  359. void rrdhost_update(RRDHOST *host
  360. , const char *hostname
  361. , const char *registry_hostname
  362. , const char *guid
  363. , const char *os
  364. , const char *timezone
  365. , const char *abbrev_timezone
  366. , int32_t utc_offset
  367. , const char *tags
  368. , const char *program_name
  369. , const char *program_version
  370. , int update_every
  371. , long history
  372. , RRD_MEMORY_MODE mode
  373. , unsigned int health_enabled
  374. , unsigned int rrdpush_enabled
  375. , char *rrdpush_destination
  376. , char *rrdpush_api_key
  377. , char *rrdpush_send_charts_matching
  378. , struct rrdhost_system_info *system_info
  379. )
  380. {
  381. UNUSED(guid);
  382. UNUSED(rrdpush_enabled);
  383. UNUSED(rrdpush_destination);
  384. UNUSED(rrdpush_api_key);
  385. UNUSED(rrdpush_send_charts_matching);
  386. host->health_enabled = (mode == RRD_MEMORY_MODE_NONE) ? 0 : health_enabled;
  387. //host->stream_version = STREAMING_PROTOCOL_CURRENT_VERSION; Unused?
  388. rrdhost_system_info_free(host->system_info);
  389. host->system_info = system_info;
  390. rrdhost_init_os(host, os);
  391. rrdhost_init_timezone(host, timezone, abbrev_timezone, utc_offset);
  392. freez(host->registry_hostname);
  393. host->registry_hostname = strdupz((registry_hostname && *registry_hostname)?registry_hostname:hostname);
  394. if(strcmp(host->hostname, hostname) != 0) {
  395. info("Host '%s' has been renamed to '%s'. If this is not intentional it may mean multiple hosts are using the same machine_guid.", host->hostname, hostname);
  396. char *t = host->hostname;
  397. host->hostname = strdupz(hostname);
  398. host->hash_hostname = simple_hash(host->hostname);
  399. freez(t);
  400. }
  401. if(strcmp(host->program_name, program_name) != 0) {
  402. info("Host '%s' switched program name from '%s' to '%s'", host->hostname, host->program_name, program_name);
  403. char *t = host->program_name;
  404. host->program_name = strdupz(program_name);
  405. freez(t);
  406. }
  407. if(strcmp(host->program_version, program_version) != 0) {
  408. info("Host '%s' switched program version from '%s' to '%s'", host->hostname, host->program_version, program_version);
  409. char *t = host->program_version;
  410. host->program_version = strdupz(program_version);
  411. freez(t);
  412. }
  413. if(host->rrd_update_every != update_every)
  414. error("Host '%s' has an update frequency of %d seconds, but the wanted one is %d seconds. Restart netdata here to apply the new settings.", host->hostname, host->rrd_update_every, update_every);
  415. if(host->rrd_history_entries < history)
  416. error("Host '%s' has history of %ld entries, but the wanted one is %ld entries. Restart netdata here to apply the new settings.", host->hostname, host->rrd_history_entries, history);
  417. if(host->rrd_memory_mode != mode)
  418. error("Host '%s' has memory mode '%s', but the wanted one is '%s'. Restart netdata here to apply the new settings.", host->hostname, rrd_memory_mode_name(host->rrd_memory_mode), rrd_memory_mode_name(mode));
  419. // update host tags
  420. rrdhost_init_tags(host, tags);
  421. if (rrdhost_flag_check(host, RRDHOST_FLAG_ARCHIVED)) {
  422. rrdhost_flag_clear(host, RRDHOST_FLAG_ARCHIVED);
  423. if(host->health_enabled) {
  424. int r;
  425. char filename[FILENAME_MAX + 1];
  426. if (host != localhost) {
  427. r = mkdir(host->varlib_dir, 0775);
  428. if (r != 0 && errno != EEXIST)
  429. error("Host '%s': cannot create directory '%s'", host->hostname, host->varlib_dir);
  430. }
  431. snprintfz(filename, FILENAME_MAX, "%s/health", host->varlib_dir);
  432. r = mkdir(filename, 0775);
  433. if(r != 0 && errno != EEXIST)
  434. error("Host '%s': cannot create directory '%s'", host->hostname, filename);
  435. rrdhost_wrlock(host);
  436. health_readdir(host, health_user_config_dir(), health_stock_config_dir(), NULL);
  437. rrdhost_unlock(host);
  438. if (!file_is_migrated(host->health_log_filename)) {
  439. int rc = sql_create_health_log_table(host);
  440. if (unlikely(rc)) {
  441. error_report("Failed to create health log table in the database");
  442. health_alarm_log_load(host);
  443. health_alarm_log_open(host);
  444. } else {
  445. health_alarm_log_load(host);
  446. add_migrated_file(host->health_log_filename, 0);
  447. }
  448. } else {
  449. sql_create_health_log_table(host);
  450. sql_health_alarm_log_load(host);
  451. }
  452. }
  453. rrd_hosts_available++;
  454. info("Host %s is not in archived mode anymore", host->hostname);
  455. }
  456. return;
  457. }
  458. RRDHOST *rrdhost_find_or_create(
  459. const char *hostname
  460. , const char *registry_hostname
  461. , const char *guid
  462. , const char *os
  463. , const char *timezone
  464. , const char *abbrev_timezone
  465. , int32_t utc_offset
  466. , const char *tags
  467. , const char *program_name
  468. , const char *program_version
  469. , int update_every
  470. , long history
  471. , RRD_MEMORY_MODE mode
  472. , unsigned int health_enabled
  473. , unsigned int rrdpush_enabled
  474. , char *rrdpush_destination
  475. , char *rrdpush_api_key
  476. , char *rrdpush_send_charts_matching
  477. , struct rrdhost_system_info *system_info
  478. ) {
  479. debug(D_RRDHOST, "Searching for host '%s' with guid '%s'", hostname, guid);
  480. rrd_wrlock();
  481. RRDHOST *host = rrdhost_find_by_guid(guid, 0);
  482. if (unlikely(host && RRD_MEMORY_MODE_DBENGINE != mode && rrdhost_flag_check(host, RRDHOST_FLAG_ARCHIVED))) {
  483. /* If a legacy memory mode instantiates all dbengine state must be discarded to avoid inconsistencies */
  484. error("Archived host '%s' has memory mode '%s', but the wanted one is '%s'. Discarding archived state.",
  485. host->hostname, rrd_memory_mode_name(host->rrd_memory_mode), rrd_memory_mode_name(mode));
  486. rrdhost_free(host);
  487. host = NULL;
  488. }
  489. if(!host) {
  490. host = rrdhost_create(
  491. hostname
  492. , registry_hostname
  493. , guid
  494. , os
  495. , timezone
  496. , abbrev_timezone
  497. , utc_offset
  498. , tags
  499. , program_name
  500. , program_version
  501. , update_every
  502. , history
  503. , mode
  504. , health_enabled
  505. , rrdpush_enabled
  506. , rrdpush_destination
  507. , rrdpush_api_key
  508. , rrdpush_send_charts_matching
  509. , system_info
  510. , 0
  511. );
  512. }
  513. else {
  514. rrdhost_update(host
  515. , hostname
  516. , registry_hostname
  517. , guid
  518. , os
  519. , timezone
  520. , abbrev_timezone
  521. , utc_offset
  522. , tags
  523. , program_name
  524. , program_version
  525. , update_every
  526. , history
  527. , mode
  528. , health_enabled
  529. , rrdpush_enabled
  530. , rrdpush_destination
  531. , rrdpush_api_key
  532. , rrdpush_send_charts_matching
  533. , system_info);
  534. }
  535. if (host) {
  536. rrdhost_wrlock(host);
  537. rrdhost_flag_clear(host, RRDHOST_FLAG_ORPHAN);
  538. host->senders_disconnected_time = 0;
  539. rrdhost_unlock(host);
  540. }
  541. rrd_unlock();
  542. return host;
  543. }
  544. inline int rrdhost_should_be_removed(RRDHOST *host, RRDHOST *protected_host, time_t now) {
  545. if(host != protected_host
  546. && host != localhost
  547. && rrdhost_flag_check(host, RRDHOST_FLAG_ORPHAN)
  548. && !host->receiver
  549. && host->senders_disconnected_time
  550. && host->senders_disconnected_time + rrdhost_free_orphan_time < now)
  551. return 1;
  552. return 0;
  553. }
  554. void rrdhost_cleanup_orphan_hosts_nolock(RRDHOST *protected_host) {
  555. time_t now = now_realtime_sec();
  556. RRDHOST *host;
  557. restart_after_removal:
  558. rrdhost_foreach_write(host) {
  559. if(rrdhost_should_be_removed(host, protected_host, now)) {
  560. info("Host '%s' with machine guid '%s' is obsolete - cleaning up.", host->hostname, host->machine_guid);
  561. if (rrdhost_flag_check(host, RRDHOST_FLAG_DELETE_ORPHAN_HOST)
  562. #ifdef ENABLE_DBENGINE
  563. /* don't delete multi-host DB host files */
  564. && !(host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE && host->rrdeng_ctx == &multidb_ctx)
  565. #endif
  566. )
  567. rrdhost_delete_charts(host);
  568. else
  569. rrdhost_save_charts(host);
  570. rrdhost_free(host);
  571. goto restart_after_removal;
  572. }
  573. }
  574. }
  575. // ----------------------------------------------------------------------------
  576. // RRDHOST global / startup initialization
  577. int rrd_init(char *hostname, struct rrdhost_system_info *system_info) {
  578. rrdset_free_obsolete_time = config_get_number(CONFIG_SECTION_GLOBAL, "cleanup obsolete charts after seconds", rrdset_free_obsolete_time);
  579. // Current chart locking and invalidation scheme doesn't prevent Netdata from segmentation faults if a short
  580. // cleanup delay is set. Extensive stress tests showed that 10 seconds is quite a safe delay. Look at
  581. // https://github.com/netdata/netdata/pull/11222#issuecomment-868367920 for more information.
  582. if (rrdset_free_obsolete_time < 10) {
  583. rrdset_free_obsolete_time = 10;
  584. info("The \"cleanup obsolete charts after seconds\" option was set to 10 seconds. A lower delay can potentially cause a segmentation fault.");
  585. }
  586. gap_when_lost_iterations_above = (int)config_get_number(CONFIG_SECTION_GLOBAL, "gap when lost iterations above", gap_when_lost_iterations_above);
  587. if (gap_when_lost_iterations_above < 1)
  588. gap_when_lost_iterations_above = 1;
  589. if (unlikely(sql_init_database(DB_CHECK_NONE, 0))) {
  590. if (default_rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE)
  591. fatal("Failed to initialize SQLite");
  592. info("Skipping SQLITE metadata initialization since memory mode is not db engine");
  593. }
  594. health_init();
  595. rrdpush_init();
  596. debug(D_RRDHOST, "Initializing localhost with hostname '%s'", hostname);
  597. rrd_wrlock();
  598. localhost = rrdhost_create(
  599. hostname
  600. , registry_get_this_machine_hostname()
  601. , registry_get_this_machine_guid()
  602. , os_type
  603. , netdata_configured_timezone
  604. , netdata_configured_abbrev_timezone
  605. , netdata_configured_utc_offset
  606. , ""
  607. , program_name
  608. , program_version
  609. , default_rrd_update_every
  610. , default_rrd_history_entries
  611. , default_rrd_memory_mode
  612. , default_health_enabled
  613. , default_rrdpush_enabled
  614. , default_rrdpush_destination
  615. , default_rrdpush_api_key
  616. , default_rrdpush_send_charts_matching
  617. , system_info
  618. , 1
  619. );
  620. if (unlikely(!localhost)) {
  621. rrd_unlock();
  622. return 1;
  623. }
  624. #ifdef ENABLE_DBENGINE
  625. char dbenginepath[FILENAME_MAX + 1];
  626. int ret;
  627. snprintfz(dbenginepath, FILENAME_MAX, "%s/dbengine", localhost->cache_dir);
  628. ret = mkdir(dbenginepath, 0775);
  629. if (ret != 0 && errno != EEXIST)
  630. error("Host '%s': cannot create directory '%s'", localhost->hostname, dbenginepath);
  631. else // Unconditionally create multihost db to support on demand host creation
  632. ret = rrdeng_init(NULL, NULL, dbenginepath, default_rrdeng_page_cache_mb, default_multidb_disk_quota_mb);
  633. if (ret) {
  634. error(
  635. "Host '%s' with machine guid '%s' failed to initialize multi-host DB engine instance at '%s'.",
  636. localhost->hostname, localhost->machine_guid, localhost->cache_dir);
  637. rrdhost_free(localhost);
  638. localhost = NULL;
  639. rrd_unlock();
  640. fatal("Failed to initialize dbengine");
  641. }
  642. #endif
  643. sql_aclk_sync_init();
  644. rrd_unlock();
  645. web_client_api_v1_management_init();
  646. return localhost==NULL;
  647. }
  648. // ----------------------------------------------------------------------------
  649. // RRDHOST - lock validations
  // these are only used when NETDATA_INTERNAL_CHECKS is set
  651. void __rrdhost_check_rdlock(RRDHOST *host, const char *file, const char *function, const unsigned long line) {
  652. debug(D_RRDHOST, "Checking read lock on host '%s'", host->hostname);
  653. int ret = netdata_rwlock_trywrlock(&host->rrdhost_rwlock);
  654. if(ret == 0)
  655. fatal("RRDHOST '%s' should be read-locked, but it is not, at function %s() at line %lu of file '%s'", host->hostname, function, line, file);
  656. }
  657. void __rrdhost_check_wrlock(RRDHOST *host, const char *file, const char *function, const unsigned long line) {
  658. debug(D_RRDHOST, "Checking write lock on host '%s'", host->hostname);
  659. int ret = netdata_rwlock_tryrdlock(&host->rrdhost_rwlock);
  660. if(ret == 0)
  661. fatal("RRDHOST '%s' should be write-locked, but it is not, at function %s() at line %lu of file '%s'", host->hostname, function, line, file);
  662. }
  663. void __rrd_check_rdlock(const char *file, const char *function, const unsigned long line) {
  664. debug(D_RRDHOST, "Checking read lock on all RRDs");
  665. int ret = netdata_rwlock_trywrlock(&rrd_rwlock);
  666. if(ret == 0)
  667. fatal("RRDs should be read-locked, but it are not, at function %s() at line %lu of file '%s'", function, line, file);
  668. }
  669. void __rrd_check_wrlock(const char *file, const char *function, const unsigned long line) {
  670. debug(D_RRDHOST, "Checking write lock on all RRDs");
  671. int ret = netdata_rwlock_tryrdlock(&rrd_rwlock);
  672. if(ret == 0)
  673. fatal("RRDs should be write-locked, but it are not, at function %s() at line %lu of file '%s'", function, line, file);
  674. }
  675. // ----------------------------------------------------------------------------
  676. // RRDHOST - free
  677. void rrdhost_system_info_free(struct rrdhost_system_info *system_info) {
  678. info("SYSTEM_INFO: free %p", system_info);
  679. if(likely(system_info)) {
  680. freez(system_info->cloud_provider_type);
  681. freez(system_info->cloud_instance_type);
  682. freez(system_info->cloud_instance_region);
  683. freez(system_info->host_os_name);
  684. freez(system_info->host_os_id);
  685. freez(system_info->host_os_id_like);
  686. freez(system_info->host_os_version);
  687. freez(system_info->host_os_version_id);
  688. freez(system_info->host_os_detection);
  689. freez(system_info->host_cores);
  690. freez(system_info->host_cpu_freq);
  691. freez(system_info->host_ram_total);
  692. freez(system_info->host_disk_space);
  693. freez(system_info->container_os_name);
  694. freez(system_info->container_os_id);
  695. freez(system_info->container_os_id_like);
  696. freez(system_info->container_os_version);
  697. freez(system_info->container_os_version_id);
  698. freez(system_info->container_os_detection);
  699. freez(system_info->kernel_name);
  700. freez(system_info->kernel_version);
  701. freez(system_info->architecture);
  702. freez(system_info->virtualization);
  703. freez(system_info->virt_detection);
  704. freez(system_info->container);
  705. freez(system_info->container_detection);
  706. freez(system_info->is_k8s_node);
  707. freez(system_info->install_type);
  708. freez(system_info->prebuilt_arch);
  709. freez(system_info->prebuilt_dist);
  710. freez(system_info);
  711. }
  712. }
  713. void destroy_receiver_state(struct receiver_state *rpt);
// Tear down a host: stop its streaming sender/receiver, release all charts,
// alarms, templates, variables and health log, remove it from the host index
// and from the linked list that starts at localhost, and finally free every
// string and lock it owns, followed by the RRDHOST itself.
//
// The global RRD lock must be write-locked by the caller (checked below).
// A NULL host is ignored.
void rrdhost_free(RRDHOST *host) {
    if(!host) return;
    info("Freeing all memory for host '%s'...", host->hostname);
    rrd_check_wrlock(); // make sure the RRDs are write locked
    // the ML state of the host is deleted under the host write lock
    rrdhost_wrlock(host);
    ml_delete_host(host);
    rrdhost_unlock(host);
    // ------------------------------------------------------------------------
    // clean up streaming
    rrdpush_sender_thread_stop(host); // stop a possibly running thread
    // NOTE(review): host->sender is dereferenced without a NULL check -
    // presumably it is always allocated when the host is created; confirm.
    cbuffer_free(host->sender->buffer);
    buffer_free(host->sender->build);
#ifdef ENABLE_COMPRESSION
    if (host->sender->compressor)
        host->sender->compressor->destroy(&host->sender->compressor);
#endif
    freez(host->sender);
    host->sender = NULL;
    // the receiver is only cancelled and waited for when the agent is exiting
    if (netdata_exit) {
        netdata_mutex_lock(&host->receiver_lock);
        if (host->receiver) {
            if (!host->receiver->exited)
                netdata_thread_cancel(host->receiver->thread);
            netdata_mutex_unlock(&host->receiver_lock);
            // NOTE(review): host->receiver is re-read here after the lock was released
            struct receiver_state *rpt = host->receiver;
            // poll every 50ms until the receiver detaches from the host or flags itself as exited
            while (host->receiver && !rpt->exited)
                sleep_usec(50 * USEC_PER_MS);
            // If the receiver detached from the host then its thread will destroy the state
            if (host->receiver == rpt)
                destroy_receiver_state(host->receiver);
        }
        else
            netdata_mutex_unlock(&host->receiver_lock);
    }
    rrdhost_wrlock(host); // lock this RRDHOST
#if defined(ENABLE_ACLK) && defined(ENABLE_NEW_CLOUD_PROTOCOL)
    // when not exiting, enqueue an ACLK_DATABASE_ORPHAN_HOST command for the
    // host's database sync worker and wait for its completion
    struct aclk_database_worker_config *wc = host->dbsync_worker;
    if (wc && !netdata_exit) {
        struct aclk_database_cmd cmd;
        memset(&cmd, 0, sizeof(cmd));
        cmd.opcode = ACLK_DATABASE_ORPHAN_HOST;
        struct aclk_completion compl ;
        init_aclk_completion(&compl );
        cmd.completion = &compl ;
        aclk_database_enq_cmd(wc, &cmd);
        wait_for_aclk_completion(&compl );
        destroy_aclk_completion(&compl );
    }
#endif
    // ------------------------------------------------------------------------
    // release its children resources
#ifdef ENABLE_DBENGINE
    // only a dbengine context private to this host (not the shared multidb_ctx) is shut down here
    if (host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) {
        if (host->rrdeng_ctx != &multidb_ctx)
            rrdeng_prepare_exit(host->rrdeng_ctx);
    }
#endif
    // each loop below relies on the callee unlinking the head element it is given
    while(host->rrdset_root)
        rrdset_free(host->rrdset_root);
    freez(host->exporting_flags);
    while(host->alarms)
        rrdcalc_unlink_and_free(host, host->alarms);
    // the "foreach" lists are walked manually, saving the next pointer before each free
    RRDCALC *rc,*nc;
    for(rc = host->alarms_with_foreach; rc ; rc = nc) {
        nc = rc->next;
        rrdcalc_free(rc);
    }
    host->alarms_with_foreach = NULL;
    while(host->templates)
        rrdcalctemplate_unlink_and_free(host, host->templates);
    RRDCALCTEMPLATE *rt,*next;
    for(rt = host->alarms_template_with_foreach; rt ; rt = next) {
        next = rt->next;
        rrdcalctemplate_free(rt);
    }
    host->alarms_template_with_foreach = NULL;
    debug(D_RRD_CALLS, "RRDHOST: Cleaning up remaining host variables for host '%s'", host->hostname);
    rrdvar_free_remaining_variables(host, &host->rrdvar_root_index);
    health_alarm_log_free(host);
#ifdef ENABLE_DBENGINE
    if (host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE && host->rrdeng_ctx != &multidb_ctx)
        rrdeng_exit(host->rrdeng_ctx);
#endif
    // ------------------------------------------------------------------------
    // remove it from the indexes
    if(rrdhost_index_del(host) != host)
        error("RRDHOST '%s' removed from index, deleted the wrong entry.", host->hostname);
    // ------------------------------------------------------------------------
    // unlink it from the host
    if(host == localhost) {
        // localhost is the list head, so the head moves to the next host
        localhost = host->next;
    }
    else {
        // find the previous one
        RRDHOST *h;
        for(h = localhost; h && h->next != host ; h = h->next) ;
        // bypass it
        if(h) h->next = host->next;
        else error("Request to free RRDHOST '%s': cannot find it", host->hostname);
    }
    // ------------------------------------------------------------------------
    // free it
    pthread_mutex_destroy(&host->aclk_state_lock);
    freez(host->aclk_state.claimed_id);
    freez(host->aclk_state.prev_claimed_id);
    freez((void *)host->tags);
    free_label_list(host->labels.head);
    freez((void *)host->os);
    freez((void *)host->timezone);
    freez((void *)host->abbrev_timezone);
    freez(host->program_version);
    freez(host->program_name);
    rrdhost_system_info_free(host->system_info);
    freez(host->cache_dir);
    freez(host->varlib_dir);
    freez(host->rrdpush_send_api_key);
    freez(host->rrdpush_send_destination);
    freez(host->health_default_exec);
    freez(host->health_default_recipient);
    freez(host->health_log_filename);
    freez(host->hostname);
    freez(host->registry_hostname);
    simple_pattern_free(host->rrdpush_send_charts_matching);
    // the host lock is released before the locks are destroyed
    rrdhost_unlock(host);
    netdata_rwlock_destroy(&host->labels.labels_rwlock);
    netdata_rwlock_destroy(&host->health_log.alarm_log_rwlock);
    netdata_rwlock_destroy(&host->rrdhost_rwlock);
    freez(host->node_id);
    freez(host);
#if defined(ENABLE_ACLK) && defined(ENABLE_NEW_CLOUD_PROTOCOL)
    // wc was read from the host before it was freed; it is a separate pointer
    if (wc)
        wc->is_orphan = 0;
#endif
    rrd_hosts_available--;
}
  849. void rrdhost_free_all(void) {
  850. rrd_wrlock();
  851. /* Make sure child-hosts are released before the localhost. */
  852. while(localhost->next) rrdhost_free(localhost->next);
  853. rrdhost_free(localhost);
  854. rrd_unlock();
  855. }
  856. // ----------------------------------------------------------------------------
  857. // RRDHOST - save host files
  858. void rrdhost_save_charts(RRDHOST *host) {
  859. if(!host) return;
  860. info("Saving/Closing database of host '%s'...", host->hostname);
  861. RRDSET *st;
  862. // we get a write lock
  863. // to ensure only one thread is saving the database
  864. rrdhost_wrlock(host);
  865. rrdset_foreach_write(st, host) {
  866. rrdset_rdlock(st);
  867. rrdset_save(st);
  868. rrdset_unlock(st);
  869. }
  870. rrdhost_unlock(host);
  871. }
  872. static struct label *rrdhost_load_auto_labels(void)
  873. {
  874. struct label *label_list = NULL;
  875. if (localhost->system_info->cloud_provider_type)
  876. label_list =
  877. add_label_to_list(label_list, "_cloud_provider_type", localhost->system_info->cloud_provider_type, LABEL_SOURCE_AUTO);
  878. if (localhost->system_info->cloud_instance_type)
  879. label_list =
  880. add_label_to_list(label_list, "_cloud_instance_type", localhost->system_info->cloud_instance_type, LABEL_SOURCE_AUTO);
  881. if (localhost->system_info->cloud_instance_region)
  882. label_list =
  883. add_label_to_list(label_list, "_cloud_instance_region", localhost->system_info->cloud_instance_region, LABEL_SOURCE_AUTO);
  884. if (localhost->system_info->host_os_name)
  885. label_list =
  886. add_label_to_list(label_list, "_os_name", localhost->system_info->host_os_name, LABEL_SOURCE_AUTO);
  887. if (localhost->system_info->host_os_version)
  888. label_list =
  889. add_label_to_list(label_list, "_os_version", localhost->system_info->host_os_version, LABEL_SOURCE_AUTO);
  890. if (localhost->system_info->kernel_version)
  891. label_list =
  892. add_label_to_list(label_list, "_kernel_version", localhost->system_info->kernel_version, LABEL_SOURCE_AUTO);
  893. if (localhost->system_info->host_cores)
  894. label_list =
  895. add_label_to_list(label_list, "_system_cores", localhost->system_info->host_cores, LABEL_SOURCE_AUTO);
  896. if (localhost->system_info->host_cpu_freq)
  897. label_list =
  898. add_label_to_list(label_list, "_system_cpu_freq", localhost->system_info->host_cpu_freq, LABEL_SOURCE_AUTO);
  899. if (localhost->system_info->host_ram_total)
  900. label_list =
  901. add_label_to_list(label_list, "_system_ram_total", localhost->system_info->host_ram_total, LABEL_SOURCE_AUTO);
  902. if (localhost->system_info->host_disk_space)
  903. label_list =
  904. add_label_to_list(label_list, "_system_disk_space", localhost->system_info->host_disk_space, LABEL_SOURCE_AUTO);
  905. if (localhost->system_info->architecture)
  906. label_list =
  907. add_label_to_list(label_list, "_architecture", localhost->system_info->architecture, LABEL_SOURCE_AUTO);
  908. if (localhost->system_info->virtualization)
  909. label_list =
  910. add_label_to_list(label_list, "_virtualization", localhost->system_info->virtualization, LABEL_SOURCE_AUTO);
  911. if (localhost->system_info->container)
  912. label_list =
  913. add_label_to_list(label_list, "_container", localhost->system_info->container, LABEL_SOURCE_AUTO);
  914. if (localhost->system_info->container_detection)
  915. label_list =
  916. add_label_to_list(label_list, "_container_detection", localhost->system_info->container_detection, LABEL_SOURCE_AUTO);
  917. if (localhost->system_info->virt_detection)
  918. label_list =
  919. add_label_to_list(label_list, "_virt_detection", localhost->system_info->virt_detection, LABEL_SOURCE_AUTO);
  920. if (localhost->system_info->is_k8s_node)
  921. label_list =
  922. add_label_to_list(label_list, "_is_k8s_node", localhost->system_info->is_k8s_node, LABEL_SOURCE_AUTO);
  923. if (localhost->system_info->install_type)
  924. label_list =
  925. add_label_to_list(label_list, "_install_type", localhost->system_info->install_type, LABEL_SOURCE_AUTO);
  926. if (localhost->system_info->prebuilt_arch)
  927. label_list =
  928. add_label_to_list(label_list, "_prebuilt_arch", localhost->system_info->prebuilt_arch, LABEL_SOURCE_AUTO);
  929. if (localhost->system_info->prebuilt_dist)
  930. label_list =
  931. add_label_to_list(label_list, "_prebuilt_dist", localhost->system_info->prebuilt_dist, LABEL_SOURCE_AUTO);
  932. label_list = add_aclk_host_labels(label_list);
  933. label_list = add_label_to_list(
  934. label_list, "_is_parent", (localhost->next || configured_as_parent()) ? "true" : "false", LABEL_SOURCE_AUTO);
  935. if (localhost->rrdpush_send_destination)
  936. label_list =
  937. add_label_to_list(label_list, "_streams_to", localhost->rrdpush_send_destination, LABEL_SOURCE_AUTO);
  938. return label_list;
  939. }
  940. static inline int rrdhost_is_valid_label_config_option(char *name, char *value)
  941. {
  942. return (is_valid_label_key(name) && is_valid_label_value(value) && strcmp(name, "from environment") &&
  943. strcmp(name, "from kubernetes pods"));
  944. }
  945. static struct label *rrdhost_load_config_labels()
  946. {
  947. int status = config_load(NULL, 1, CONFIG_SECTION_HOST_LABEL);
  948. if(!status) {
  949. char *filename = CONFIG_DIR "/" CONFIG_FILENAME;
  950. error("LABEL: Cannot reload the configuration file '%s', using labels in memory", filename);
  951. }
  952. struct label *l = NULL;
  953. struct section *co = appconfig_get_section(&netdata_config, CONFIG_SECTION_HOST_LABEL);
  954. if(co) {
  955. config_section_wrlock(co);
  956. struct config_option *cv;
  957. for(cv = co->values; cv ; cv = cv->next) {
  958. if(rrdhost_is_valid_label_config_option(cv->name, cv->value)) {
  959. l = add_label_to_list(l, cv->name, cv->value, LABEL_SOURCE_NETDATA_CONF);
  960. cv->flags |= CONFIG_VALUE_USED;
  961. } else {
  962. error("LABELS: It was not possible to create the label '%s' because it contains invalid character(s) or values."
  963. , cv->name);
  964. }
  965. }
  966. config_section_unlock(co);
  967. }
  968. return l;
  969. }
  970. struct label *parse_simple_tags(
  971. struct label *label_list,
  972. const char *tags,
  973. char key_value_separator,
  974. char label_separator,
  975. STRIP_QUOTES_OPTION strip_quotes_from_key,
  976. STRIP_QUOTES_OPTION strip_quotes_from_value,
  977. SKIP_ESCAPED_CHARACTERS_OPTION skip_escaped_characters)
  978. {
  979. const char *end = tags;
  980. while (*end) {
  981. const char *start = end;
  982. char key[CONFIG_MAX_VALUE + 1];
  983. char value[CONFIG_MAX_VALUE + 1];
  984. while (*end && *end != key_value_separator)
  985. end++;
  986. strncpyz(key, start, end - start);
  987. if (*end)
  988. start = ++end;
  989. while (*end && *end != label_separator)
  990. end++;
  991. strncpyz(value, start, end - start);
  992. label_list = add_label_to_list(
  993. label_list,
  994. strip_quotes_from_key ? strip_double_quotes(trim(key), skip_escaped_characters) : trim(key),
  995. strip_quotes_from_value ? strip_double_quotes(trim(value), skip_escaped_characters) : trim(value),
  996. LABEL_SOURCE_NETDATA_CONF);
  997. if (*end)
  998. end++;
  999. }
  1000. return label_list;
  1001. }
  1002. struct label *parse_json_tags(struct label *label_list, const char *tags)
  1003. {
  1004. char tags_buf[CONFIG_MAX_VALUE + 1];
  1005. strncpy(tags_buf, tags, CONFIG_MAX_VALUE);
  1006. char *str = tags_buf;
  1007. switch (*str) {
  1008. case '{':
  1009. str++;
  1010. strip_last_symbol(str, '}', SKIP_ESCAPED_CHARACTERS);
  1011. label_list = parse_simple_tags(label_list, str, ':', ',', STRIP_QUOTES, STRIP_QUOTES, SKIP_ESCAPED_CHARACTERS);
  1012. break;
  1013. case '[':
  1014. str++;
  1015. strip_last_symbol(str, ']', SKIP_ESCAPED_CHARACTERS);
  1016. char *end = str + strlen(str);
  1017. size_t i = 0;
  1018. while (str < end) {
  1019. char key[CONFIG_MAX_VALUE + 1];
  1020. snprintfz(key, CONFIG_MAX_VALUE, "host_tag%zu", i);
  1021. str = strip_double_quotes(trim(str), SKIP_ESCAPED_CHARACTERS);
  1022. label_list = add_label_to_list(label_list, key, str, LABEL_SOURCE_NETDATA_CONF);
  1023. // skip to the next element in the array
  1024. str += strlen(str) + 1;
  1025. while (*str && *str != ',')
  1026. str++;
  1027. str++;
  1028. i++;
  1029. }
  1030. break;
  1031. case '"':
  1032. label_list = add_label_to_list(
  1033. label_list, "host_tag", strip_double_quotes(str, SKIP_ESCAPED_CHARACTERS), LABEL_SOURCE_NETDATA_CONF);
  1034. break;
  1035. default:
  1036. label_list = add_label_to_list(label_list, "host_tag", str, LABEL_SOURCE_NETDATA_CONF);
  1037. break;
  1038. }
  1039. return label_list;
  1040. }
  1041. static struct label *rrdhost_load_kubernetes_labels(void)
  1042. {
  1043. struct label *l=NULL;
  1044. char *label_script = mallocz(sizeof(char) * (strlen(netdata_configured_primary_plugins_dir) + strlen("get-kubernetes-labels.sh") + 2));
  1045. sprintf(label_script, "%s/%s", netdata_configured_primary_plugins_dir, "get-kubernetes-labels.sh");
  1046. if (unlikely(access(label_script, R_OK) != 0)) {
  1047. error("Kubernetes pod label fetching script %s not found.",label_script);
  1048. freez(label_script);
  1049. } else {
  1050. pid_t command_pid;
  1051. debug(D_RRDHOST, "Attempting to fetch external labels via %s", label_script);
  1052. FILE *fp = mypopen(label_script, &command_pid);
  1053. if(fp) {
  1054. int MAX_LINE_SIZE=300;
  1055. char buffer[MAX_LINE_SIZE + 1];
  1056. while (fgets(buffer, MAX_LINE_SIZE, fp) != NULL) {
  1057. char *name=buffer;
  1058. char *value=buffer;
  1059. while (*value && *value != ':') value++;
  1060. if (*value == ':') {
  1061. *value = '\0';
  1062. value++;
  1063. }
  1064. char *eos=value;
  1065. while (*eos && *eos != '\n') eos++;
  1066. if (*eos == '\n') *eos = '\0';
  1067. if (strlen(value)>0) {
  1068. if (is_valid_label_key(name)){
  1069. l = add_label_to_list(l, name, value, LABEL_SOURCE_KUBERNETES);
  1070. } else {
  1071. info("Ignoring invalid label name '%s'", name);
  1072. }
  1073. } else {
  1074. error("%s outputted unexpected result: '%s'", label_script, name);
  1075. }
  1076. };
  1077. // Non-zero exit code means that all the script output is error messages. We've shown already any message that didn't include a ':'
  1078. // Here we'll inform with an ERROR that the script failed, show whatever (if anything) was added to the list of labels, free the memory and set the return to null
  1079. int retcode=mypclose(fp, command_pid);
  1080. if (retcode) {
  1081. error("%s exited abnormally. No kubernetes labels will be added to the host.", label_script);
  1082. struct label *ll=l;
  1083. while (ll != NULL) {
  1084. info("Ignoring Label [source id=%s]: \"%s\" -> \"%s\"\n", translate_label_source(ll->label_source), ll->key, ll->value);
  1085. ll = ll->next;
  1086. freez(l);
  1087. l=ll;
  1088. }
  1089. }
  1090. }
  1091. freez(label_script);
  1092. }
  1093. return l;
  1094. }
  1095. void reload_host_labels(void)
  1096. {
  1097. struct label *from_auto = rrdhost_load_auto_labels();
  1098. struct label *from_k8s = rrdhost_load_kubernetes_labels();
  1099. struct label *from_config = rrdhost_load_config_labels();
  1100. struct label *new_labels = merge_label_lists(from_auto, from_k8s);
  1101. new_labels = merge_label_lists(new_labels, from_config);
  1102. rrdhost_rdlock(localhost);
  1103. replace_label_list(&localhost->labels, new_labels);
  1104. health_label_log_save(localhost);
  1105. rrdhost_unlock(localhost);
  1106. /* TODO-GAPS - fix this so that it looks properly at the state and version of the sender
  1107. if(localhost->rrdpush_send_enabled && localhost->rrdpush_sender_buffer){
  1108. localhost->labels.labels_flag |= LABEL_FLAG_UPDATE_STREAM;
  1109. rrdpush_send_labels(localhost);
  1110. }
  1111. */
  1112. health_reload();
  1113. }
  1114. // ----------------------------------------------------------------------------
  1115. // RRDHOST - delete host files
  1116. void rrdhost_delete_charts(RRDHOST *host) {
  1117. if(!host) return;
  1118. info("Deleting database of host '%s'...", host->hostname);
  1119. RRDSET *st;
  1120. // we get a write lock
  1121. // to ensure only one thread is saving the database
  1122. rrdhost_wrlock(host);
  1123. rrdset_foreach_write(st, host) {
  1124. rrdset_rdlock(st);
  1125. rrdset_delete(st);
  1126. rrdset_unlock(st);
  1127. }
  1128. recursively_delete_dir(host->cache_dir, "left over host");
  1129. rrdhost_unlock(host);
  1130. }
  1131. // ----------------------------------------------------------------------------
  1132. // RRDHOST - cleanup host files
  1133. void rrdhost_cleanup_charts(RRDHOST *host) {
  1134. if(!host) return;
  1135. info("Cleaning up database of host '%s'...", host->hostname);
  1136. RRDSET *st;
  1137. uint32_t rrdhost_delete_obsolete_charts = rrdhost_flag_check(host, RRDHOST_FLAG_DELETE_OBSOLETE_CHARTS);
  1138. // we get a write lock
  1139. // to ensure only one thread is saving the database
  1140. rrdhost_wrlock(host);
  1141. rrdset_foreach_write(st, host) {
  1142. rrdset_rdlock(st);
  1143. if(rrdhost_delete_obsolete_charts && rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE))
  1144. rrdset_delete(st);
  1145. else if(rrdhost_delete_obsolete_charts && rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE_DIMENSIONS))
  1146. rrdset_delete_obsolete_dimensions(st);
  1147. else
  1148. rrdset_save(st);
  1149. rrdset_unlock(st);
  1150. }
  1151. rrdhost_unlock(host);
  1152. }
  1153. // ----------------------------------------------------------------------------
  1154. // RRDHOST - save all hosts to disk
  1155. void rrdhost_save_all(void) {
  1156. info("Saving database [%zu hosts(s)]...", rrd_hosts_available);
  1157. rrd_rdlock();
  1158. RRDHOST *host;
  1159. rrdhost_foreach_read(host)
  1160. rrdhost_save_charts(host);
  1161. rrd_unlock();
  1162. }
  1163. // ----------------------------------------------------------------------------
  1164. // RRDHOST - save or delete all hosts from disk
  1165. void rrdhost_cleanup_all(void) {
  1166. info("Cleaning up database [%zu hosts(s)]...", rrd_hosts_available);
  1167. rrd_rdlock();
  1168. RRDHOST *host;
  1169. rrdhost_foreach_read(host) {
  1170. if (host != localhost && rrdhost_flag_check(host, RRDHOST_FLAG_DELETE_ORPHAN_HOST) && !host->receiver
  1171. #ifdef ENABLE_DBENGINE
  1172. /* don't delete multi-host DB host files */
  1173. && !(host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE && host->rrdeng_ctx == &multidb_ctx)
  1174. #endif
  1175. )
  1176. rrdhost_delete_charts(host);
  1177. else
  1178. rrdhost_cleanup_charts(host);
  1179. }
  1180. rrd_unlock();
  1181. }
  1182. // ----------------------------------------------------------------------------
  1183. // RRDHOST - save or delete all the host charts from disk
// Process the obsolete charts of a host.
//
// A chart qualifies when it has RRDSET_FLAG_OBSOLETE set and has not been
// accessed, updated or collected for more than rrdset_free_obsolete_time
// seconds. Such a chart is:
//  - in dbengine mode: flagged as archived, stripped of its variables and
//    alarms, and its dimensions are archived or freed; when dimensions
//    remain the chart itself is kept;
//  - otherwise (or when no dimensions remain): deleted or saved, depending on
//    RRDHOST_FLAG_DELETE_OBSOLETE_CHARTS, and then freed.
//
// The caller visible in this file, rrd_cleanup_obsolete_charts(), holds the
// host write lock while calling this function.
void rrdhost_cleanup_obsolete_charts(RRDHOST *host) {
    time_t now = now_realtime_sec();
    RRDSET *st;
    uint32_t rrdhost_delete_obsolete_charts = rrdhost_flag_check(host, RRDHOST_FLAG_DELETE_OBSOLETE_CHARTS);
    // the walk starts over from the first chart after every chart that is freed
    // (presumably because the freed chart's next pointer can no longer be followed)
restart_after_removal:
    rrdset_foreach_write(st, host) {
        if(unlikely(rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE)
                    && st->last_accessed_time + rrdset_free_obsolete_time < now
                    && st->last_updated.tv_sec + rrdset_free_obsolete_time < now
                    && st->last_collected_time.tv_sec + rrdset_free_obsolete_time < now
        )) {
            st->rrdhost->obsolete_charts_count--;
#ifdef ENABLE_DBENGINE
            if(st->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) {
                RRDDIM *rd, *last;
                rrdset_flag_set(st, RRDSET_FLAG_ARCHIVED);
                while (st->variables)  rrdsetvar_free(st->variables);
                while (st->alarms)     rrdsetcalc_unlink(st->alarms);
                rrdset_wrlock(st);
                // 'last' is the most recent dimension that was kept; it is the
                // point the walk resumes from after a dimension is freed
                for (rd = st->dimensions, last = NULL ; likely(rd) ; ) {
                    // dimensions that are already archived are left as they are
                    if (rrddim_flag_check(rd, RRDDIM_FLAG_ARCHIVED)) {
                        last = rd;
                        rd = rd->next;
                        continue;
                    }
                    // dimensions flagged RRDDIM_FLAG_ACLK are skipped as well
                    if (rrddim_flag_check(rd, RRDDIM_FLAG_ACLK)) {
                        last = rd;
                        rd = rd->next;
                        continue;
                    }
                    rrddim_flag_set(rd, RRDDIM_FLAG_ARCHIVED);
                    while (rd->variables)
                        rrddimvar_free(rd->variables);
                    if (rrddim_flag_check(rd, RRDDIM_FLAG_OBSOLETE)) {
                        rrddim_flag_clear(rd, RRDDIM_FLAG_OBSOLETE);
                        /* only a collector can mark a chart as obsolete, so we must remove the reference */
                        uint8_t can_delete_metric = rd->state->collect_ops.finalize(rd);
                        if (can_delete_metric) {
                            /* This metric has no data and no references */
                            delete_dimension_uuid(&rd->state->metric_uuid);
                            rrddim_free(st, rd);
                            // rd is gone: continue from the list head when nothing
                            // was kept before it, otherwise from the successor of 'last'
                            if (unlikely(!last)) {
                                rd = st->dimensions;
                            }
                            else {
                                rd = last->next;
                            }
                            continue;
                        }
#if defined(ENABLE_ACLK) && defined(ENABLE_NEW_CLOUD_PROTOCOL)
                        else
                            queue_dimension_to_aclk(rd, rd->last_collected_time.tv_sec);
#endif
                    }
                    last = rd;
                    rd = rd->next;
                }
                rrdset_unlock(st);
                debug(D_RRD_CALLS, "RRDSET: Cleaning up remaining chart variables for host '%s', chart '%s'", host->hostname, st->id);
                rrdvar_free_remaining_variables(host, &st->rrdvar_root_index);
                rrdset_flag_clear(st, RRDSET_FLAG_OBSOLETE);
                if (st->dimensions) {
                    /* If the chart still has dimensions don't delete it from the metadata log */
                    continue;
                }
            }
#endif
            // reached for non-dbengine charts and for dbengine charts left without dimensions
            rrdset_rdlock(st);
            if(rrdhost_delete_obsolete_charts)
                rrdset_delete(st);
            else
                rrdset_save(st);
            rrdset_unlock(st);
            rrdset_free(st);
            goto restart_after_removal;
        }
#if defined(ENABLE_ACLK) && defined(ENABLE_NEW_CLOUD_PROTOCOL)
        else
            sql_check_chart_liveness(st);
#endif
    }
}
  1266. void rrdset_check_obsoletion(RRDHOST *host)
  1267. {
  1268. RRDSET *st;
  1269. time_t last_entry_t;
  1270. rrdset_foreach_read(st, host) {
  1271. last_entry_t = rrdset_last_entry_t(st);
  1272. if (last_entry_t && last_entry_t < host->senders_connect_time) {
  1273. rrdset_is_obsolete(st);
  1274. }
  1275. }
  1276. }
  1277. void rrd_cleanup_obsolete_charts()
  1278. {
  1279. rrd_rdlock();
  1280. RRDHOST *host;
  1281. rrdhost_foreach_read(host)
  1282. {
  1283. if (host->obsolete_charts_count) {
  1284. rrdhost_wrlock(host);
  1285. #ifdef ENABLE_ACLK
  1286. host->deleted_charts_count = 0;
  1287. #endif
  1288. rrdhost_cleanup_obsolete_charts(host);
  1289. #ifdef ENABLE_ACLK
  1290. if (host->deleted_charts_count)
  1291. aclk_update_chart(host, "dummy-chart", 0);
  1292. #endif
  1293. rrdhost_unlock(host);
  1294. }
  1295. if (host != localhost &&
  1296. host->trigger_chart_obsoletion_check &&
  1297. host->senders_last_chart_command &&
  1298. host->senders_last_chart_command + 120 < now_realtime_sec()) {
  1299. rrdhost_rdlock(host);
  1300. rrdset_check_obsoletion(host);
  1301. rrdhost_unlock(host);
  1302. host->trigger_chart_obsoletion_check = 0;
  1303. }
  1304. }
  1305. rrd_unlock();
  1306. }
  1307. // ----------------------------------------------------------------------------
  1308. // RRDHOST - set system info from environment variables
  1309. // system_info fields must be heap allocated or NULL
  1310. int rrdhost_set_system_info_variable(struct rrdhost_system_info *system_info, char *name, char *value) {
  1311. int res = 0;
  1312. if (!strcmp(name, "NETDATA_PROTOCOL_VERSION"))
  1313. return res;
  1314. else if(!strcmp(name, "NETDATA_INSTANCE_CLOUD_TYPE")){
  1315. freez(system_info->cloud_provider_type);
  1316. system_info->cloud_provider_type = strdupz(value);
  1317. }
  1318. else if(!strcmp(name, "NETDATA_INSTANCE_CLOUD_INSTANCE_TYPE")){
  1319. freez(system_info->cloud_instance_type);
  1320. system_info->cloud_instance_type = strdupz(value);
  1321. }
  1322. else if(!strcmp(name, "NETDATA_INSTANCE_CLOUD_INSTANCE_REGION")){
  1323. freez(system_info->cloud_instance_region);
  1324. system_info->cloud_instance_region = strdupz(value);
  1325. }
  1326. else if(!strcmp(name, "NETDATA_CONTAINER_OS_NAME")){
  1327. freez(system_info->container_os_name);
  1328. system_info->container_os_name = strdupz(value);
  1329. }
  1330. else if(!strcmp(name, "NETDATA_CONTAINER_OS_ID")){
  1331. freez(system_info->container_os_id);
  1332. system_info->container_os_id = strdupz(value);
  1333. }
  1334. else if(!strcmp(name, "NETDATA_CONTAINER_OS_ID_LIKE")){
  1335. freez(system_info->container_os_id_like);
  1336. system_info->container_os_id_like = strdupz(value);
  1337. }
  1338. else if(!strcmp(name, "NETDATA_CONTAINER_OS_VERSION")){
  1339. freez(system_info->container_os_version);
  1340. system_info->container_os_version = strdupz(value);
  1341. }
  1342. else if(!strcmp(name, "NETDATA_CONTAINER_OS_VERSION_ID")){
  1343. freez(system_info->container_os_version_id);
  1344. system_info->container_os_version_id = strdupz(value);
  1345. }
  1346. else if(!strcmp(name, "NETDATA_CONTAINER_OS_DETECTION")){
  1347. freez(system_info->container_os_detection);
  1348. system_info->container_os_detection = strdupz(value);
  1349. }
  1350. else if(!strcmp(name, "NETDATA_HOST_OS_NAME")){
  1351. freez(system_info->host_os_name);
  1352. system_info->host_os_name = strdupz(value);
  1353. json_fix_string(system_info->host_os_name);
  1354. }
  1355. else if(!strcmp(name, "NETDATA_HOST_OS_ID")){
  1356. freez(system_info->host_os_id);
  1357. system_info->host_os_id = strdupz(value);
  1358. }
  1359. else if(!strcmp(name, "NETDATA_HOST_OS_ID_LIKE")){
  1360. freez(system_info->host_os_id_like);
  1361. system_info->host_os_id_like = strdupz(value);
  1362. }
  1363. else if(!strcmp(name, "NETDATA_HOST_OS_VERSION")){
  1364. freez(system_info->host_os_version);
  1365. system_info->host_os_version = strdupz(value);
  1366. }
  1367. else if(!strcmp(name, "NETDATA_HOST_OS_VERSION_ID")){
  1368. freez(system_info->host_os_version_id);
  1369. system_info->host_os_version_id = strdupz(value);
  1370. }
  1371. else if(!strcmp(name, "NETDATA_HOST_OS_DETECTION")){
  1372. freez(system_info->host_os_detection);
  1373. system_info->host_os_detection = strdupz(value);
  1374. }
  1375. else if(!strcmp(name, "NETDATA_SYSTEM_KERNEL_NAME")){
  1376. freez(system_info->kernel_name);
  1377. system_info->kernel_name = strdupz(value);
  1378. }
  1379. else if(!strcmp(name, "NETDATA_SYSTEM_CPU_LOGICAL_CPU_COUNT")){
  1380. freez(system_info->host_cores);
  1381. system_info->host_cores = strdupz(value);
  1382. }
  1383. else if(!strcmp(name, "NETDATA_SYSTEM_CPU_FREQ")){
  1384. freez(system_info->host_cpu_freq);
  1385. system_info->host_cpu_freq = strdupz(value);
  1386. }
  1387. else if(!strcmp(name, "NETDATA_SYSTEM_TOTAL_RAM")){
  1388. freez(system_info->host_ram_total);
  1389. system_info->host_ram_total = strdupz(value);
  1390. }
  1391. else if(!strcmp(name, "NETDATA_SYSTEM_TOTAL_DISK_SIZE")){
  1392. freez(system_info->host_disk_space);
  1393. system_info->host_disk_space = strdupz(value);
  1394. }
  1395. else if(!strcmp(name, "NETDATA_SYSTEM_KERNEL_VERSION")){
  1396. freez(system_info->kernel_version);
  1397. system_info->kernel_version = strdupz(value);
  1398. }
  1399. else if(!strcmp(name, "NETDATA_SYSTEM_ARCHITECTURE")){
  1400. freez(system_info->architecture);
  1401. system_info->architecture = strdupz(value);
  1402. }
  1403. else if(!strcmp(name, "NETDATA_SYSTEM_VIRTUALIZATION")){
  1404. freez(system_info->virtualization);
  1405. system_info->virtualization = strdupz(value);
  1406. }
  1407. else if(!strcmp(name, "NETDATA_SYSTEM_VIRT_DETECTION")){
  1408. freez(system_info->virt_detection);
  1409. system_info->virt_detection = strdupz(value);
  1410. }
  1411. else if(!strcmp(name, "NETDATA_SYSTEM_CONTAINER")){
  1412. freez(system_info->container);
  1413. system_info->container = strdupz(value);
  1414. }
  1415. else if(!strcmp(name, "NETDATA_SYSTEM_CONTAINER_DETECTION")){
  1416. freez(system_info->container_detection);
  1417. system_info->container_detection = strdupz(value);
  1418. }
  1419. else if(!strcmp(name, "NETDATA_HOST_IS_K8S_NODE")){
  1420. freez(system_info->is_k8s_node);
  1421. system_info->is_k8s_node = strdupz(value);
  1422. }
  1423. else if (!strcmp(name, "NETDATA_SYSTEM_CPU_VENDOR"))
  1424. return res;
  1425. else if (!strcmp(name, "NETDATA_SYSTEM_CPU_MODEL"))
  1426. return res;
  1427. else if (!strcmp(name, "NETDATA_SYSTEM_CPU_DETECTION"))
  1428. return res;
  1429. else if (!strcmp(name, "NETDATA_SYSTEM_RAM_DETECTION"))
  1430. return res;
  1431. else if (!strcmp(name, "NETDATA_SYSTEM_DISK_DETECTION"))
  1432. return res;
  1433. else if (!strcmp(name, "NETDATA_CONTAINER_IS_OFFICIAL_IMAGE"))
  1434. return res;
  1435. else {
  1436. res = 1;
  1437. }
  1438. return res;
  1439. }
  1440. /**
  1441. * Alarm Compare ID
  1442. *
  1443. * Callback function used with the binary trees to compare the id of RRDCALC
  1444. *
  1445. * @param a a pointer to the RRDCAL item to insert,compare or update the binary tree
  1446. * @param b the pointer to the binary tree.
  1447. *
  1448. * @return It returns 0 case the values are equal, 1 case a is bigger than b and -1 case a is smaller than b.
  1449. */
  1450. int alarm_compare_id(void *a, void *b) {
  1451. register uint32_t hash1 = ((RRDCALC *)a)->id;
  1452. register uint32_t hash2 = ((RRDCALC *)b)->id;
  1453. if(hash1 < hash2) return -1;
  1454. else if(hash1 > hash2) return 1;
  1455. return 0;
  1456. }
  1457. /**
  1458. * Alarm Compare NAME
  1459. *
  1460. * Callback function used with the binary trees to compare the name of RRDCALC
  1461. *
  1462. * @param a a pointer to the RRDCAL item to insert,compare or update the binary tree
  1463. * @param b the pointer to the binary tree.
  1464. *
  1465. * @return It returns 0 case the values are equal, 1 case a is bigger than b and -1 case a is smaller than b.
  1466. */
  1467. int alarm_compare_name(void *a, void *b) {
  1468. RRDCALC *in1 = (RRDCALC *)a;
  1469. RRDCALC *in2 = (RRDCALC *)b;
  1470. if(in1->hash < in2->hash) return -1;
  1471. else if(in1->hash > in2->hash) return 1;
  1472. return strcmp(in1->name,in2->name);
  1473. }
  1474. // Added for gap-filling, if this proves to be a bottleneck in large-scale systems then we will need to cache
  1475. // the last entry times as the metric updates, but let's see if it is a problem first.
  1476. time_t rrdhost_last_entry_t(RRDHOST *h) {
  1477. rrdhost_rdlock(h);
  1478. RRDSET *st;
  1479. time_t result = 0;
  1480. rrdset_foreach_read(st, h) {
  1481. time_t st_last = rrdset_last_entry_t(st);
  1482. if (st_last > result)
  1483. result = st_last;
  1484. }
  1485. rrdhost_unlock(h);
  1486. return result;
  1487. }