rrdhost.c 75 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041
  1. // SPDX-License-Identifier: GPL-3.0-or-later
  2. #define NETDATA_RRD_INTERNALS
  3. #include "rrd.h"
  4. static void rrdhost_streaming_sender_structures_init(RRDHOST *host);
  5. bool dbengine_enabled = false; // will become true if and when dbengine is initialized
  6. size_t storage_tiers = 3;
  7. bool use_direct_io = true;
  8. size_t storage_tiers_grouping_iterations[RRD_STORAGE_TIERS] = { 1, 60, 60, 60, 60 };
  9. RRD_BACKFILL storage_tiers_backfill[RRD_STORAGE_TIERS] = { RRD_BACKFILL_NEW, RRD_BACKFILL_NEW, RRD_BACKFILL_NEW, RRD_BACKFILL_NEW, RRD_BACKFILL_NEW };
  10. #if RRD_STORAGE_TIERS != 5
  11. #error RRD_STORAGE_TIERS is not 5 - you need to update the grouping iterations per tier
  12. #endif
  13. size_t get_tier_grouping(size_t tier) {
  14. if(unlikely(tier >= storage_tiers)) tier = storage_tiers - 1;
  15. size_t grouping = 1;
  16. // first tier is always 1 iteration of whatever update every the chart has
  17. for(size_t i = 1; i <= tier ;i++)
  18. grouping *= storage_tiers_grouping_iterations[i];
  19. return grouping;
  20. }
  21. RRDHOST *localhost = NULL;
  22. netdata_rwlock_t rrd_rwlock = NETDATA_RWLOCK_INITIALIZER;
  23. time_t rrdset_free_obsolete_time_s = 3600;
  24. time_t rrdhost_free_orphan_time_s = 3600;
  25. time_t rrdhost_free_ephemeral_time_s = 86400;
  26. bool is_storage_engine_shared(STORAGE_INSTANCE *engine __maybe_unused) {
  27. #ifdef ENABLE_DBENGINE
  28. if(!rrdeng_is_legacy(engine))
  29. return true;
  30. #endif
  31. return false;
  32. }
  33. RRDHOST *find_host_by_node_id(char *node_id) {
  34. uuid_t node_uuid;
  35. if (unlikely(!node_id || uuid_parse(node_id, node_uuid)))
  36. return NULL;
  37. RRDHOST *host, *ret = NULL;
  38. dfe_start_read(rrdhost_root_index, host) {
  39. if (host->node_id && !(uuid_memcmp(host->node_id, &node_uuid))) {
  40. ret = host;
  41. break;
  42. }
  43. }
  44. dfe_done(host);
  45. return ret;
  46. }
  47. // ----------------------------------------------------------------------------
  48. // RRDHOST indexes management
  49. DICTIONARY *rrdhost_root_index = NULL;
  50. static DICTIONARY *rrdhost_root_index_hostname = NULL;
  51. static inline void rrdhost_init() {
  52. if(unlikely(!rrdhost_root_index)) {
  53. rrdhost_root_index = dictionary_create_advanced(
  54. DICT_OPTION_NAME_LINK_DONT_CLONE | DICT_OPTION_VALUE_LINK_DONT_CLONE | DICT_OPTION_DONT_OVERWRITE_VALUE,
  55. &dictionary_stats_category_rrdhost, 0);
  56. }
  57. if(unlikely(!rrdhost_root_index_hostname)) {
  58. rrdhost_root_index_hostname = dictionary_create_advanced(
  59. DICT_OPTION_NAME_LINK_DONT_CLONE | DICT_OPTION_VALUE_LINK_DONT_CLONE | DICT_OPTION_DONT_OVERWRITE_VALUE,
  60. &dictionary_stats_category_rrdhost, 0);
  61. }
  62. }
  63. RRDHOST_ACQUIRED *rrdhost_find_and_acquire(const char *machine_guid) {
  64. return (RRDHOST_ACQUIRED *)dictionary_get_and_acquire_item(rrdhost_root_index, machine_guid);
  65. }
  66. RRDHOST *rrdhost_acquired_to_rrdhost(RRDHOST_ACQUIRED *rha) {
  67. if(unlikely(!rha))
  68. return NULL;
  69. return (RRDHOST *) dictionary_acquired_item_value((const DICTIONARY_ITEM *)rha);
  70. }
  71. void rrdhost_acquired_release(RRDHOST_ACQUIRED *rha) {
  72. if(unlikely(!rha))
  73. return;
  74. dictionary_acquired_item_release(rrdhost_root_index, (const DICTIONARY_ITEM *)rha);
  75. }
  76. // ----------------------------------------------------------------------------
// RRDHOST index by machine GUID
  78. inline size_t rrdhost_hosts_available(void) {
  79. return dictionary_entries(rrdhost_root_index);
  80. }
  81. inline RRDHOST *rrdhost_find_by_guid(const char *guid) {
  82. return dictionary_get(rrdhost_root_index, guid);
  83. }
  84. static inline RRDHOST *rrdhost_index_add_by_guid(RRDHOST *host) {
  85. RRDHOST *ret_machine_guid = dictionary_set(rrdhost_root_index, host->machine_guid, host, sizeof(RRDHOST));
  86. if(ret_machine_guid == host)
  87. rrdhost_option_set(host, RRDHOST_OPTION_INDEXED_MACHINE_GUID);
  88. else {
  89. rrdhost_option_clear(host, RRDHOST_OPTION_INDEXED_MACHINE_GUID);
  90. nd_log(NDLS_DAEMON, NDLP_NOTICE,
  91. "RRDHOST: host with machine guid '%s' is already indexed. Not adding it again.",
  92. host->machine_guid);
  93. }
  94. return host;
  95. }
  96. static void rrdhost_index_del_by_guid(RRDHOST *host) {
  97. if(rrdhost_option_check(host, RRDHOST_OPTION_INDEXED_MACHINE_GUID)) {
  98. if(!dictionary_del(rrdhost_root_index, host->machine_guid))
  99. nd_log(NDLS_DAEMON, NDLP_NOTICE,
  100. "RRDHOST: failed to delete machine guid '%s' from index",
  101. host->machine_guid);
  102. rrdhost_option_clear(host, RRDHOST_OPTION_INDEXED_MACHINE_GUID);
  103. }
  104. }
  105. // ----------------------------------------------------------------------------
  106. // RRDHOST index by hostname
  107. inline RRDHOST *rrdhost_find_by_hostname(const char *hostname) {
  108. if(unlikely(!strcmp(hostname, "localhost")))
  109. return localhost;
  110. return dictionary_get(rrdhost_root_index_hostname, hostname);
  111. }
  112. static inline void rrdhost_index_del_hostname(RRDHOST *host) {
  113. if(unlikely(!host->hostname)) return;
  114. if(rrdhost_option_check(host, RRDHOST_OPTION_INDEXED_HOSTNAME)) {
  115. if(!dictionary_del(rrdhost_root_index_hostname, rrdhost_hostname(host)))
  116. nd_log(NDLS_DAEMON, NDLP_NOTICE,
  117. "RRDHOST: failed to delete hostname '%s' from index",
  118. rrdhost_hostname(host));
  119. rrdhost_option_clear(host, RRDHOST_OPTION_INDEXED_HOSTNAME);
  120. }
  121. }
  122. static inline RRDHOST *rrdhost_index_add_hostname(RRDHOST *host) {
  123. if(!host->hostname) return host;
  124. RRDHOST *ret_hostname = dictionary_set(rrdhost_root_index_hostname, rrdhost_hostname(host), host, sizeof(RRDHOST));
  125. if(ret_hostname == host)
  126. rrdhost_option_set(host, RRDHOST_OPTION_INDEXED_HOSTNAME);
  127. else {
  128. //have the same hostname but it's not the same host
  129. //keep the new one only if the old one is orphan or archived
  130. if (rrdhost_flag_check(ret_hostname, RRDHOST_FLAG_ORPHAN) || rrdhost_flag_check(ret_hostname, RRDHOST_FLAG_ARCHIVED)) {
  131. rrdhost_index_del_hostname(ret_hostname);
  132. rrdhost_index_add_hostname(host);
  133. }
  134. }
  135. return host;
  136. }
  137. // ----------------------------------------------------------------------------
  138. // RRDHOST - internal helpers
  139. static inline void rrdhost_init_tags(RRDHOST *host, const char *tags) {
  140. if(host->tags && tags && !strcmp(rrdhost_tags(host), tags))
  141. return;
  142. STRING *old = host->tags;
  143. host->tags = string_strdupz((tags && *tags)?tags:NULL);
  144. string_freez(old);
  145. }
  146. static inline void rrdhost_init_hostname(RRDHOST *host, const char *hostname, bool add_to_index) {
  147. if(unlikely(hostname && !*hostname)) hostname = NULL;
  148. if(host->hostname && hostname && !strcmp(rrdhost_hostname(host), hostname))
  149. return;
  150. rrdhost_index_del_hostname(host);
  151. STRING *old = host->hostname;
  152. host->hostname = string_strdupz(hostname?hostname:"localhost");
  153. string_freez(old);
  154. if(add_to_index)
  155. rrdhost_index_add_hostname(host);
  156. }
  157. static inline void rrdhost_init_os(RRDHOST *host, const char *os) {
  158. if(host->os && os && !strcmp(rrdhost_os(host), os))
  159. return;
  160. STRING *old = host->os;
  161. host->os = string_strdupz(os?os:"unknown");
  162. string_freez(old);
  163. }
  164. static inline void rrdhost_init_timezone(RRDHOST *host, const char *timezone, const char *abbrev_timezone, int32_t utc_offset) {
  165. if (host->timezone && timezone && !strcmp(rrdhost_timezone(host), timezone) && host->abbrev_timezone && abbrev_timezone &&
  166. !strcmp(rrdhost_abbrev_timezone(host), abbrev_timezone) && host->utc_offset == utc_offset)
  167. return;
  168. STRING *old = host->timezone;
  169. host->timezone = string_strdupz((timezone && *timezone)?timezone:"unknown");
  170. string_freez(old);
  171. old = (void *)host->abbrev_timezone;
  172. host->abbrev_timezone = string_strdupz((abbrev_timezone && *abbrev_timezone) ? abbrev_timezone : "UTC");
  173. string_freez(old);
  174. host->utc_offset = utc_offset;
  175. }
  176. void set_host_properties(RRDHOST *host, int update_every, RRD_MEMORY_MODE memory_mode,
  177. const char *registry_hostname, const char *os, const char *tags,
  178. const char *tzone, const char *abbrev_tzone, int32_t utc_offset, const char *program_name,
  179. const char *program_version)
  180. {
  181. host->rrd_update_every = update_every;
  182. host->rrd_memory_mode = memory_mode;
  183. rrdhost_init_os(host, os);
  184. rrdhost_init_timezone(host, tzone, abbrev_tzone, utc_offset);
  185. rrdhost_init_tags(host, tags);
  186. host->program_name = string_strdupz((program_name && *program_name) ? program_name : "unknown");
  187. host->program_version = string_strdupz((program_version && *program_version) ? program_version : "unknown");
  188. host->registry_hostname = string_strdupz((registry_hostname && *registry_hostname) ? registry_hostname : rrdhost_hostname(host));
  189. }
  190. // ----------------------------------------------------------------------------
  191. // RRDHOST - add a host
  192. static void rrdhost_initialize_rrdpush_sender(RRDHOST *host,
  193. unsigned int rrdpush_enabled,
  194. char *rrdpush_destination,
  195. char *rrdpush_api_key,
  196. char *rrdpush_send_charts_matching
  197. ) {
  198. if(rrdhost_flag_check(host, RRDHOST_FLAG_RRDPUSH_SENDER_INITIALIZED)) return;
  199. if(rrdpush_enabled && rrdpush_destination && *rrdpush_destination && rrdpush_api_key && *rrdpush_api_key) {
  200. rrdhost_flag_set(host, RRDHOST_FLAG_RRDPUSH_SENDER_INITIALIZED);
  201. rrdhost_streaming_sender_structures_init(host);
  202. #ifdef ENABLE_HTTPS
  203. host->sender->ssl = NETDATA_SSL_UNSET_CONNECTION;
  204. #endif
  205. host->rrdpush_send_destination = strdupz(rrdpush_destination);
  206. rrdpush_destinations_init(host);
  207. host->rrdpush_send_api_key = strdupz(rrdpush_api_key);
  208. host->rrdpush_send_charts_matching = simple_pattern_create(rrdpush_send_charts_matching, NULL,
  209. SIMPLE_PATTERN_EXACT, true);
  210. rrdhost_option_set(host, RRDHOST_OPTION_SENDER_ENABLED);
  211. }
  212. else
  213. rrdhost_option_clear(host, RRDHOST_OPTION_SENDER_ENABLED);
  214. }
  215. static RRDHOST *rrdhost_create(
  216. const char *hostname,
  217. const char *registry_hostname,
  218. const char *guid,
  219. const char *os,
  220. const char *timezone,
  221. const char *abbrev_timezone,
  222. int32_t utc_offset,
  223. const char *tags,
  224. const char *program_name,
  225. const char *program_version,
  226. int update_every,
  227. long entries,
  228. RRD_MEMORY_MODE memory_mode,
  229. unsigned int health_enabled,
  230. unsigned int rrdpush_enabled,
  231. char *rrdpush_destination,
  232. char *rrdpush_api_key,
  233. char *rrdpush_send_charts_matching,
  234. bool rrdpush_enable_replication,
  235. time_t rrdpush_seconds_to_replicate,
  236. time_t rrdpush_replication_step,
  237. struct rrdhost_system_info *system_info,
  238. int is_localhost,
  239. bool archived
  240. ) {
  241. if(memory_mode == RRD_MEMORY_MODE_DBENGINE && !dbengine_enabled) {
  242. nd_log(NDLS_DAEMON, NDLP_ERR,
  243. "memory mode 'dbengine' is not enabled, but host '%s' is configured for it. Falling back to 'alloc'",
  244. hostname);
  245. memory_mode = RRD_MEMORY_MODE_ALLOC;
  246. }
  247. #ifdef ENABLE_DBENGINE
  248. int is_legacy = (memory_mode == RRD_MEMORY_MODE_DBENGINE) && is_legacy_child(guid);
  249. #else
  250. int is_legacy = 1;
  251. #endif
  252. int is_in_multihost = (memory_mode == RRD_MEMORY_MODE_DBENGINE && !is_legacy);
  253. RRDHOST *host = callocz(1, sizeof(RRDHOST));
  254. __atomic_add_fetch(&netdata_buffers_statistics.rrdhost_allocations_size, sizeof(RRDHOST), __ATOMIC_RELAXED);
  255. strncpyz(host->machine_guid, guid, GUID_LEN + 1);
  256. set_host_properties(host, (update_every > 0)?update_every:1, memory_mode, registry_hostname, os,
  257. tags, timezone, abbrev_timezone, utc_offset, program_name, program_version);
  258. rrdhost_init_hostname(host, hostname, false);
  259. host->rrd_history_entries = align_entries_to_pagesize(memory_mode, entries);
  260. host->health.health_enabled = ((memory_mode == RRD_MEMORY_MODE_NONE)) ? 0 : health_enabled;
  261. netdata_mutex_init(&host->aclk_state_lock);
  262. netdata_mutex_init(&host->receiver_lock);
  263. if (likely(!archived)) {
  264. rrdfunctions_host_init(host);
  265. host->last_connected = now_realtime_sec();
  266. host->rrdlabels = rrdlabels_create();
  267. rrdhost_initialize_rrdpush_sender(
  268. host, rrdpush_enabled, rrdpush_destination, rrdpush_api_key, rrdpush_send_charts_matching);
  269. }
  270. if(rrdpush_enable_replication)
  271. rrdhost_option_set(host, RRDHOST_OPTION_REPLICATION);
  272. else
  273. rrdhost_option_clear(host, RRDHOST_OPTION_REPLICATION);
  274. host->rrdpush_seconds_to_replicate = rrdpush_seconds_to_replicate;
  275. host->rrdpush_replication_step = rrdpush_replication_step;
  276. host->rrdpush_receiver_replication_percent = 100.0;
  277. switch(memory_mode) {
  278. default:
  279. case RRD_MEMORY_MODE_ALLOC:
  280. case RRD_MEMORY_MODE_MAP:
  281. case RRD_MEMORY_MODE_SAVE:
  282. case RRD_MEMORY_MODE_RAM:
  283. if(host->rrdpush_seconds_to_replicate > (time_t) host->rrd_history_entries * (time_t) host->rrd_update_every)
  284. host->rrdpush_seconds_to_replicate = (time_t) host->rrd_history_entries * (time_t) host->rrd_update_every;
  285. break;
  286. case RRD_MEMORY_MODE_DBENGINE:
  287. break;
  288. }
  289. host->system_info = system_info;
  290. rrdset_index_init(host);
  291. if(config_get_boolean(CONFIG_SECTION_DB, "delete obsolete charts files", 1))
  292. rrdhost_option_set(host, RRDHOST_OPTION_DELETE_OBSOLETE_CHARTS);
  293. if(config_get_boolean(CONFIG_SECTION_DB, "delete orphan hosts files", 1) && !is_localhost)
  294. rrdhost_option_set(host, RRDHOST_OPTION_DELETE_ORPHAN_HOST);
  295. char filename[FILENAME_MAX + 1];
  296. if(is_localhost)
  297. host->cache_dir = strdupz(netdata_configured_cache_dir);
  298. else {
  299. // this is not localhost - append our GUID to localhost path
  300. if (is_in_multihost) { // don't append to cache dir in multihost
  301. host->cache_dir = strdupz(netdata_configured_cache_dir);
  302. }
  303. else {
  304. snprintfz(filename, FILENAME_MAX, "%s/%s", netdata_configured_cache_dir, host->machine_guid);
  305. host->cache_dir = strdupz(filename);
  306. }
  307. if((host->rrd_memory_mode == RRD_MEMORY_MODE_MAP || host->rrd_memory_mode == RRD_MEMORY_MODE_SAVE ||
  308. (host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE && is_legacy))) {
  309. int r = mkdir(host->cache_dir, 0775);
  310. if(r != 0 && errno != EEXIST)
  311. nd_log(NDLS_DAEMON, NDLP_CRIT,
  312. "Host '%s': cannot create directory '%s'",
  313. rrdhost_hostname(host), host->cache_dir);
  314. }
  315. }
  316. // this is also needed for custom host variables - not only health
  317. if(!host->rrdvars)
  318. host->rrdvars = rrdvariables_create();
  319. if (likely(!uuid_parse(host->machine_guid, host->host_uuid)))
  320. sql_load_node_id(host);
  321. else
  322. error_report("Host machine GUID %s is not valid", host->machine_guid);
  323. rrdfamily_index_init(host);
  324. rrdcalctemplate_index_init(host);
  325. rrdcalc_rrdhost_index_init(host);
  326. if (host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) {
  327. #ifdef ENABLE_DBENGINE
  328. char dbenginepath[FILENAME_MAX + 1];
  329. int ret;
  330. snprintfz(dbenginepath, FILENAME_MAX, "%s/dbengine", host->cache_dir);
  331. ret = mkdir(dbenginepath, 0775);
  332. if (ret != 0 && errno != EEXIST)
  333. nd_log(NDLS_DAEMON, NDLP_CRIT,
  334. "Host '%s': cannot create directory '%s'",
  335. rrdhost_hostname(host), dbenginepath);
  336. else
  337. ret = 0; // succeed
  338. if (is_legacy) {
  339. // initialize legacy dbengine instance as needed
  340. host->db[0].mode = RRD_MEMORY_MODE_DBENGINE;
  341. host->db[0].eng = storage_engine_get(host->db[0].mode);
  342. host->db[0].tier_grouping = get_tier_grouping(0);
  343. ret = rrdeng_init(
  344. (struct rrdengine_instance **)&host->db[0].instance,
  345. dbenginepath,
  346. default_rrdeng_disk_quota_mb,
  347. 0); // may fail here for legacy dbengine initialization
  348. if(ret == 0) {
  349. rrdeng_readiness_wait((struct rrdengine_instance *)host->db[0].instance);
  350. // assign the rest of the shared storage instances to it
  351. // to allow them collect its metrics too
  352. for(size_t tier = 1; tier < storage_tiers ; tier++) {
  353. host->db[tier].mode = RRD_MEMORY_MODE_DBENGINE;
  354. host->db[tier].eng = storage_engine_get(host->db[tier].mode);
  355. host->db[tier].instance = (STORAGE_INSTANCE *) multidb_ctx[tier];
  356. host->db[tier].tier_grouping = get_tier_grouping(tier);
  357. }
  358. }
  359. }
  360. else {
  361. for(size_t tier = 0; tier < storage_tiers ; tier++) {
  362. host->db[tier].mode = RRD_MEMORY_MODE_DBENGINE;
  363. host->db[tier].eng = storage_engine_get(host->db[tier].mode);
  364. host->db[tier].instance = (STORAGE_INSTANCE *)multidb_ctx[tier];
  365. host->db[tier].tier_grouping = get_tier_grouping(tier);
  366. }
  367. }
  368. if (ret) { // check legacy or multihost initialization success
  369. nd_log(NDLS_DAEMON, NDLP_CRIT,
  370. "Host '%s': cannot initialize host with machine guid '%s'. Failed to initialize DB engine at '%s'.",
  371. rrdhost_hostname(host), host->machine_guid, host->cache_dir);
  372. rrd_wrlock();
  373. rrdhost_free___while_having_rrd_wrlock(host, true);
  374. rrd_unlock();
  375. return NULL;
  376. }
  377. #else
  378. fatal("RRD_MEMORY_MODE_DBENGINE is not supported in this platform.");
  379. #endif
  380. }
  381. else {
  382. host->db[0].mode = host->rrd_memory_mode;
  383. host->db[0].eng = storage_engine_get(host->db[0].mode);
  384. host->db[0].instance = NULL;
  385. host->db[0].tier_grouping = get_tier_grouping(0);
  386. #ifdef ENABLE_DBENGINE
  387. // the first tier is reserved for the non-dbengine modes
  388. for(size_t tier = 1; tier < storage_tiers ; tier++) {
  389. host->db[tier].mode = RRD_MEMORY_MODE_DBENGINE;
  390. host->db[tier].eng = storage_engine_get(host->db[tier].mode);
  391. host->db[tier].instance = (STORAGE_INSTANCE *) multidb_ctx[tier];
  392. host->db[tier].tier_grouping = get_tier_grouping(tier);
  393. }
  394. #endif
  395. }
  396. // ------------------------------------------------------------------------
  397. // init new ML host and update system_info to let upstreams know
  398. // about ML functionality
  399. //
  400. if (is_localhost && host->system_info) {
  401. host->system_info->ml_capable = ml_capable();
  402. host->system_info->ml_enabled = ml_enabled(host);
  403. host->system_info->mc_version = enable_metric_correlations ? metric_correlations_version : 0;
  404. }
  405. // ------------------------------------------------------------------------
  406. // link it and add it to the index
  407. rrd_wrlock();
  408. RRDHOST *t = rrdhost_index_add_by_guid(host);
  409. if(t != host) {
  410. nd_log(NDLS_DAEMON, NDLP_NOTICE,
  411. "Host '%s': cannot add host with machine guid '%s' to index. It already exists as host '%s' with machine guid '%s'.",
  412. rrdhost_hostname(host), host->machine_guid, rrdhost_hostname(t), t->machine_guid);
  413. if (!is_localhost)
  414. rrdhost_free___while_having_rrd_wrlock(host, true);
  415. rrd_unlock();
  416. return NULL;
  417. }
  418. rrdhost_index_add_hostname(host);
  419. if(is_localhost)
  420. DOUBLE_LINKED_LIST_PREPEND_ITEM_UNSAFE(localhost, host, prev, next);
  421. else
  422. DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(localhost, host, prev, next);
  423. rrd_unlock();
  424. // ------------------------------------------------------------------------
  425. nd_log(NDLS_DAEMON, NDLP_INFO,
  426. "Host '%s' (at registry as '%s') with guid '%s' initialized"
  427. ", os '%s'"
  428. ", timezone '%s'"
  429. ", tags '%s'"
  430. ", program_name '%s'"
  431. ", program_version '%s'"
  432. ", update every %d"
  433. ", memory mode %s"
  434. ", history entries %d"
  435. ", streaming %s"
  436. " (to '%s' with api key '%s')"
  437. ", health %s"
  438. ", cache_dir '%s'"
  439. ", alarms default handler '%s'"
  440. ", alarms default recipient '%s'"
  441. , rrdhost_hostname(host)
  442. , rrdhost_registry_hostname(host)
  443. , host->machine_guid
  444. , rrdhost_os(host)
  445. , rrdhost_timezone(host)
  446. , rrdhost_tags(host)
  447. , rrdhost_program_name(host)
  448. , rrdhost_program_version(host)
  449. , host->rrd_update_every
  450. , rrd_memory_mode_name(host->rrd_memory_mode)
  451. , host->rrd_history_entries
  452. , rrdhost_has_rrdpush_sender_enabled(host)?"enabled":"disabled"
  453. , host->rrdpush_send_destination?host->rrdpush_send_destination:""
  454. , host->rrdpush_send_api_key?host->rrdpush_send_api_key:""
  455. , host->health.health_enabled?"enabled":"disabled"
  456. , host->cache_dir
  457. , string2str(host->health.health_default_exec)
  458. , string2str(host->health.health_default_recipient)
  459. );
  460. host->configurable_plugins = dyncfg_dictionary_create();
  461. dictionary_register_delete_callback(host->configurable_plugins, plugin_del_cb, NULL);
  462. if(!archived) {
  463. metaqueue_host_update_info(host);
  464. rrdhost_load_rrdcontext_data(host);
  465. // rrdhost_flag_set(host, RRDHOST_FLAG_METADATA_INFO | RRDHOST_FLAG_METADATA_UPDATE);
  466. ml_host_new(host);
  467. } else
  468. rrdhost_flag_set(host, RRDHOST_FLAG_PENDING_CONTEXT_LOAD | RRDHOST_FLAG_ARCHIVED | RRDHOST_FLAG_ORPHAN);
  469. return host;
  470. }
  471. static void rrdhost_update(RRDHOST *host
  472. , const char *hostname
  473. , const char *registry_hostname
  474. , const char *guid
  475. , const char *os
  476. , const char *timezone
  477. , const char *abbrev_timezone
  478. , int32_t utc_offset
  479. , const char *tags
  480. , const char *program_name
  481. , const char *program_version
  482. , int update_every
  483. , long history
  484. , RRD_MEMORY_MODE mode
  485. , unsigned int health_enabled
  486. , unsigned int rrdpush_enabled
  487. , char *rrdpush_destination
  488. , char *rrdpush_api_key
  489. , char *rrdpush_send_charts_matching
  490. , bool rrdpush_enable_replication
  491. , time_t rrdpush_seconds_to_replicate
  492. , time_t rrdpush_replication_step
  493. , struct rrdhost_system_info *system_info
  494. )
  495. {
  496. UNUSED(guid);
  497. spinlock_lock(&host->rrdhost_update_lock);
  498. host->health.health_enabled = (mode == RRD_MEMORY_MODE_NONE) ? 0 : health_enabled;
  499. {
  500. struct rrdhost_system_info *old = host->system_info;
  501. host->system_info = system_info;
  502. rrdhost_flag_set(host, RRDHOST_FLAG_METADATA_INFO | RRDHOST_FLAG_METADATA_CLAIMID | RRDHOST_FLAG_METADATA_UPDATE);
  503. rrdhost_system_info_free(old);
  504. }
  505. rrdhost_init_os(host, os);
  506. rrdhost_init_timezone(host, timezone, abbrev_timezone, utc_offset);
  507. string_freez(host->registry_hostname);
  508. host->registry_hostname = string_strdupz((registry_hostname && *registry_hostname)?registry_hostname:hostname);
  509. if(strcmp(rrdhost_hostname(host), hostname) != 0) {
  510. nd_log(NDLS_DAEMON, NDLP_WARNING,
  511. "Host '%s' has been renamed to '%s'. If this is not intentional it may mean multiple hosts are using the same machine_guid.",
  512. rrdhost_hostname(host), hostname);
  513. rrdhost_init_hostname(host, hostname, true);
  514. } else {
  515. rrdhost_index_add_hostname(host);
  516. }
  517. if(strcmp(rrdhost_program_name(host), program_name) != 0) {
  518. nd_log(NDLS_DAEMON, NDLP_NOTICE,
  519. "Host '%s' switched program name from '%s' to '%s'",
  520. rrdhost_hostname(host), rrdhost_program_name(host), program_name);
  521. STRING *t = host->program_name;
  522. host->program_name = string_strdupz(program_name);
  523. string_freez(t);
  524. }
  525. if(strcmp(rrdhost_program_version(host), program_version) != 0) {
  526. nd_log(NDLS_DAEMON, NDLP_NOTICE,
  527. "Host '%s' switched program version from '%s' to '%s'",
  528. rrdhost_hostname(host), rrdhost_program_version(host), program_version);
  529. STRING *t = host->program_version;
  530. host->program_version = string_strdupz(program_version);
  531. string_freez(t);
  532. }
  533. if(host->rrd_update_every != update_every)
  534. nd_log(NDLS_DAEMON, NDLP_WARNING,
  535. "Host '%s' has an update frequency of %d seconds, but the wanted one is %d seconds. "
  536. "Restart netdata here to apply the new settings.",
  537. rrdhost_hostname(host), host->rrd_update_every, update_every);
  538. if(host->rrd_memory_mode != mode)
  539. nd_log(NDLS_DAEMON, NDLP_WARNING,
  540. "Host '%s' has memory mode '%s', but the wanted one is '%s'. "
  541. "Restart netdata here to apply the new settings.",
  542. rrdhost_hostname(host),
  543. rrd_memory_mode_name(host->rrd_memory_mode),
  544. rrd_memory_mode_name(mode));
  545. else if(host->rrd_memory_mode != RRD_MEMORY_MODE_DBENGINE && host->rrd_history_entries < history)
  546. nd_log(NDLS_DAEMON, NDLP_WARNING,
  547. "Host '%s' has history of %d entries, but the wanted one is %ld entries. "
  548. "Restart netdata here to apply the new settings.",
  549. rrdhost_hostname(host),
  550. host->rrd_history_entries,
  551. history);
  552. // update host tags
  553. rrdhost_init_tags(host, tags);
  554. if(!host->rrdvars)
  555. host->rrdvars = rrdvariables_create();
  556. host->last_connected = now_realtime_sec();
  557. if (rrdhost_flag_check(host, RRDHOST_FLAG_ARCHIVED)) {
  558. rrdhost_flag_clear(host, RRDHOST_FLAG_ARCHIVED);
  559. rrdfunctions_host_init(host);
  560. if(!host->rrdlabels)
  561. host->rrdlabels = rrdlabels_create();
  562. if (!host->rrdset_root_index)
  563. rrdset_index_init(host);
  564. rrdhost_initialize_rrdpush_sender(host,
  565. rrdpush_enabled,
  566. rrdpush_destination,
  567. rrdpush_api_key,
  568. rrdpush_send_charts_matching);
  569. rrdfamily_index_init(host);
  570. rrdcalctemplate_index_init(host);
  571. rrdcalc_rrdhost_index_init(host);
  572. if(rrdpush_enable_replication)
  573. rrdhost_option_set(host, RRDHOST_OPTION_REPLICATION);
  574. else
  575. rrdhost_option_clear(host, RRDHOST_OPTION_REPLICATION);
  576. host->rrdpush_seconds_to_replicate = rrdpush_seconds_to_replicate;
  577. host->rrdpush_replication_step = rrdpush_replication_step;
  578. ml_host_new(host);
  579. rrdhost_load_rrdcontext_data(host);
  580. nd_log(NDLS_DAEMON, NDLP_DEBUG,
  581. "Host %s is not in archived mode anymore",
  582. rrdhost_hostname(host));
  583. }
  584. spinlock_unlock(&host->rrdhost_update_lock);
  585. }
  586. RRDHOST *rrdhost_find_or_create(
  587. const char *hostname
  588. , const char *registry_hostname
  589. , const char *guid
  590. , const char *os
  591. , const char *timezone
  592. , const char *abbrev_timezone
  593. , int32_t utc_offset
  594. , const char *tags
  595. , const char *program_name
  596. , const char *program_version
  597. , int update_every
  598. , long history
  599. , RRD_MEMORY_MODE mode
  600. , unsigned int health_enabled
  601. , unsigned int rrdpush_enabled
  602. , char *rrdpush_destination
  603. , char *rrdpush_api_key
  604. , char *rrdpush_send_charts_matching
  605. , bool rrdpush_enable_replication
  606. , time_t rrdpush_seconds_to_replicate
  607. , time_t rrdpush_replication_step
  608. , struct rrdhost_system_info *system_info
  609. , bool archived
  610. ) {
  611. RRDHOST *host = rrdhost_find_by_guid(guid);
  612. if (unlikely(host && host->rrd_memory_mode != mode && rrdhost_flag_check(host, RRDHOST_FLAG_ARCHIVED))) {
  613. if (likely(!archived && rrdhost_flag_check(host, RRDHOST_FLAG_PENDING_CONTEXT_LOAD)))
  614. return host;
  615. /* If a legacy memory mode instantiates all dbengine state must be discarded to avoid inconsistencies */
  616. nd_log(NDLS_DAEMON, NDLP_INFO,
  617. "Archived host '%s' has memory mode '%s', but the wanted one is '%s'. Discarding archived state.",
  618. rrdhost_hostname(host),
  619. rrd_memory_mode_name(host->rrd_memory_mode),
  620. rrd_memory_mode_name(mode));
  621. rrd_wrlock();
  622. rrdhost_free___while_having_rrd_wrlock(host, true);
  623. host = NULL;
  624. rrd_unlock();
  625. }
  626. if(!host) {
  627. host = rrdhost_create(
  628. hostname
  629. , registry_hostname
  630. , guid
  631. , os
  632. , timezone
  633. , abbrev_timezone
  634. , utc_offset
  635. , tags
  636. , program_name
  637. , program_version
  638. , update_every
  639. , history
  640. , mode
  641. , health_enabled
  642. , rrdpush_enabled
  643. , rrdpush_destination
  644. , rrdpush_api_key
  645. , rrdpush_send_charts_matching
  646. , rrdpush_enable_replication
  647. , rrdpush_seconds_to_replicate
  648. , rrdpush_replication_step
  649. , system_info
  650. , 0
  651. , archived
  652. );
  653. }
  654. else {
  655. if (likely(!rrdhost_flag_check(host, RRDHOST_FLAG_PENDING_CONTEXT_LOAD)))
  656. rrdhost_update(host
  657. , hostname
  658. , registry_hostname
  659. , guid
  660. , os
  661. , timezone
  662. , abbrev_timezone
  663. , utc_offset
  664. , tags
  665. , program_name
  666. , program_version
  667. , update_every
  668. , history
  669. , mode
  670. , health_enabled
  671. , rrdpush_enabled
  672. , rrdpush_destination
  673. , rrdpush_api_key
  674. , rrdpush_send_charts_matching
  675. , rrdpush_enable_replication
  676. , rrdpush_seconds_to_replicate
  677. , rrdpush_replication_step
  678. , system_info);
  679. }
  680. return host;
  681. }
  682. inline int rrdhost_should_be_removed(RRDHOST *host, RRDHOST *protected_host, time_t now_s) {
  683. if(host != protected_host
  684. && host != localhost
  685. && rrdhost_receiver_replicating_charts(host) == 0
  686. && rrdhost_sender_replicating_charts(host) == 0
  687. && rrdhost_flag_check(host, RRDHOST_FLAG_ORPHAN)
  688. && !rrdhost_flag_check(host, RRDHOST_FLAG_PENDING_CONTEXT_LOAD)
  689. && !host->receiver
  690. && host->child_disconnected_time
  691. && host->child_disconnected_time + rrdhost_free_orphan_time_s < now_s)
  692. return 1;
  693. return 0;
  694. }
  695. // ----------------------------------------------------------------------------
  696. // RRDHOST global / startup initialization
  #ifdef ENABLE_DBENGINE
  // One initialization request per storage tier. dbengine_init() fills it in,
  // dbengine_tier_init() consumes it and stores the outcome in 'ret'.
  struct dbengine_initialization {
      netdata_thread_t thread;        // handle of the thread running the initialization (parallel mode)
      char path[FILENAME_MAX + 1];    // directory handed to rrdeng_init()
      int disk_space_mb;              // disk space value handed to rrdeng_init()
      size_t tier;                    // tier number handed to rrdeng_init()
      int ret;                        // value returned by rrdeng_init()
  };

  // Initialize one tier. The signature matches a thread entry point so that
  // it can either be called directly or be given to netdata_thread_create().
  // 'ptr' is a struct dbengine_initialization; the same pointer is returned.
  void *dbengine_tier_init(void *ptr) {
      struct dbengine_initialization *request = ptr;

      int rc = rrdeng_init(NULL, request->path, request->disk_space_mb, request->tier);
      request->ret = rc;

      return ptr;
  }
  #endif
  711. void dbengine_init(char *hostname) {
  712. #ifdef ENABLE_DBENGINE
  713. use_direct_io = config_get_boolean(CONFIG_SECTION_DB, "dbengine use direct io", use_direct_io);
  714. unsigned read_num = (unsigned)config_get_number(CONFIG_SECTION_DB, "dbengine pages per extent", MAX_PAGES_PER_EXTENT);
  715. if (read_num > 0 && read_num <= MAX_PAGES_PER_EXTENT)
  716. rrdeng_pages_per_extent = read_num;
  717. else {
  718. nd_log(NDLS_DAEMON, NDLP_WARNING,
  719. "Invalid dbengine pages per extent %u given. Using %u.",
  720. read_num, rrdeng_pages_per_extent);
  721. config_set_number(CONFIG_SECTION_DB, "dbengine pages per extent", rrdeng_pages_per_extent);
  722. }
  723. storage_tiers = config_get_number(CONFIG_SECTION_DB, "storage tiers", storage_tiers);
  724. if(storage_tiers < 1) {
  725. nd_log(NDLS_DAEMON, NDLP_WARNING,
  726. "At least 1 storage tier is required. Assuming 1.");
  727. storage_tiers = 1;
  728. config_set_number(CONFIG_SECTION_DB, "storage tiers", storage_tiers);
  729. }
  730. if(storage_tiers > RRD_STORAGE_TIERS) {
  731. nd_log(NDLS_DAEMON, NDLP_WARNING,
  732. "Up to %d storage tier are supported. Assuming %d.",
  733. RRD_STORAGE_TIERS, RRD_STORAGE_TIERS);
  734. storage_tiers = RRD_STORAGE_TIERS;
  735. config_set_number(CONFIG_SECTION_DB, "storage tiers", storage_tiers);
  736. }
  737. bool parallel_initialization = (storage_tiers <= (size_t)get_netdata_cpus()) ? true : false;
  738. parallel_initialization = config_get_boolean(CONFIG_SECTION_DB, "dbengine parallel initialization", parallel_initialization);
  739. struct dbengine_initialization tiers_init[RRD_STORAGE_TIERS] = {};
  740. size_t created_tiers = 0;
  741. char dbenginepath[FILENAME_MAX + 1];
  742. char dbengineconfig[200 + 1];
  743. int divisor = 1;
  744. for(size_t tier = 0; tier < storage_tiers ;tier++) {
  745. if(tier == 0)
  746. snprintfz(dbenginepath, FILENAME_MAX, "%s/dbengine", netdata_configured_cache_dir);
  747. else
  748. snprintfz(dbenginepath, FILENAME_MAX, "%s/dbengine-tier%zu", netdata_configured_cache_dir, tier);
  749. int ret = mkdir(dbenginepath, 0775);
  750. if (ret != 0 && errno != EEXIST) {
  751. nd_log(NDLS_DAEMON, NDLP_CRIT,
  752. "DBENGINE on '%s': cannot create directory '%s'",
  753. hostname, dbenginepath);
  754. break;
  755. }
  756. if(tier > 0)
  757. divisor *= 2;
  758. int disk_space_mb = default_multidb_disk_quota_mb / divisor;
  759. size_t grouping_iterations = storage_tiers_grouping_iterations[tier];
  760. RRD_BACKFILL backfill = storage_tiers_backfill[tier];
  761. if(tier > 0) {
  762. snprintfz(dbengineconfig, sizeof(dbengineconfig) - 1, "dbengine tier %zu multihost disk space MB", tier);
  763. disk_space_mb = config_get_number(CONFIG_SECTION_DB, dbengineconfig, disk_space_mb);
  764. snprintfz(dbengineconfig, sizeof(dbengineconfig) - 1, "dbengine tier %zu update every iterations", tier);
  765. grouping_iterations = config_get_number(CONFIG_SECTION_DB, dbengineconfig, grouping_iterations);
  766. if(grouping_iterations < 2) {
  767. grouping_iterations = 2;
  768. config_set_number(CONFIG_SECTION_DB, dbengineconfig, grouping_iterations);
  769. nd_log(NDLS_DAEMON, NDLP_WARNING,
  770. "DBENGINE on '%s': 'dbegnine tier %zu update every iterations' cannot be less than 2. Assuming 2.",
  771. hostname, tier);
  772. }
  773. snprintfz(dbengineconfig, sizeof(dbengineconfig) - 1, "dbengine tier %zu backfill", tier);
  774. const char *bf = config_get(CONFIG_SECTION_DB, dbengineconfig, backfill == RRD_BACKFILL_NEW ? "new" : backfill == RRD_BACKFILL_FULL ? "full" : "none");
  775. if(strcmp(bf, "new") == 0) backfill = RRD_BACKFILL_NEW;
  776. else if(strcmp(bf, "full") == 0) backfill = RRD_BACKFILL_FULL;
  777. else if(strcmp(bf, "none") == 0) backfill = RRD_BACKFILL_NONE;
  778. else {
  779. nd_log(NDLS_DAEMON, NDLP_WARNING,
  780. "DBENGINE: unknown backfill value '%s', assuming 'new'",
  781. bf);
  782. config_set(CONFIG_SECTION_DB, dbengineconfig, "new");
  783. backfill = RRD_BACKFILL_NEW;
  784. }
  785. }
  786. storage_tiers_grouping_iterations[tier] = grouping_iterations;
  787. storage_tiers_backfill[tier] = backfill;
  788. if(tier > 0 && get_tier_grouping(tier) > 65535) {
  789. storage_tiers_grouping_iterations[tier] = 1;
  790. nd_log(NDLS_DAEMON, NDLP_WARNING,
  791. "DBENGINE on '%s': dbengine tier %zu gives aggregation of more than 65535 points of tier 0. "
  792. "Disabling tiers above %zu",
  793. hostname, tier, tier);
  794. break;
  795. }
  796. internal_error(true, "DBENGINE tier %zu grouping iterations is set to %zu", tier, storage_tiers_grouping_iterations[tier]);
  797. tiers_init[tier].disk_space_mb = disk_space_mb;
  798. tiers_init[tier].tier = tier;
  799. strncpyz(tiers_init[tier].path, dbenginepath, FILENAME_MAX);
  800. tiers_init[tier].ret = 0;
  801. if(parallel_initialization) {
  802. char tag[NETDATA_THREAD_TAG_MAX + 1];
  803. snprintfz(tag, NETDATA_THREAD_TAG_MAX, "DBENGINIT[%zu]", tier);
  804. netdata_thread_create(&tiers_init[tier].thread, tag, NETDATA_THREAD_OPTION_JOINABLE,
  805. dbengine_tier_init, &tiers_init[tier]);
  806. }
  807. else
  808. dbengine_tier_init(&tiers_init[tier]);
  809. }
  810. for(size_t tier = 0; tier < storage_tiers ;tier++) {
  811. void *ptr;
  812. if(parallel_initialization)
  813. netdata_thread_join(tiers_init[tier].thread, &ptr);
  814. if(tiers_init[tier].ret != 0) {
  815. nd_log(NDLS_DAEMON, NDLP_ERR,
  816. "DBENGINE on '%s': Failed to initialize multi-host database tier %zu on path '%s'",
  817. hostname, tiers_init[tier].tier, tiers_init[tier].path);
  818. }
  819. else if(created_tiers == tier)
  820. created_tiers++;
  821. }
  822. if(created_tiers && created_tiers < storage_tiers) {
  823. nd_log(NDLS_DAEMON, NDLP_WARNING,
  824. "DBENGINE on '%s': Managed to create %zu tiers instead of %zu. Continuing with %zu available.",
  825. hostname, created_tiers, storage_tiers, created_tiers);
  826. storage_tiers = created_tiers;
  827. }
  828. else if(!created_tiers)
  829. fatal("DBENGINE on '%s', failed to initialize databases at '%s'.", hostname, netdata_configured_cache_dir);
  830. for(size_t tier = 0; tier < storage_tiers ;tier++)
  831. rrdeng_readiness_wait(multidb_ctx[tier]);
  832. dbengine_enabled = true;
  833. #else
  834. storage_tiers = config_get_number(CONFIG_SECTION_DB, "storage tiers", 1);
  835. if(storage_tiers != 1) {
  836. nd_log(NDLS_DAEMON, NDLP_WARNING,
  837. "DBENGINE is not available on '%s', so only 1 database tier can be supported.",
  838. hostname);
  839. storage_tiers = 1;
  840. config_set_number(CONFIG_SECTION_DB, "storage tiers", storage_tiers);
  841. }
  842. dbengine_enabled = false;
  843. #endif
  844. }
  845. int rrd_init(char *hostname, struct rrdhost_system_info *system_info, bool unittest) {
  846. rrdhost_init();
  847. if (unlikely(sql_init_database(DB_CHECK_NONE, system_info ? 0 : 1))) {
  848. if (default_rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) {
  849. set_late_global_environment(system_info);
  850. fatal("Failed to initialize SQLite");
  851. }
  852. nd_log(NDLS_DAEMON, NDLP_DEBUG,
  853. "Skipping SQLITE metadata initialization since memory mode is not dbengine");
  854. }
  855. if (unlikely(sql_init_context_database(system_info ? 0 : 1))) {
  856. error_report("Failed to initialize context metadata database");
  857. }
  858. if (unlikely(unittest)) {
  859. dbengine_enabled = true;
  860. }
  861. else {
  862. health_init();
  863. rrdpush_init();
  864. if (default_rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE || rrdpush_receiver_needs_dbengine()) {
  865. nd_log(NDLS_DAEMON, NDLP_DEBUG,
  866. "DBENGINE: Initializing ...");
  867. dbengine_init(hostname);
  868. }
  869. else
  870. storage_tiers = 1;
  871. if (!dbengine_enabled) {
  872. if (storage_tiers > 1) {
  873. nd_log(NDLS_DAEMON, NDLP_WARNING,
  874. "dbengine is not enabled, but %zu tiers have been requested. Resetting tiers to 1",
  875. storage_tiers);
  876. storage_tiers = 1;
  877. }
  878. if (default_rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) {
  879. nd_log(NDLS_DAEMON, NDLP_WARNING,
  880. "dbengine is not enabled, but it has been given as the default db mode. "
  881. "Resetting db mode to alloc");
  882. default_rrd_memory_mode = RRD_MEMORY_MODE_ALLOC;
  883. }
  884. }
  885. }
  886. if(!unittest)
  887. metadata_sync_init();
  888. localhost = rrdhost_create(
  889. hostname
  890. , registry_get_this_machine_hostname()
  891. , registry_get_this_machine_guid()
  892. , os_type
  893. , netdata_configured_timezone
  894. , netdata_configured_abbrev_timezone
  895. , netdata_configured_utc_offset
  896. , ""
  897. , program_name
  898. , program_version
  899. , default_rrd_update_every
  900. , default_rrd_history_entries
  901. , default_rrd_memory_mode
  902. , default_health_enabled
  903. , default_rrdpush_enabled
  904. , default_rrdpush_destination
  905. , default_rrdpush_api_key
  906. , default_rrdpush_send_charts_matching
  907. , default_rrdpush_enable_replication
  908. , default_rrdpush_seconds_to_replicate
  909. , default_rrdpush_replication_step
  910. , system_info
  911. , 1
  912. , 0
  913. );
  914. if (unlikely(!localhost)) {
  915. return 1;
  916. }
  917. // we register this only on localhost
  918. // for the other nodes, the origin server should register it
  919. rrd_collector_started(); // this creates a collector that runs for as long as netdata runs
  920. rrd_function_add(localhost, NULL, "streaming", 10,
  921. RRDFUNCTIONS_STREAMING_HELP, true,
  922. rrdhost_function_streaming, NULL);
  923. if (likely(system_info)) {
  924. migrate_localhost(&localhost->host_uuid);
  925. sql_aclk_sync_init();
  926. web_client_api_v1_management_init();
  927. }
  928. return localhost==NULL;
  929. }
  930. // ----------------------------------------------------------------------------
  931. // RRDHOST - free
  932. void rrdhost_system_info_free(struct rrdhost_system_info *system_info) {
  933. if(likely(system_info)) {
  934. __atomic_sub_fetch(&netdata_buffers_statistics.rrdhost_allocations_size, sizeof(struct rrdhost_system_info), __ATOMIC_RELAXED);
  935. freez(system_info->cloud_provider_type);
  936. freez(system_info->cloud_instance_type);
  937. freez(system_info->cloud_instance_region);
  938. freez(system_info->host_os_name);
  939. freez(system_info->host_os_id);
  940. freez(system_info->host_os_id_like);
  941. freez(system_info->host_os_version);
  942. freez(system_info->host_os_version_id);
  943. freez(system_info->host_os_detection);
  944. freez(system_info->host_cores);
  945. freez(system_info->host_cpu_freq);
  946. freez(system_info->host_ram_total);
  947. freez(system_info->host_disk_space);
  948. freez(system_info->container_os_name);
  949. freez(system_info->container_os_id);
  950. freez(system_info->container_os_id_like);
  951. freez(system_info->container_os_version);
  952. freez(system_info->container_os_version_id);
  953. freez(system_info->container_os_detection);
  954. freez(system_info->kernel_name);
  955. freez(system_info->kernel_version);
  956. freez(system_info->architecture);
  957. freez(system_info->virtualization);
  958. freez(system_info->virt_detection);
  959. freez(system_info->container);
  960. freez(system_info->container_detection);
  961. freez(system_info->is_k8s_node);
  962. freez(system_info->install_type);
  963. freez(system_info->prebuilt_arch);
  964. freez(system_info->prebuilt_dist);
  965. freez(system_info);
  966. }
  967. }
  968. static void rrdhost_streaming_sender_structures_init(RRDHOST *host)
  969. {
  970. if (host->sender)
  971. return;
  972. host->sender = callocz(1, sizeof(*host->sender));
  973. __atomic_add_fetch(&netdata_buffers_statistics.rrdhost_senders, sizeof(*host->sender), __ATOMIC_RELAXED);
  974. host->sender->host = host;
  975. host->sender->buffer = cbuffer_new(CBUFFER_INITIAL_SIZE, 1024 * 1024, &netdata_buffers_statistics.cbuffers_streaming);
  976. host->sender->capabilities = stream_our_capabilities(host, true);
  977. host->sender->rrdpush_sender_pipe[PIPE_READ] = -1;
  978. host->sender->rrdpush_sender_pipe[PIPE_WRITE] = -1;
  979. host->sender->rrdpush_sender_socket = -1;
  980. host->sender->disabled_capabilities = STREAM_CAP_NONE;
  981. if(!default_rrdpush_compression_enabled)
  982. host->sender->disabled_capabilities |= STREAM_CAP_COMPRESSIONS_AVAILABLE;
  983. spinlock_init(&host->sender->spinlock);
  984. replication_init_sender(host->sender);
  985. }
  986. static void rrdhost_streaming_sender_structures_free(RRDHOST *host)
  987. {
  988. rrdhost_option_clear(host, RRDHOST_OPTION_SENDER_ENABLED);
  989. if (unlikely(!host->sender))
  990. return;
  991. rrdpush_sender_thread_stop(host, STREAM_HANDSHAKE_DISCONNECT_HOST_CLEANUP, true); // stop a possibly running thread
  992. cbuffer_free(host->sender->buffer);
  993. rrdpush_compressor_destroy(&host->sender->compressor);
  994. replication_cleanup_sender(host->sender);
  995. __atomic_sub_fetch(&netdata_buffers_statistics.rrdhost_senders, sizeof(*host->sender), __ATOMIC_RELAXED);
  996. freez(host->sender);
  997. host->sender = NULL;
  998. rrdhost_flag_clear(host, RRDHOST_FLAG_RRDPUSH_SENDER_INITIALIZED);
  999. }
  1000. void rrdhost_free___while_having_rrd_wrlock(RRDHOST *host, bool force) {
  1001. if(!host) return;
  1002. if (netdata_exit || force) {
  1003. nd_log(NDLS_DAEMON, NDLP_DEBUG,
  1004. "RRD: 'host:%s' freeing memory...",
  1005. rrdhost_hostname(host));
  1006. // ------------------------------------------------------------------------
  1007. // first remove it from the indexes, so that it will not be discoverable
  1008. rrdhost_index_del_hostname(host);
  1009. rrdhost_index_del_by_guid(host);
  1010. if (host->prev)
  1011. DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(localhost, host, prev, next);
  1012. }
  1013. // ------------------------------------------------------------------------
  1014. // clean up streaming chart slots
  1015. rrdhost_pluginsd_send_chart_slots_free(host);
  1016. rrdhost_pluginsd_receive_chart_slots_free(host);
  1017. // ------------------------------------------------------------------------
  1018. // clean up streaming
  1019. rrdhost_streaming_sender_structures_free(host);
  1020. if (netdata_exit || force)
  1021. stop_streaming_receiver(host, STREAM_HANDSHAKE_DISCONNECT_HOST_CLEANUP);
  1022. // ------------------------------------------------------------------------
  1023. // clean up alarms
  1024. rrdcalc_delete_all(host);
  1025. // ------------------------------------------------------------------------
  1026. // release its children resources
  1027. #ifdef ENABLE_DBENGINE
  1028. for(size_t tier = 0; tier < storage_tiers ;tier++) {
  1029. if(host->db[tier].mode == RRD_MEMORY_MODE_DBENGINE
  1030. && host->db[tier].instance
  1031. && !is_storage_engine_shared(host->db[tier].instance))
  1032. rrdeng_prepare_exit((struct rrdengine_instance *)host->db[tier].instance);
  1033. }
  1034. #endif
  1035. // delete all the RRDSETs of the host
  1036. rrdset_index_destroy(host);
  1037. rrdcalc_rrdhost_index_destroy(host);
  1038. rrdcalctemplate_index_destroy(host);
  1039. // cleanup ML resources
  1040. ml_host_delete(host);
  1041. freez(host->exporting_flags);
  1042. health_alarm_log_free(host);
  1043. #ifdef ENABLE_DBENGINE
  1044. for(size_t tier = 0; tier < storage_tiers ;tier++) {
  1045. if(host->db[tier].mode == RRD_MEMORY_MODE_DBENGINE
  1046. && host->db[tier].instance
  1047. && !is_storage_engine_shared(host->db[tier].instance))
  1048. rrdeng_exit((struct rrdengine_instance *)host->db[tier].instance);
  1049. }
  1050. #endif
  1051. if (!netdata_exit && !force) {
  1052. nd_log(NDLS_DAEMON, NDLP_DEBUG,
  1053. "RRD: 'host:%s' is now in archive mode...",
  1054. rrdhost_hostname(host));
  1055. rrdhost_flag_set(host, RRDHOST_FLAG_ARCHIVED | RRDHOST_FLAG_ORPHAN);
  1056. return;
  1057. }
  1058. // ------------------------------------------------------------------------
  1059. // free it
  1060. pthread_mutex_destroy(&host->aclk_state_lock);
  1061. freez(host->aclk_state.claimed_id);
  1062. freez(host->aclk_state.prev_claimed_id);
  1063. string_freez(host->tags);
  1064. rrdlabels_destroy(host->rrdlabels);
  1065. string_freez(host->os);
  1066. string_freez(host->timezone);
  1067. string_freez(host->abbrev_timezone);
  1068. string_freez(host->program_name);
  1069. string_freez(host->program_version);
  1070. rrdhost_system_info_free(host->system_info);
  1071. freez(host->cache_dir);
  1072. freez(host->rrdpush_send_api_key);
  1073. freez(host->rrdpush_send_destination);
  1074. rrdpush_destinations_free(host);
  1075. string_freez(host->health.health_default_exec);
  1076. string_freez(host->health.health_default_recipient);
  1077. string_freez(host->registry_hostname);
  1078. simple_pattern_free(host->rrdpush_send_charts_matching);
  1079. freez(host->node_id);
  1080. rrdfamily_index_destroy(host);
  1081. rrdfunctions_host_destroy(host);
  1082. rrdvariables_destroy(host->rrdvars);
  1083. if (host == localhost)
  1084. rrdvariables_destroy(health_rrdvars);
  1085. rrdhost_destroy_rrdcontexts(host);
  1086. string_freez(host->hostname);
  1087. __atomic_sub_fetch(&netdata_buffers_statistics.rrdhost_allocations_size, sizeof(RRDHOST), __ATOMIC_RELAXED);
  1088. freez(host);
  1089. }
  1090. void rrdhost_free_all(void) {
  1091. rrd_wrlock();
  1092. /* Make sure child-hosts are released before the localhost. */
  1093. while(localhost && localhost->next)
  1094. rrdhost_free___while_having_rrd_wrlock(localhost->next, true);
  1095. if(localhost)
  1096. rrdhost_free___while_having_rrd_wrlock(localhost, true);
  1097. rrd_unlock();
  1098. }
  1099. void rrd_finalize_collection_for_all_hosts(void) {
  1100. RRDHOST *host;
  1101. dfe_start_reentrant(rrdhost_root_index, host) {
  1102. rrdhost_finalize_collection(host);
  1103. }
  1104. dfe_done(host);
  1105. }
  1106. // ----------------------------------------------------------------------------
  1107. // RRDHOST - save host files
  1108. void rrdhost_save_charts(RRDHOST *host) {
  1109. if(!host) return;
  1110. nd_log(NDLS_DAEMON, NDLP_DEBUG,
  1111. "RRD: 'host:%s' saving / closing database...",
  1112. rrdhost_hostname(host));
  1113. RRDSET *st;
  1114. // we get a write lock
  1115. // to ensure only one thread is saving the database
  1116. rrdset_foreach_write(st, host) {
  1117. rrdset_save(st);
  1118. }
  1119. rrdset_foreach_done(st);
  1120. }
  1121. struct rrdhost_system_info *rrdhost_labels_to_system_info(RRDLABELS *labels) {
  1122. struct rrdhost_system_info *info = callocz(1, sizeof(struct rrdhost_system_info));
  1123. info->hops = 1;
  1124. rrdlabels_get_value_strdup_or_null(labels, &info->cloud_provider_type, "_cloud_provider_type");
  1125. rrdlabels_get_value_strdup_or_null(labels, &info->cloud_instance_type, "_cloud_instance_type");
  1126. rrdlabels_get_value_strdup_or_null(labels, &info->cloud_instance_region, "_cloud_instance_region");
  1127. rrdlabels_get_value_strdup_or_null(labels, &info->host_os_name, "_os_name");
  1128. rrdlabels_get_value_strdup_or_null(labels, &info->host_os_version, "_os_version");
  1129. rrdlabels_get_value_strdup_or_null(labels, &info->kernel_version, "_kernel_version");
  1130. rrdlabels_get_value_strdup_or_null(labels, &info->host_cores, "_system_cores");
  1131. rrdlabels_get_value_strdup_or_null(labels, &info->host_cpu_freq, "_system_cpu_freq");
  1132. rrdlabels_get_value_strdup_or_null(labels, &info->host_ram_total, "_system_ram_total");
  1133. rrdlabels_get_value_strdup_or_null(labels, &info->host_disk_space, "_system_disk_space");
  1134. rrdlabels_get_value_strdup_or_null(labels, &info->architecture, "_architecture");
  1135. rrdlabels_get_value_strdup_or_null(labels, &info->virtualization, "_virtualization");
  1136. rrdlabels_get_value_strdup_or_null(labels, &info->container, "_container");
  1137. rrdlabels_get_value_strdup_or_null(labels, &info->container_detection, "_container_detection");
  1138. rrdlabels_get_value_strdup_or_null(labels, &info->virt_detection, "_virt_detection");
  1139. rrdlabels_get_value_strdup_or_null(labels, &info->is_k8s_node, "_is_k8s_node");
  1140. rrdlabels_get_value_strdup_or_null(labels, &info->install_type, "_install_type");
  1141. rrdlabels_get_value_strdup_or_null(labels, &info->prebuilt_arch, "_prebuilt_arch");
  1142. rrdlabels_get_value_strdup_or_null(labels, &info->prebuilt_dist, "_prebuilt_dist");
  1143. return info;
  1144. }
  1145. static void rrdhost_load_auto_labels(void) {
  1146. RRDLABELS *labels = localhost->rrdlabels;
  1147. if (localhost->system_info->cloud_provider_type)
  1148. rrdlabels_add(labels, "_cloud_provider_type", localhost->system_info->cloud_provider_type, RRDLABEL_SRC_AUTO);
  1149. if (localhost->system_info->cloud_instance_type)
  1150. rrdlabels_add(labels, "_cloud_instance_type", localhost->system_info->cloud_instance_type, RRDLABEL_SRC_AUTO);
  1151. if (localhost->system_info->cloud_instance_region)
  1152. rrdlabels_add(labels, "_cloud_instance_region", localhost->system_info->cloud_instance_region, RRDLABEL_SRC_AUTO);
  1153. if (localhost->system_info->host_os_name)
  1154. rrdlabels_add(labels, "_os_name", localhost->system_info->host_os_name, RRDLABEL_SRC_AUTO);
  1155. if (localhost->system_info->host_os_version)
  1156. rrdlabels_add(labels, "_os_version", localhost->system_info->host_os_version, RRDLABEL_SRC_AUTO);
  1157. if (localhost->system_info->kernel_version)
  1158. rrdlabels_add(labels, "_kernel_version", localhost->system_info->kernel_version, RRDLABEL_SRC_AUTO);
  1159. if (localhost->system_info->host_cores)
  1160. rrdlabels_add(labels, "_system_cores", localhost->system_info->host_cores, RRDLABEL_SRC_AUTO);
  1161. if (localhost->system_info->host_cpu_freq)
  1162. rrdlabels_add(labels, "_system_cpu_freq", localhost->system_info->host_cpu_freq, RRDLABEL_SRC_AUTO);
  1163. if (localhost->system_info->host_ram_total)
  1164. rrdlabels_add(labels, "_system_ram_total", localhost->system_info->host_ram_total, RRDLABEL_SRC_AUTO);
  1165. if (localhost->system_info->host_disk_space)
  1166. rrdlabels_add(labels, "_system_disk_space", localhost->system_info->host_disk_space, RRDLABEL_SRC_AUTO);
  1167. if (localhost->system_info->architecture)
  1168. rrdlabels_add(labels, "_architecture", localhost->system_info->architecture, RRDLABEL_SRC_AUTO);
  1169. if (localhost->system_info->virtualization)
  1170. rrdlabels_add(labels, "_virtualization", localhost->system_info->virtualization, RRDLABEL_SRC_AUTO);
  1171. if (localhost->system_info->container)
  1172. rrdlabels_add(labels, "_container", localhost->system_info->container, RRDLABEL_SRC_AUTO);
  1173. if (localhost->system_info->container_detection)
  1174. rrdlabels_add(labels, "_container_detection", localhost->system_info->container_detection, RRDLABEL_SRC_AUTO);
  1175. if (localhost->system_info->virt_detection)
  1176. rrdlabels_add(labels, "_virt_detection", localhost->system_info->virt_detection, RRDLABEL_SRC_AUTO);
  1177. if (localhost->system_info->is_k8s_node)
  1178. rrdlabels_add(labels, "_is_k8s_node", localhost->system_info->is_k8s_node, RRDLABEL_SRC_AUTO);
  1179. if (localhost->system_info->install_type)
  1180. rrdlabels_add(labels, "_install_type", localhost->system_info->install_type, RRDLABEL_SRC_AUTO);
  1181. if (localhost->system_info->prebuilt_arch)
  1182. rrdlabels_add(labels, "_prebuilt_arch", localhost->system_info->prebuilt_arch, RRDLABEL_SRC_AUTO);
  1183. if (localhost->system_info->prebuilt_dist)
  1184. rrdlabels_add(labels, "_prebuilt_dist", localhost->system_info->prebuilt_dist, RRDLABEL_SRC_AUTO);
  1185. add_aclk_host_labels();
  1186. // The source should be CONF, but when it is set, these labels are exported by default ('send configured labels' in exporting.conf).
  1187. // Their export seems to break exporting to Graphite, see https://github.com/netdata/netdata/issues/14084.
  1188. int is_ephemeral = appconfig_get_boolean(&netdata_config, CONFIG_SECTION_GLOBAL, "is ephemeral node", CONFIG_BOOLEAN_NO);
  1189. rrdlabels_add(labels, "_is_ephemeral", is_ephemeral ? "true" : "false", RRDLABEL_SRC_AUTO);
  1190. int has_unstable_connection = appconfig_get_boolean(&netdata_config, CONFIG_SECTION_GLOBAL, "has unstable connection", CONFIG_BOOLEAN_NO);
  1191. rrdlabels_add(labels, "_has_unstable_connection", has_unstable_connection ? "true" : "false", RRDLABEL_SRC_AUTO);
  1192. rrdlabels_add(labels, "_is_parent", (localhost->connected_children_count > 0) ? "true" : "false", RRDLABEL_SRC_AUTO);
  1193. if (localhost->rrdpush_send_destination)
  1194. rrdlabels_add(labels, "_streams_to", localhost->rrdpush_send_destination, RRDLABEL_SRC_AUTO);
  1195. }
  1196. void rrdhost_set_is_parent_label(void) {
  1197. int count = __atomic_load_n(&localhost->connected_children_count, __ATOMIC_RELAXED);
  1198. if (count == 0 || count == 1) {
  1199. RRDLABELS *labels = localhost->rrdlabels;
  1200. rrdlabels_add(labels, "_is_parent", (count) ? "true" : "false", RRDLABEL_SRC_AUTO);
  1201. //queue a node info
  1202. #ifdef ENABLE_ACLK
  1203. if (netdata_cloud_enabled) {
  1204. aclk_queue_node_info(localhost, false);
  1205. }
  1206. #endif
  1207. }
  1208. }
  1209. static void rrdhost_load_config_labels(void) {
  1210. int status = config_load(NULL, 1, CONFIG_SECTION_HOST_LABEL);
  1211. if(!status) {
  1212. char *filename = CONFIG_DIR "/" CONFIG_FILENAME;
  1213. nd_log(NDLS_DAEMON, NDLP_WARNING,
  1214. "RRDLABEL: Cannot reload the configuration file '%s', using labels in memory",
  1215. filename);
  1216. }
  1217. struct section *co = appconfig_get_section(&netdata_config, CONFIG_SECTION_HOST_LABEL);
  1218. if(co) {
  1219. config_section_wrlock(co);
  1220. struct config_option *cv;
  1221. for(cv = co->values; cv ; cv = cv->next) {
  1222. rrdlabels_add(localhost->rrdlabels, cv->name, cv->value, RRDLABEL_SRC_CONFIG);
  1223. cv->flags |= CONFIG_VALUE_USED;
  1224. }
  1225. config_section_unlock(co);
  1226. }
  1227. }
  1228. static void rrdhost_load_kubernetes_labels(void) {
  1229. char label_script[sizeof(char) * (strlen(netdata_configured_primary_plugins_dir) + strlen("get-kubernetes-labels.sh") + 2)];
  1230. sprintf(label_script, "%s/%s", netdata_configured_primary_plugins_dir, "get-kubernetes-labels.sh");
  1231. if (unlikely(access(label_script, R_OK) != 0)) {
  1232. nd_log(NDLS_DAEMON, NDLP_ERR,
  1233. "Kubernetes pod label fetching script %s not found.",
  1234. label_script);
  1235. return;
  1236. }
  1237. pid_t pid;
  1238. FILE *fp_child_input;
  1239. FILE *fp_child_output = netdata_popen(label_script, &pid, &fp_child_input);
  1240. if(!fp_child_output) return;
  1241. char buffer[1000 + 1];
  1242. while (fgets(buffer, 1000, fp_child_output) != NULL)
  1243. rrdlabels_add_pair(localhost->rrdlabels, buffer, RRDLABEL_SRC_AUTO|RRDLABEL_SRC_K8S);
  1244. // Non-zero exit code means that all the script output is error messages. We've shown already any message that didn't include a ':'
  1245. // Here we'll inform with an ERROR that the script failed, show whatever (if anything) was added to the list of labels, free the memory and set the return to null
  1246. int rc = netdata_pclose(fp_child_input, fp_child_output, pid);
  1247. if(rc)
  1248. nd_log(NDLS_DAEMON, NDLP_ERR,
  1249. "%s exited abnormally. Failed to get kubernetes labels.",
  1250. label_script);
  1251. }
  1252. void reload_host_labels(void) {
  1253. if(!localhost->rrdlabels)
  1254. localhost->rrdlabels = rrdlabels_create();
  1255. rrdlabels_unmark_all(localhost->rrdlabels);
  1256. // priority is important here
  1257. rrdhost_load_config_labels();
  1258. rrdhost_load_kubernetes_labels();
  1259. rrdhost_load_auto_labels();
  1260. rrdhost_flag_set(localhost,RRDHOST_FLAG_METADATA_LABELS | RRDHOST_FLAG_METADATA_UPDATE);
  1261. rrdpush_send_host_labels(localhost);
  1262. }
  1263. void rrdhost_finalize_collection(RRDHOST *host) {
  1264. nd_log(NDLS_DAEMON, NDLP_DEBUG,
  1265. "RRD: 'host:%s' stopping data collection...",
  1266. rrdhost_hostname(host));
  1267. RRDSET *st;
  1268. rrdset_foreach_read(st, host)
  1269. rrdset_finalize_collection(st, true);
  1270. rrdset_foreach_done(st);
  1271. }
  1272. // ----------------------------------------------------------------------------
  1273. // RRDHOST - delete host files
  1274. void rrdhost_delete_charts(RRDHOST *host) {
  1275. if(!host) return;
  1276. nd_log(NDLS_DAEMON, NDLP_DEBUG,
  1277. "RRD: 'host:%s' deleting disk files...",
  1278. rrdhost_hostname(host));
  1279. RRDSET *st;
  1280. if(host->rrd_memory_mode == RRD_MEMORY_MODE_SAVE || host->rrd_memory_mode == RRD_MEMORY_MODE_MAP) {
  1281. // we get a write lock
  1282. // to ensure only one thread is saving the database
  1283. rrdset_foreach_write(st, host){
  1284. rrdset_delete_files(st);
  1285. }
  1286. rrdset_foreach_done(st);
  1287. }
  1288. recursively_delete_dir(host->cache_dir, "left over host");
  1289. }
  1290. // ----------------------------------------------------------------------------
  1291. // RRDHOST - cleanup host files
  1292. void rrdhost_cleanup_charts(RRDHOST *host) {
  1293. if(!host) return;
  1294. nd_log(NDLS_DAEMON, NDLP_DEBUG,
  1295. "RRD: 'host:%s' cleaning up disk files...",
  1296. rrdhost_hostname(host));
  1297. RRDSET *st;
  1298. uint32_t rrdhost_delete_obsolete_charts = rrdhost_option_check(host, RRDHOST_OPTION_DELETE_OBSOLETE_CHARTS);
  1299. // we get a write lock
  1300. // to ensure only one thread is saving the database
  1301. rrdset_foreach_write(st, host) {
  1302. if(rrdhost_delete_obsolete_charts && rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE))
  1303. rrdset_delete_files(st);
  1304. else if(rrdhost_delete_obsolete_charts && rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE_DIMENSIONS))
  1305. rrdset_delete_obsolete_dimensions(st);
  1306. else
  1307. rrdset_save(st);
  1308. }
  1309. rrdset_foreach_done(st);
  1310. }
  1311. // ----------------------------------------------------------------------------
  1312. // RRDHOST - save all hosts to disk
  1313. void rrdhost_save_all(void) {
  1314. nd_log(NDLS_DAEMON, NDLP_DEBUG,
  1315. "RRD: saving databases [%zu hosts(s)]...",
  1316. rrdhost_hosts_available());
  1317. rrd_rdlock();
  1318. RRDHOST *host;
  1319. rrdhost_foreach_read(host)
  1320. rrdhost_save_charts(host);
  1321. rrd_unlock();
  1322. }
  1323. // ----------------------------------------------------------------------------
  1324. // RRDHOST - save or delete all hosts from disk
  1325. void rrdhost_cleanup_all(void) {
  1326. nd_log(NDLS_DAEMON, NDLP_DEBUG,
  1327. "RRD: cleaning up database [%zu hosts(s)]...",
  1328. rrdhost_hosts_available());
  1329. rrd_rdlock();
  1330. RRDHOST *host;
  1331. rrdhost_foreach_read(host) {
  1332. if (host != localhost && rrdhost_option_check(host, RRDHOST_OPTION_DELETE_ORPHAN_HOST) && !host->receiver
  1333. /* don't delete multi-host DB host files */
  1334. && !(host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE && is_storage_engine_shared(host->db[0].instance))
  1335. )
  1336. rrdhost_delete_charts(host);
  1337. else
  1338. rrdhost_cleanup_charts(host);
  1339. }
  1340. rrd_unlock();
  1341. }
  1342. // ----------------------------------------------------------------------------
  1343. // RRDHOST - set system info from environment variables
  1344. // system_info fields must be heap allocated or NULL
  1345. int rrdhost_set_system_info_variable(struct rrdhost_system_info *system_info, char *name, char *value) {
  1346. int res = 0;
  1347. if (!strcmp(name, "NETDATA_PROTOCOL_VERSION"))
  1348. return res;
  1349. else if(!strcmp(name, "NETDATA_INSTANCE_CLOUD_TYPE")){
  1350. freez(system_info->cloud_provider_type);
  1351. system_info->cloud_provider_type = strdupz(value);
  1352. }
  1353. else if(!strcmp(name, "NETDATA_INSTANCE_CLOUD_INSTANCE_TYPE")){
  1354. freez(system_info->cloud_instance_type);
  1355. system_info->cloud_instance_type = strdupz(value);
  1356. }
  1357. else if(!strcmp(name, "NETDATA_INSTANCE_CLOUD_INSTANCE_REGION")){
  1358. freez(system_info->cloud_instance_region);
  1359. system_info->cloud_instance_region = strdupz(value);
  1360. }
  1361. else if(!strcmp(name, "NETDATA_CONTAINER_OS_NAME")){
  1362. freez(system_info->container_os_name);
  1363. system_info->container_os_name = strdupz(value);
  1364. }
  1365. else if(!strcmp(name, "NETDATA_CONTAINER_OS_ID")){
  1366. freez(system_info->container_os_id);
  1367. system_info->container_os_id = strdupz(value);
  1368. }
  1369. else if(!strcmp(name, "NETDATA_CONTAINER_OS_ID_LIKE")){
  1370. freez(system_info->container_os_id_like);
  1371. system_info->container_os_id_like = strdupz(value);
  1372. }
  1373. else if(!strcmp(name, "NETDATA_CONTAINER_OS_VERSION")){
  1374. freez(system_info->container_os_version);
  1375. system_info->container_os_version = strdupz(value);
  1376. }
  1377. else if(!strcmp(name, "NETDATA_CONTAINER_OS_VERSION_ID")){
  1378. freez(system_info->container_os_version_id);
  1379. system_info->container_os_version_id = strdupz(value);
  1380. }
  1381. else if(!strcmp(name, "NETDATA_CONTAINER_OS_DETECTION")){
  1382. freez(system_info->container_os_detection);
  1383. system_info->container_os_detection = strdupz(value);
  1384. }
  1385. else if(!strcmp(name, "NETDATA_HOST_OS_NAME")){
  1386. freez(system_info->host_os_name);
  1387. system_info->host_os_name = strdupz(value);
  1388. json_fix_string(system_info->host_os_name);
  1389. }
  1390. else if(!strcmp(name, "NETDATA_HOST_OS_ID")){
  1391. freez(system_info->host_os_id);
  1392. system_info->host_os_id = strdupz(value);
  1393. }
  1394. else if(!strcmp(name, "NETDATA_HOST_OS_ID_LIKE")){
  1395. freez(system_info->host_os_id_like);
  1396. system_info->host_os_id_like = strdupz(value);
  1397. }
  1398. else if(!strcmp(name, "NETDATA_HOST_OS_VERSION")){
  1399. freez(system_info->host_os_version);
  1400. system_info->host_os_version = strdupz(value);
  1401. }
  1402. else if(!strcmp(name, "NETDATA_HOST_OS_VERSION_ID")){
  1403. freez(system_info->host_os_version_id);
  1404. system_info->host_os_version_id = strdupz(value);
  1405. }
  1406. else if(!strcmp(name, "NETDATA_HOST_OS_DETECTION")){
  1407. freez(system_info->host_os_detection);
  1408. system_info->host_os_detection = strdupz(value);
  1409. }
  1410. else if(!strcmp(name, "NETDATA_SYSTEM_KERNEL_NAME")){
  1411. freez(system_info->kernel_name);
  1412. system_info->kernel_name = strdupz(value);
  1413. }
  1414. else if(!strcmp(name, "NETDATA_SYSTEM_CPU_LOGICAL_CPU_COUNT")){
  1415. freez(system_info->host_cores);
  1416. system_info->host_cores = strdupz(value);
  1417. }
  1418. else if(!strcmp(name, "NETDATA_SYSTEM_CPU_FREQ")){
  1419. freez(system_info->host_cpu_freq);
  1420. system_info->host_cpu_freq = strdupz(value);
  1421. }
  1422. else if(!strcmp(name, "NETDATA_SYSTEM_TOTAL_RAM")){
  1423. freez(system_info->host_ram_total);
  1424. system_info->host_ram_total = strdupz(value);
  1425. }
  1426. else if(!strcmp(name, "NETDATA_SYSTEM_TOTAL_DISK_SIZE")){
  1427. freez(system_info->host_disk_space);
  1428. system_info->host_disk_space = strdupz(value);
  1429. }
  1430. else if(!strcmp(name, "NETDATA_SYSTEM_KERNEL_VERSION")){
  1431. freez(system_info->kernel_version);
  1432. system_info->kernel_version = strdupz(value);
  1433. }
  1434. else if(!strcmp(name, "NETDATA_SYSTEM_ARCHITECTURE")){
  1435. freez(system_info->architecture);
  1436. system_info->architecture = strdupz(value);
  1437. }
  1438. else if(!strcmp(name, "NETDATA_SYSTEM_VIRTUALIZATION")){
  1439. freez(system_info->virtualization);
  1440. system_info->virtualization = strdupz(value);
  1441. }
  1442. else if(!strcmp(name, "NETDATA_SYSTEM_VIRT_DETECTION")){
  1443. freez(system_info->virt_detection);
  1444. system_info->virt_detection = strdupz(value);
  1445. }
  1446. else if(!strcmp(name, "NETDATA_SYSTEM_CONTAINER")){
  1447. freez(system_info->container);
  1448. system_info->container = strdupz(value);
  1449. }
  1450. else if(!strcmp(name, "NETDATA_SYSTEM_CONTAINER_DETECTION")){
  1451. freez(system_info->container_detection);
  1452. system_info->container_detection = strdupz(value);
  1453. }
  1454. else if(!strcmp(name, "NETDATA_HOST_IS_K8S_NODE")){
  1455. freez(system_info->is_k8s_node);
  1456. system_info->is_k8s_node = strdupz(value);
  1457. }
  1458. else if (!strcmp(name, "NETDATA_SYSTEM_CPU_VENDOR"))
  1459. return res;
  1460. else if (!strcmp(name, "NETDATA_SYSTEM_CPU_MODEL"))
  1461. return res;
  1462. else if (!strcmp(name, "NETDATA_SYSTEM_CPU_DETECTION"))
  1463. return res;
  1464. else if (!strcmp(name, "NETDATA_SYSTEM_RAM_DETECTION"))
  1465. return res;
  1466. else if (!strcmp(name, "NETDATA_SYSTEM_DISK_DETECTION"))
  1467. return res;
  1468. else if (!strcmp(name, "NETDATA_CONTAINER_IS_OFFICIAL_IMAGE"))
  1469. return res;
  1470. else {
  1471. res = 1;
  1472. }
  1473. return res;
  1474. }
  1475. static NETDATA_DOUBLE rrdhost_sender_replication_completion_unsafe(RRDHOST *host, time_t now, size_t *instances) {
  1476. size_t charts = rrdhost_sender_replicating_charts(host);
  1477. NETDATA_DOUBLE completion;
  1478. if(!charts || !host->sender || !host->sender->replication.oldest_request_after_t)
  1479. completion = 100.0;
  1480. else if(!host->sender->replication.latest_completed_before_t || host->sender->replication.latest_completed_before_t < host->sender->replication.oldest_request_after_t)
  1481. completion = 0.0;
  1482. else {
  1483. time_t total = now - host->sender->replication.oldest_request_after_t;
  1484. time_t current = host->sender->replication.latest_completed_before_t - host->sender->replication.oldest_request_after_t;
  1485. completion = (NETDATA_DOUBLE) current * 100.0 / (NETDATA_DOUBLE) total;
  1486. }
  1487. *instances = charts;
  1488. return completion;
  1489. }
  1490. bool rrdhost_matches_window(RRDHOST *host, time_t after, time_t before, time_t now) {
  1491. time_t first_time_s, last_time_s;
  1492. rrdhost_retention(host, now, rrdhost_is_online(host), &first_time_s, &last_time_s);
  1493. return query_matches_retention(after, before, first_time_s, last_time_s, 0);
  1494. }
  1495. bool rrdhost_state_cloud_emulation(RRDHOST *host) {
  1496. return rrdhost_is_online(host);
  1497. }
  1498. void rrdhost_status(RRDHOST *host, time_t now, RRDHOST_STATUS *s) {
  1499. memset(s, 0, sizeof(*s));
  1500. s->host = host;
  1501. s->now = now;
  1502. RRDHOST_FLAGS flags = __atomic_load_n(&host->flags, __ATOMIC_RELAXED);
  1503. // --- db ---
  1504. bool online = rrdhost_is_online(host);
  1505. rrdhost_retention(host, now, online, &s->db.first_time_s, &s->db.last_time_s);
  1506. s->db.metrics = host->rrdctx.metrics;
  1507. s->db.instances = host->rrdctx.instances;
  1508. s->db.contexts = dictionary_entries(host->rrdctx.contexts);
  1509. if(!s->db.first_time_s || !s->db.last_time_s || !s->db.metrics || !s->db.instances || !s->db.contexts ||
  1510. (flags & (RRDHOST_FLAG_PENDING_CONTEXT_LOAD|RRDHOST_FLAG_CONTEXT_LOAD_IN_PROGRESS)))
  1511. s->db.status = RRDHOST_DB_STATUS_INITIALIZING;
  1512. else
  1513. s->db.status = RRDHOST_DB_STATUS_QUERYABLE;
  1514. s->db.mode = host->rrd_memory_mode;
  1515. // --- ingest ---
  1516. s->ingest.since = MAX(host->child_connect_time, host->child_disconnected_time);
  1517. s->ingest.reason = (online) ? STREAM_HANDSHAKE_NEVER : host->rrdpush_last_receiver_exit_reason;
  1518. netdata_mutex_lock(&host->receiver_lock);
  1519. s->ingest.hops = (host->system_info ? host->system_info->hops : (host == localhost) ? 0 : 1);
  1520. bool has_receiver = false;
  1521. if (host->receiver) {
  1522. has_receiver = true;
  1523. s->ingest.replication.instances = rrdhost_receiver_replicating_charts(host);
  1524. s->ingest.replication.completion = host->rrdpush_receiver_replication_percent;
  1525. s->ingest.replication.in_progress = s->ingest.replication.instances > 0;
  1526. s->ingest.capabilities = host->receiver->capabilities;
  1527. s->ingest.peers = socket_peers(host->receiver->fd);
  1528. #ifdef ENABLE_HTTPS
  1529. s->ingest.ssl = SSL_connection(&host->receiver->ssl);
  1530. #endif
  1531. }
  1532. netdata_mutex_unlock(&host->receiver_lock);
  1533. if (online) {
  1534. if(s->db.status == RRDHOST_DB_STATUS_INITIALIZING)
  1535. s->ingest.status = RRDHOST_INGEST_STATUS_INITIALIZING;
  1536. else if (host == localhost || rrdhost_option_check(host, RRDHOST_OPTION_VIRTUAL_HOST)) {
  1537. s->ingest.status = RRDHOST_INGEST_STATUS_ONLINE;
  1538. s->ingest.since = netdata_start_time;
  1539. }
  1540. else if (s->ingest.replication.in_progress)
  1541. s->ingest.status = RRDHOST_INGEST_STATUS_REPLICATING;
  1542. else
  1543. s->ingest.status = RRDHOST_INGEST_STATUS_ONLINE;
  1544. }
  1545. else {
  1546. if (!s->ingest.since) {
  1547. s->ingest.status = RRDHOST_INGEST_STATUS_ARCHIVED;
  1548. s->ingest.since = s->db.last_time_s;
  1549. }
  1550. else
  1551. s->ingest.status = RRDHOST_INGEST_STATUS_OFFLINE;
  1552. }
  1553. if(host == localhost)
  1554. s->ingest.type = RRDHOST_INGEST_TYPE_LOCALHOST;
  1555. else if(has_receiver || rrdhost_flag_set(host, RRDHOST_FLAG_RRDPUSH_RECEIVER_DISCONNECTED))
  1556. s->ingest.type = RRDHOST_INGEST_TYPE_CHILD;
  1557. else if(rrdhost_option_check(host, RRDHOST_OPTION_VIRTUAL_HOST))
  1558. s->ingest.type = RRDHOST_INGEST_TYPE_VIRTUAL;
  1559. else
  1560. s->ingest.type = RRDHOST_INGEST_TYPE_ARCHIVED;
  1561. s->ingest.id = host->rrdpush_receiver_connection_counter;
  1562. if(!s->ingest.since)
  1563. s->ingest.since = netdata_start_time;
  1564. if(s->ingest.status == RRDHOST_INGEST_STATUS_ONLINE)
  1565. s->db.liveness = RRDHOST_DB_LIVENESS_LIVE;
  1566. else
  1567. s->db.liveness = RRDHOST_DB_LIVENESS_STALE;
  1568. // --- stream ---
  1569. if (!host->sender) {
  1570. s->stream.status = RRDHOST_STREAM_STATUS_DISABLED;
  1571. s->stream.hops = s->ingest.hops + 1;
  1572. }
  1573. else {
  1574. sender_lock(host->sender);
  1575. s->stream.since = host->sender->last_state_since_t;
  1576. s->stream.peers = socket_peers(host->sender->rrdpush_sender_socket);
  1577. #ifdef ENABLE_HTTPS
  1578. s->stream.ssl = SSL_connection(&host->sender->ssl);
  1579. #endif
  1580. memcpy(s->stream.sent_bytes_on_this_connection_per_type,
  1581. host->sender->sent_bytes_on_this_connection_per_type,
  1582. MIN(sizeof(s->stream.sent_bytes_on_this_connection_per_type),
  1583. sizeof(host->sender->sent_bytes_on_this_connection_per_type)));
  1584. if (rrdhost_flag_check(host, RRDHOST_FLAG_RRDPUSH_SENDER_CONNECTED)) {
  1585. s->stream.hops = host->sender->hops;
  1586. s->stream.reason = STREAM_HANDSHAKE_NEVER;
  1587. s->stream.capabilities = host->sender->capabilities;
  1588. s->stream.replication.completion = rrdhost_sender_replication_completion_unsafe(host, now, &s->stream.replication.instances);
  1589. s->stream.replication.in_progress = s->stream.replication.instances > 0;
  1590. if(s->stream.replication.in_progress)
  1591. s->stream.status = RRDHOST_STREAM_STATUS_REPLICATING;
  1592. else
  1593. s->stream.status = RRDHOST_STREAM_STATUS_ONLINE;
  1594. s->stream.compression = host->sender->compressor.initialized;
  1595. }
  1596. else {
  1597. s->stream.status = RRDHOST_STREAM_STATUS_OFFLINE;
  1598. s->stream.hops = s->ingest.hops + 1;
  1599. s->stream.reason = host->sender->exit.reason;
  1600. }
  1601. sender_unlock(host->sender);
  1602. }
  1603. s->stream.id = host->rrdpush_sender_connection_counter;
  1604. if(!s->stream.since)
  1605. s->stream.since = netdata_start_time;
  1606. // --- ml ---
  1607. if(ml_host_get_host_status(host, &s->ml.metrics)) {
  1608. s->ml.type = RRDHOST_ML_TYPE_SELF;
  1609. if(s->ingest.status == RRDHOST_INGEST_STATUS_OFFLINE || s->ingest.status == RRDHOST_INGEST_STATUS_ARCHIVED)
  1610. s->ml.status = RRDHOST_ML_STATUS_OFFLINE;
  1611. else
  1612. s->ml.status = RRDHOST_ML_STATUS_RUNNING;
  1613. }
  1614. else if(stream_has_capability(&s->ingest, STREAM_CAP_DATA_WITH_ML)) {
  1615. s->ml.type = RRDHOST_ML_TYPE_RECEIVED;
  1616. s->ml.status = RRDHOST_ML_STATUS_RUNNING;
  1617. }
  1618. else {
  1619. // does not receive ML, does not run ML
  1620. s->ml.type = RRDHOST_ML_TYPE_DISABLED;
  1621. s->ml.status = RRDHOST_ML_STATUS_DISABLED;
  1622. }
  1623. // --- health ---
  1624. if(host->health.health_enabled) {
  1625. if(flags & RRDHOST_FLAG_PENDING_HEALTH_INITIALIZATION)
  1626. s->health.status = RRDHOST_HEALTH_STATUS_INITIALIZING;
  1627. else {
  1628. s->health.status = RRDHOST_HEALTH_STATUS_RUNNING;
  1629. RRDCALC *rc;
  1630. foreach_rrdcalc_in_rrdhost_read(host, rc) {
  1631. if (unlikely(!rc->rrdset || !rc->rrdset->last_collected_time.tv_sec))
  1632. continue;
  1633. switch (rc->status) {
  1634. default:
  1635. case RRDCALC_STATUS_REMOVED:
  1636. break;
  1637. case RRDCALC_STATUS_CLEAR:
  1638. s->health.alerts.clear++;
  1639. break;
  1640. case RRDCALC_STATUS_WARNING:
  1641. s->health.alerts.warning++;
  1642. break;
  1643. case RRDCALC_STATUS_CRITICAL:
  1644. s->health.alerts.critical++;
  1645. break;
  1646. case RRDCALC_STATUS_UNDEFINED:
  1647. s->health.alerts.undefined++;
  1648. break;
  1649. case RRDCALC_STATUS_UNINITIALIZED:
  1650. s->health.alerts.uninitialized++;
  1651. break;
  1652. }
  1653. }
  1654. foreach_rrdcalc_in_rrdhost_done(rc);
  1655. }
  1656. }
  1657. else
  1658. s->health.status = RRDHOST_HEALTH_STATUS_DISABLED;
  1659. }