rrdhost.c 57 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604
  1. // SPDX-License-Identifier: GPL-3.0-or-later
  2. #define NETDATA_RRD_INTERNALS
  3. #include "rrd.h"
  4. RRDHOST *localhost = NULL;
  5. size_t rrd_hosts_available = 0;
  6. netdata_rwlock_t rrd_rwlock = NETDATA_RWLOCK_INITIALIZER;
  7. time_t rrdset_free_obsolete_time = 3600;
  8. time_t rrdhost_free_orphan_time = 3600;
  9. // ----------------------------------------------------------------------------
  10. // RRDHOST index
  11. int rrdhost_compare(void* a, void* b) {
  12. if(((RRDHOST *)a)->hash_machine_guid < ((RRDHOST *)b)->hash_machine_guid) return -1;
  13. else if(((RRDHOST *)a)->hash_machine_guid > ((RRDHOST *)b)->hash_machine_guid) return 1;
  14. else return strcmp(((RRDHOST *)a)->machine_guid, ((RRDHOST *)b)->machine_guid);
  15. }
  16. avl_tree_lock rrdhost_root_index = {
  17. .avl_tree = { NULL, rrdhost_compare },
  18. .rwlock = AVL_LOCK_INITIALIZER
  19. };
  20. RRDHOST *rrdhost_find_by_guid(const char *guid, uint32_t hash) {
  21. debug(D_RRDHOST, "Searching in index for host with guid '%s'", guid);
  22. RRDHOST tmp;
  23. strncpyz(tmp.machine_guid, guid, GUID_LEN);
  24. tmp.hash_machine_guid = (hash)?hash:simple_hash(tmp.machine_guid);
  25. return (RRDHOST *)avl_search_lock(&(rrdhost_root_index), (avl *) &tmp);
  26. }
  27. RRDHOST *rrdhost_find_by_hostname(const char *hostname, uint32_t hash) {
  28. if(unlikely(!strcmp(hostname, "localhost")))
  29. return localhost;
  30. if(unlikely(!hash)) hash = simple_hash(hostname);
  31. rrd_rdlock();
  32. RRDHOST *host;
  33. rrdhost_foreach_read(host) {
  34. if(unlikely((hash == host->hash_hostname && !strcmp(hostname, host->hostname)))) {
  35. rrd_unlock();
  36. return host;
  37. }
  38. }
  39. rrd_unlock();
  40. return NULL;
  41. }
  // Insert / remove a host in rrdhost_root_index.
  // Each macro evaluates to the RRDHOST * returned by the underlying AVL call.
  // rrdhost_create() treats a result different from the host it passed in as
  // "a host with this guid is already indexed".
  // The whole expansion is parenthesized so the cast cannot be split from the
  // call by operators written after the macro (e.g. `->`).
  #define rrdhost_index_add(rrdhost) ((RRDHOST *)avl_insert_lock(&(rrdhost_root_index), (avl *)(rrdhost)))
  #define rrdhost_index_del(rrdhost) ((RRDHOST *)avl_remove_lock(&(rrdhost_root_index), (avl *)(rrdhost)))
  44. // ----------------------------------------------------------------------------
  45. // RRDHOST - internal helpers
  46. static inline void rrdhost_init_tags(RRDHOST *host, const char *tags) {
  47. if(host->tags && tags && !strcmp(host->tags, tags))
  48. return;
  49. void *old = (void *)host->tags;
  50. host->tags = (tags && *tags)?strdupz(tags):NULL;
  51. freez(old);
  52. }
  53. static inline void rrdhost_init_hostname(RRDHOST *host, const char *hostname) {
  54. if(host->hostname && hostname && !strcmp(host->hostname, hostname))
  55. return;
  56. void *old = host->hostname;
  57. host->hostname = strdupz(hostname?hostname:"localhost");
  58. host->hash_hostname = simple_hash(host->hostname);
  59. freez(old);
  60. }
  61. static inline void rrdhost_init_os(RRDHOST *host, const char *os) {
  62. if(host->os && os && !strcmp(host->os, os))
  63. return;
  64. void *old = (void *)host->os;
  65. host->os = strdupz(os?os:"unknown");
  66. freez(old);
  67. }
  68. static inline void rrdhost_init_timezone(RRDHOST *host, const char *timezone) {
  69. if(host->timezone && timezone && !strcmp(host->timezone, timezone))
  70. return;
  71. void *old = (void *)host->timezone;
  72. host->timezone = strdupz((timezone && *timezone)?timezone:"unknown");
  73. freez(old);
  74. }
  75. static inline void rrdhost_init_machine_guid(RRDHOST *host, const char *machine_guid) {
  76. strncpy(host->machine_guid, machine_guid, GUID_LEN);
  77. host->machine_guid[GUID_LEN] = '\0';
  78. host->hash_machine_guid = simple_hash(host->machine_guid);
  79. }
  80. void set_host_properties(RRDHOST *host, int update_every, RRD_MEMORY_MODE memory_mode, const char *hostname,
  81. const char *registry_hostname, const char *guid, const char *os, const char *tags,
  82. const char *tzone, const char *program_name, const char *program_version)
  83. {
  84. host->rrd_update_every = update_every;
  85. host->rrd_memory_mode = memory_mode;
  86. rrdhost_init_hostname(host, hostname);
  87. rrdhost_init_machine_guid(host, guid);
  88. rrdhost_init_os(host, os);
  89. rrdhost_init_timezone(host, tzone);
  90. rrdhost_init_tags(host, tags);
  91. host->program_name = strdupz((program_name && *program_name) ? program_name : "unknown");
  92. host->program_version = strdupz((program_version && *program_version) ? program_version : "unknown");
  93. host->registry_hostname = strdupz((registry_hostname && *registry_hostname) ? registry_hostname : host->hostname);
  94. }
  95. // ----------------------------------------------------------------------------
  96. // RRDHOST - add a host
  97. RRDHOST *rrdhost_create(const char *hostname,
  98. const char *registry_hostname,
  99. const char *guid,
  100. const char *os,
  101. const char *timezone,
  102. const char *tags,
  103. const char *program_name,
  104. const char *program_version,
  105. int update_every,
  106. long entries,
  107. RRD_MEMORY_MODE memory_mode,
  108. unsigned int health_enabled,
  109. unsigned int rrdpush_enabled,
  110. char *rrdpush_destination,
  111. char *rrdpush_api_key,
  112. char *rrdpush_send_charts_matching,
  113. struct rrdhost_system_info *system_info,
  114. int is_localhost
  115. ) {
  116. debug(D_RRDHOST, "Host '%s': adding with guid '%s'", hostname, guid);
  117. #ifdef ENABLE_DBENGINE
  118. int is_legacy = (memory_mode == RRD_MEMORY_MODE_DBENGINE) && is_legacy_child(guid);
  119. #else
  120. int is_legacy = 1;
  121. #endif
  122. rrd_check_wrlock();
  123. int is_in_multihost = (memory_mode == RRD_MEMORY_MODE_DBENGINE && !is_legacy);
  124. RRDHOST *host = callocz(1, sizeof(RRDHOST));
  125. set_host_properties(host, (update_every > 0)?update_every:1, memory_mode, hostname, registry_hostname, guid, os,
  126. tags, timezone, program_name, program_version);
  127. host->rrd_history_entries = align_entries_to_pagesize(memory_mode, entries);
  128. host->health_enabled = ((memory_mode == RRD_MEMORY_MODE_NONE)) ? 0 : health_enabled;
  129. host->sender = mallocz(sizeof(*host->sender));
  130. sender_init(host->sender, host);
  131. netdata_mutex_init(&host->receiver_lock);
  132. host->rrdpush_send_enabled = (rrdpush_enabled && rrdpush_destination && *rrdpush_destination && rrdpush_api_key && *rrdpush_api_key) ? 1 : 0;
  133. host->rrdpush_send_destination = (host->rrdpush_send_enabled)?strdupz(rrdpush_destination):NULL;
  134. host->rrdpush_send_api_key = (host->rrdpush_send_enabled)?strdupz(rrdpush_api_key):NULL;
  135. host->rrdpush_send_charts_matching = simple_pattern_create(rrdpush_send_charts_matching, NULL, SIMPLE_PATTERN_EXACT);
  136. host->rrdpush_sender_pipe[0] = -1;
  137. host->rrdpush_sender_pipe[1] = -1;
  138. host->rrdpush_sender_socket = -1;
  139. //host->stream_version = STREAMING_PROTOCOL_CURRENT_VERSION; Unused?
  140. #ifdef ENABLE_HTTPS
  141. host->ssl.conn = NULL;
  142. host->ssl.flags = NETDATA_SSL_START;
  143. host->stream_ssl.conn = NULL;
  144. host->stream_ssl.flags = NETDATA_SSL_START;
  145. #endif
  146. netdata_rwlock_init(&host->rrdhost_rwlock);
  147. netdata_rwlock_init(&host->labels.labels_rwlock);
  148. netdata_mutex_init(&host->aclk_state_lock);
  149. host->system_info = system_info;
  150. avl_init_lock(&(host->rrdset_root_index), rrdset_compare);
  151. avl_init_lock(&(host->rrdset_root_index_name), rrdset_compare_name);
  152. avl_init_lock(&(host->rrdfamily_root_index), rrdfamily_compare);
  153. avl_init_lock(&(host->rrdvar_root_index), rrdvar_compare);
  154. if(config_get_boolean(CONFIG_SECTION_GLOBAL, "delete obsolete charts files", 1))
  155. rrdhost_flag_set(host, RRDHOST_FLAG_DELETE_OBSOLETE_CHARTS);
  156. if(config_get_boolean(CONFIG_SECTION_GLOBAL, "delete orphan hosts files", 1) && !is_localhost)
  157. rrdhost_flag_set(host, RRDHOST_FLAG_DELETE_ORPHAN_HOST);
  158. host->health_default_warn_repeat_every = config_get_duration(CONFIG_SECTION_HEALTH, "default repeat warning", "never");
  159. host->health_default_crit_repeat_every = config_get_duration(CONFIG_SECTION_HEALTH, "default repeat critical", "never");
  160. avl_init_lock(&(host->alarms_idx_health_log), alarm_compare_id);
  161. avl_init_lock(&(host->alarms_idx_name), alarm_compare_name);
  162. // ------------------------------------------------------------------------
  163. // initialize health variables
  164. host->health_log.next_log_id = 1;
  165. host->health_log.next_alarm_id = 1;
  166. host->health_log.max = 1000;
  167. host->health_log.next_log_id = (uint32_t)now_realtime_sec();
  168. host->health_log.next_alarm_id = 0;
  169. long n = config_get_number(CONFIG_SECTION_HEALTH, "in memory max health log entries", host->health_log.max);
  170. if(n < 10) {
  171. error("Host '%s': health configuration has invalid max log entries %ld. Using default %u", host->hostname, n, host->health_log.max);
  172. config_set_number(CONFIG_SECTION_HEALTH, "in memory max health log entries", (long)host->health_log.max);
  173. }
  174. else
  175. host->health_log.max = (unsigned int)n;
  176. netdata_rwlock_init(&host->health_log.alarm_log_rwlock);
  177. char filename[FILENAME_MAX + 1];
  178. if(is_localhost) {
  179. host->cache_dir = strdupz(netdata_configured_cache_dir);
  180. host->varlib_dir = strdupz(netdata_configured_varlib_dir);
  181. }
  182. else {
  183. // this is not localhost - append our GUID to localhost path
  184. if (is_in_multihost) { // don't append to cache dir in multihost
  185. host->cache_dir = strdupz(netdata_configured_cache_dir);
  186. } else {
  187. snprintfz(filename, FILENAME_MAX, "%s/%s", netdata_configured_cache_dir, host->machine_guid);
  188. host->cache_dir = strdupz(filename);
  189. }
  190. if((host->rrd_memory_mode == RRD_MEMORY_MODE_MAP || host->rrd_memory_mode == RRD_MEMORY_MODE_SAVE || (
  191. host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE && is_legacy))) {
  192. int r = mkdir(host->cache_dir, 0775);
  193. if(r != 0 && errno != EEXIST)
  194. error("Host '%s': cannot create directory '%s'", host->hostname, host->cache_dir);
  195. }
  196. snprintfz(filename, FILENAME_MAX, "%s/%s", netdata_configured_varlib_dir, host->machine_guid);
  197. host->varlib_dir = strdupz(filename);
  198. if(host->health_enabled) {
  199. int r = mkdir(host->varlib_dir, 0775);
  200. if(r != 0 && errno != EEXIST)
  201. error("Host '%s': cannot create directory '%s'", host->hostname, host->varlib_dir);
  202. }
  203. }
  204. if(host->health_enabled) {
  205. snprintfz(filename, FILENAME_MAX, "%s/health", host->varlib_dir);
  206. int r = mkdir(filename, 0775);
  207. if(r != 0 && errno != EEXIST)
  208. error("Host '%s': cannot create directory '%s'", host->hostname, filename);
  209. }
  210. snprintfz(filename, FILENAME_MAX, "%s/health/health-log.db", host->varlib_dir);
  211. host->health_log_filename = strdupz(filename);
  212. snprintfz(filename, FILENAME_MAX, "%s/alarm-notify.sh", netdata_configured_primary_plugins_dir);
  213. host->health_default_exec = strdupz(config_get(CONFIG_SECTION_HEALTH, "script to execute on alarm", filename));
  214. host->health_default_recipient = strdupz("root");
  215. // ------------------------------------------------------------------------
  216. // load health configuration
  217. if(host->health_enabled) {
  218. rrdhost_wrlock(host);
  219. health_readdir(host, health_user_config_dir(), health_stock_config_dir(), NULL);
  220. rrdhost_unlock(host);
  221. health_alarm_log_load(host);
  222. health_alarm_log_open(host);
  223. }
  224. RRDHOST *t = rrdhost_index_add(host);
  225. if(t != host) {
  226. error("Host '%s': cannot add host with machine guid '%s' to index. It already exists as host '%s' with machine guid '%s'.", host->hostname, host->machine_guid, t->hostname, t->machine_guid);
  227. rrdhost_free(host);
  228. return NULL;
  229. }
  230. if (host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) {
  231. #ifdef ENABLE_DBENGINE
  232. if (likely(!uuid_parse(host->machine_guid, host->host_uuid))) {
  233. int rc = sql_store_host(&host->host_uuid, hostname, registry_hostname, update_every, os, timezone, tags);
  234. if (unlikely(rc))
  235. error_report("Failed to store machine GUID to the database");
  236. }
  237. else
  238. error_report("Host machine GUID %s is not valid", host->machine_guid);
  239. char dbenginepath[FILENAME_MAX + 1];
  240. int ret;
  241. snprintfz(dbenginepath, FILENAME_MAX, "%s/dbengine", host->cache_dir);
  242. ret = mkdir(dbenginepath, 0775);
  243. if (ret != 0 && errno != EEXIST)
  244. error("Host '%s': cannot create directory '%s'", host->hostname, dbenginepath);
  245. else ret = 0; // succeed
  246. if (is_legacy) // initialize legacy dbengine instance as needed
  247. ret = rrdeng_init(host, &host->rrdeng_ctx, dbenginepath, default_rrdeng_page_cache_mb,
  248. default_rrdeng_disk_quota_mb); // may fail here for legacy dbengine initialization
  249. else
  250. host->rrdeng_ctx = &multidb_ctx;
  251. if (ret) { // check legacy or multihost initialization success
  252. error(
  253. "Host '%s': cannot initialize host with machine guid '%s'. Failed to initialize DB engine at '%s'.",
  254. host->hostname, host->machine_guid, host->cache_dir);
  255. rrdhost_free(host);
  256. host = NULL;
  257. //rrd_hosts_available++; //TODO: maybe we want this?
  258. return host;
  259. }
  260. #else
  261. fatal("RRD_MEMORY_MODE_DBENGINE is not supported in this platform.");
  262. #endif
  263. }
  264. // ------------------------------------------------------------------------
  265. // link it and add it to the index
  266. if(is_localhost) {
  267. host->next = localhost;
  268. localhost = host;
  269. }
  270. else {
  271. if(localhost) {
  272. host->next = localhost->next;
  273. localhost->next = host;
  274. }
  275. else localhost = host;
  276. }
  277. info("Host '%s' (at registry as '%s') with guid '%s' initialized"
  278. ", os '%s'"
  279. ", timezone '%s'"
  280. ", tags '%s'"
  281. ", program_name '%s'"
  282. ", program_version '%s'"
  283. ", update every %d"
  284. ", memory mode %s"
  285. ", history entries %ld"
  286. ", streaming %s"
  287. " (to '%s' with api key '%s')"
  288. ", health %s"
  289. ", cache_dir '%s'"
  290. ", varlib_dir '%s'"
  291. ", health_log '%s'"
  292. ", alarms default handler '%s'"
  293. ", alarms default recipient '%s'"
  294. , host->hostname
  295. , host->registry_hostname
  296. , host->machine_guid
  297. , host->os
  298. , host->timezone
  299. , (host->tags)?host->tags:""
  300. , host->program_name
  301. , host->program_version
  302. , host->rrd_update_every
  303. , rrd_memory_mode_name(host->rrd_memory_mode)
  304. , host->rrd_history_entries
  305. , host->rrdpush_send_enabled?"enabled":"disabled"
  306. , host->rrdpush_send_destination?host->rrdpush_send_destination:""
  307. , host->rrdpush_send_api_key?host->rrdpush_send_api_key:""
  308. , host->health_enabled?"enabled":"disabled"
  309. , host->cache_dir
  310. , host->varlib_dir
  311. , host->health_log_filename
  312. , host->health_default_exec
  313. , host->health_default_recipient
  314. );
  315. rrd_hosts_available++;
  316. return host;
  317. }
  318. void rrdhost_update(RRDHOST *host
  319. , const char *hostname
  320. , const char *registry_hostname
  321. , const char *guid
  322. , const char *os
  323. , const char *timezone
  324. , const char *tags
  325. , const char *program_name
  326. , const char *program_version
  327. , int update_every
  328. , long history
  329. , RRD_MEMORY_MODE mode
  330. , unsigned int health_enabled
  331. , unsigned int rrdpush_enabled
  332. , char *rrdpush_destination
  333. , char *rrdpush_api_key
  334. , char *rrdpush_send_charts_matching
  335. , struct rrdhost_system_info *system_info
  336. )
  337. {
  338. UNUSED(guid);
  339. UNUSED(rrdpush_enabled);
  340. UNUSED(rrdpush_destination);
  341. UNUSED(rrdpush_api_key);
  342. UNUSED(rrdpush_send_charts_matching);
  343. host->health_enabled = (mode == RRD_MEMORY_MODE_NONE) ? 0 : health_enabled;
  344. //host->stream_version = STREAMING_PROTOCOL_CURRENT_VERSION; Unused?
  345. rrdhost_system_info_free(host->system_info);
  346. host->system_info = system_info;
  347. rrdhost_init_os(host, os);
  348. rrdhost_init_timezone(host, timezone);
  349. freez(host->registry_hostname);
  350. host->registry_hostname = strdupz((registry_hostname && *registry_hostname)?registry_hostname:hostname);
  351. if(strcmp(host->hostname, hostname) != 0) {
  352. info("Host '%s' has been renamed to '%s'. If this is not intentional it may mean multiple hosts are using the same machine_guid.", host->hostname, hostname);
  353. char *t = host->hostname;
  354. host->hostname = strdupz(hostname);
  355. host->hash_hostname = simple_hash(host->hostname);
  356. freez(t);
  357. }
  358. if(strcmp(host->program_name, program_name) != 0) {
  359. info("Host '%s' switched program name from '%s' to '%s'", host->hostname, host->program_name, program_name);
  360. char *t = host->program_name;
  361. host->program_name = strdupz(program_name);
  362. freez(t);
  363. }
  364. if(strcmp(host->program_version, program_version) != 0) {
  365. info("Host '%s' switched program version from '%s' to '%s'", host->hostname, host->program_version, program_version);
  366. char *t = host->program_version;
  367. host->program_version = strdupz(program_version);
  368. freez(t);
  369. }
  370. if(host->rrd_update_every != update_every)
  371. error("Host '%s' has an update frequency of %d seconds, but the wanted one is %d seconds. Restart netdata here to apply the new settings.", host->hostname, host->rrd_update_every, update_every);
  372. if(host->rrd_history_entries < history)
  373. error("Host '%s' has history of %ld entries, but the wanted one is %ld entries. Restart netdata here to apply the new settings.", host->hostname, host->rrd_history_entries, history);
  374. if(host->rrd_memory_mode != mode)
  375. error("Host '%s' has memory mode '%s', but the wanted one is '%s'. Restart netdata here to apply the new settings.", host->hostname, rrd_memory_mode_name(host->rrd_memory_mode), rrd_memory_mode_name(mode));
  376. // update host tags
  377. rrdhost_init_tags(host, tags);
  378. if (rrdhost_flag_check(host, RRDHOST_FLAG_ARCHIVED)) {
  379. rrdhost_flag_clear(host, RRDHOST_FLAG_ARCHIVED);
  380. if(host->health_enabled) {
  381. int r;
  382. char filename[FILENAME_MAX + 1];
  383. if (host != localhost) {
  384. r = mkdir(host->varlib_dir, 0775);
  385. if (r != 0 && errno != EEXIST)
  386. error("Host '%s': cannot create directory '%s'", host->hostname, host->varlib_dir);
  387. }
  388. snprintfz(filename, FILENAME_MAX, "%s/health", host->varlib_dir);
  389. r = mkdir(filename, 0775);
  390. if(r != 0 && errno != EEXIST)
  391. error("Host '%s': cannot create directory '%s'", host->hostname, filename);
  392. rrdhost_wrlock(host);
  393. health_readdir(host, health_user_config_dir(), health_stock_config_dir(), NULL);
  394. rrdhost_unlock(host);
  395. health_alarm_log_load(host);
  396. health_alarm_log_open(host);
  397. }
  398. rrd_hosts_available++;
  399. info("Host %s is not in archived mode anymore", host->hostname);
  400. }
  401. return;
  402. }
  403. RRDHOST *rrdhost_find_or_create(
  404. const char *hostname
  405. , const char *registry_hostname
  406. , const char *guid
  407. , const char *os
  408. , const char *timezone
  409. , const char *tags
  410. , const char *program_name
  411. , const char *program_version
  412. , int update_every
  413. , long history
  414. , RRD_MEMORY_MODE mode
  415. , unsigned int health_enabled
  416. , unsigned int rrdpush_enabled
  417. , char *rrdpush_destination
  418. , char *rrdpush_api_key
  419. , char *rrdpush_send_charts_matching
  420. , struct rrdhost_system_info *system_info
  421. ) {
  422. debug(D_RRDHOST, "Searching for host '%s' with guid '%s'", hostname, guid);
  423. rrd_wrlock();
  424. RRDHOST *host = rrdhost_find_by_guid(guid, 0);
  425. if (unlikely(host && RRD_MEMORY_MODE_DBENGINE != mode && rrdhost_flag_check(host, RRDHOST_FLAG_ARCHIVED))) {
  426. /* If a legacy memory mode instantiates all dbengine state must be discarded to avoid inconsistencies */
  427. error("Archived host '%s' has memory mode '%s', but the wanted one is '%s'. Discarding archived state.",
  428. host->hostname, rrd_memory_mode_name(host->rrd_memory_mode), rrd_memory_mode_name(mode));
  429. rrdhost_free(host);
  430. host = NULL;
  431. }
  432. if(!host) {
  433. host = rrdhost_create(
  434. hostname
  435. , registry_hostname
  436. , guid
  437. , os
  438. , timezone
  439. , tags
  440. , program_name
  441. , program_version
  442. , update_every
  443. , history
  444. , mode
  445. , health_enabled
  446. , rrdpush_enabled
  447. , rrdpush_destination
  448. , rrdpush_api_key
  449. , rrdpush_send_charts_matching
  450. , system_info
  451. , 0
  452. );
  453. }
  454. else {
  455. rrdhost_update(host
  456. , hostname
  457. , registry_hostname
  458. , guid
  459. , os
  460. , timezone
  461. , tags
  462. , program_name
  463. , program_version
  464. , update_every
  465. , history
  466. , mode
  467. , health_enabled
  468. , rrdpush_enabled
  469. , rrdpush_destination
  470. , rrdpush_api_key
  471. , rrdpush_send_charts_matching
  472. , system_info);
  473. }
  474. if (host) {
  475. rrdhost_wrlock(host);
  476. rrdhost_flag_clear(host, RRDHOST_FLAG_ORPHAN);
  477. host->senders_disconnected_time = 0;
  478. rrdhost_unlock(host);
  479. }
  480. rrdhost_cleanup_orphan_hosts_nolock(host);
  481. rrd_unlock();
  482. return host;
  483. }
  484. inline int rrdhost_should_be_removed(RRDHOST *host, RRDHOST *protected, time_t now) {
  485. if(host != protected
  486. && host != localhost
  487. && rrdhost_flag_check(host, RRDHOST_FLAG_ORPHAN)
  488. && host->receiver
  489. && host->senders_disconnected_time
  490. && host->senders_disconnected_time + rrdhost_free_orphan_time < now)
  491. return 1;
  492. return 0;
  493. }
  494. void rrdhost_cleanup_orphan_hosts_nolock(RRDHOST *protected) {
  495. time_t now = now_realtime_sec();
  496. RRDHOST *host;
  497. restart_after_removal:
  498. rrdhost_foreach_write(host) {
  499. if(rrdhost_should_be_removed(host, protected, now)) {
  500. info("Host '%s' with machine guid '%s' is obsolete - cleaning up.", host->hostname, host->machine_guid);
  501. if (rrdhost_flag_check(host, RRDHOST_FLAG_DELETE_ORPHAN_HOST)
  502. #ifdef ENABLE_DBENGINE
  503. /* don't delete multi-host DB host files */
  504. && !(host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE && host->rrdeng_ctx == &multidb_ctx)
  505. #endif
  506. )
  507. rrdhost_delete_charts(host);
  508. else
  509. rrdhost_save_charts(host);
  510. rrdhost_free(host);
  511. goto restart_after_removal;
  512. }
  513. }
  514. }
  515. // ----------------------------------------------------------------------------
  516. // RRDHOST global / startup initialization
  517. int rrd_init(char *hostname, struct rrdhost_system_info *system_info) {
  518. rrdset_free_obsolete_time = config_get_number(CONFIG_SECTION_GLOBAL, "cleanup obsolete charts after seconds", rrdset_free_obsolete_time);
  519. gap_when_lost_iterations_above = (int)config_get_number(CONFIG_SECTION_GLOBAL, "gap when lost iterations above", gap_when_lost_iterations_above);
  520. if (gap_when_lost_iterations_above < 1)
  521. gap_when_lost_iterations_above = 1;
  522. #ifdef ENABLE_DBENGINE
  523. if (unlikely(sql_init_database())) {
  524. return 1;
  525. }
  526. #endif
  527. health_init();
  528. rrdpush_init();
  529. debug(D_RRDHOST, "Initializing localhost with hostname '%s'", hostname);
  530. rrd_wrlock();
  531. localhost = rrdhost_create(
  532. hostname
  533. , registry_get_this_machine_hostname()
  534. , registry_get_this_machine_guid()
  535. , os_type
  536. , netdata_configured_timezone
  537. , config_get(CONFIG_SECTION_BACKEND, "host tags", "")
  538. , program_name
  539. , program_version
  540. , default_rrd_update_every
  541. , default_rrd_history_entries
  542. , default_rrd_memory_mode
  543. , default_health_enabled
  544. , default_rrdpush_enabled
  545. , default_rrdpush_destination
  546. , default_rrdpush_api_key
  547. , default_rrdpush_send_charts_matching
  548. , system_info
  549. , 1
  550. );
  551. if (unlikely(!localhost)) {
  552. rrd_unlock();
  553. return 1;
  554. }
  555. #ifdef ENABLE_DBENGINE
  556. char dbenginepath[FILENAME_MAX + 1];
  557. int ret;
  558. snprintfz(dbenginepath, FILENAME_MAX, "%s/dbengine", localhost->cache_dir);
  559. ret = mkdir(dbenginepath, 0775);
  560. if (ret != 0 && errno != EEXIST)
  561. error("Host '%s': cannot create directory '%s'", localhost->hostname, dbenginepath);
  562. else // Unconditionally create multihost db to support on demand host creation
  563. ret = rrdeng_init(NULL, NULL, dbenginepath, default_rrdeng_page_cache_mb, default_multidb_disk_quota_mb);
  564. if (ret) {
  565. error(
  566. "Host '%s' with machine guid '%s' failed to initialize multi-host DB engine instance at '%s'.",
  567. localhost->hostname, localhost->machine_guid, localhost->cache_dir);
  568. rrdhost_free(localhost);
  569. localhost = NULL;
  570. rrd_unlock();
  571. return 1;
  572. }
  573. #endif
  574. rrd_unlock();
  575. web_client_api_v1_management_init();
  576. return localhost==NULL;
  577. }
  578. // ----------------------------------------------------------------------------
  579. // RRDHOST - lock validations
  // these are only used when NETDATA_INTERNAL_CHECKS is set
  581. void __rrdhost_check_rdlock(RRDHOST *host, const char *file, const char *function, const unsigned long line) {
  582. debug(D_RRDHOST, "Checking read lock on host '%s'", host->hostname);
  583. int ret = netdata_rwlock_trywrlock(&host->rrdhost_rwlock);
  584. if(ret == 0)
  585. fatal("RRDHOST '%s' should be read-locked, but it is not, at function %s() at line %lu of file '%s'", host->hostname, function, line, file);
  586. }
  587. void __rrdhost_check_wrlock(RRDHOST *host, const char *file, const char *function, const unsigned long line) {
  588. debug(D_RRDHOST, "Checking write lock on host '%s'", host->hostname);
  589. int ret = netdata_rwlock_tryrdlock(&host->rrdhost_rwlock);
  590. if(ret == 0)
  591. fatal("RRDHOST '%s' should be write-locked, but it is not, at function %s() at line %lu of file '%s'", host->hostname, function, line, file);
  592. }
  593. void __rrd_check_rdlock(const char *file, const char *function, const unsigned long line) {
  594. debug(D_RRDHOST, "Checking read lock on all RRDs");
  595. int ret = netdata_rwlock_trywrlock(&rrd_rwlock);
  596. if(ret == 0)
  597. fatal("RRDs should be read-locked, but it are not, at function %s() at line %lu of file '%s'", function, line, file);
  598. }
  599. void __rrd_check_wrlock(const char *file, const char *function, const unsigned long line) {
  600. debug(D_RRDHOST, "Checking write lock on all RRDs");
  601. int ret = netdata_rwlock_tryrdlock(&rrd_rwlock);
  602. if(ret == 0)
  603. fatal("RRDs should be write-locked, but it are not, at function %s() at line %lu of file '%s'", function, line, file);
  604. }
  605. // ----------------------------------------------------------------------------
  606. // RRDHOST - free
  607. void rrdhost_system_info_free(struct rrdhost_system_info *system_info) {
  608. info("SYSTEM_INFO: free %p", system_info);
  609. if(likely(system_info)) {
  610. freez(system_info->host_os_name);
  611. freez(system_info->host_os_id);
  612. freez(system_info->host_os_id_like);
  613. freez(system_info->host_os_version);
  614. freez(system_info->host_os_version_id);
  615. freez(system_info->host_os_detection);
  616. freez(system_info->host_cores);
  617. freez(system_info->host_cpu_freq);
  618. freez(system_info->host_ram_total);
  619. freez(system_info->host_disk_space);
  620. freez(system_info->container_os_name);
  621. freez(system_info->container_os_id);
  622. freez(system_info->container_os_id_like);
  623. freez(system_info->container_os_version);
  624. freez(system_info->container_os_version_id);
  625. freez(system_info->container_os_detection);
  626. freez(system_info->kernel_name);
  627. freez(system_info->kernel_version);
  628. freez(system_info->architecture);
  629. freez(system_info->virtualization);
  630. freez(system_info->virt_detection);
  631. freez(system_info->container);
  632. freez(system_info->container_detection);
  633. freez(system_info->is_k8s_node);
  634. freez(system_info);
  635. }
  636. }
  637. void destroy_receiver_state(struct receiver_state *rpt);
  638. void rrdhost_free(RRDHOST *host) {
  639. if(!host) return;
  640. info("Freeing all memory for host '%s'...", host->hostname);
  641. rrd_check_wrlock(); // make sure the RRDs are write locked
  642. // ------------------------------------------------------------------------
  643. // clean up streaming
  644. rrdpush_sender_thread_stop(host); // stop a possibly running thread
  645. cbuffer_free(host->sender->buffer);
  646. buffer_free(host->sender->build);
  647. freez(host->sender);
  648. host->sender = NULL;
  649. if (netdata_exit) {
  650. netdata_mutex_lock(&host->receiver_lock);
  651. if (host->receiver) {
  652. if (!host->receiver->exited)
  653. netdata_thread_cancel(host->receiver->thread);
  654. netdata_mutex_unlock(&host->receiver_lock);
  655. struct receiver_state *rpt = host->receiver;
  656. while (host->receiver && !rpt->exited)
  657. sleep_usec(50 * USEC_PER_MS);
  658. // If the receiver detached from the host then its thread will destroy the state
  659. if (host->receiver == rpt)
  660. destroy_receiver_state(host->receiver);
  661. }
  662. else
  663. netdata_mutex_unlock(&host->receiver_lock);
  664. }
  665. rrdhost_wrlock(host); // lock this RRDHOST
  666. // ------------------------------------------------------------------------
  667. // release its children resources
  668. #ifdef ENABLE_DBENGINE
  669. if (host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) {
  670. if (host->rrdeng_ctx != &multidb_ctx)
  671. rrdeng_prepare_exit(host->rrdeng_ctx);
  672. }
  673. #endif
  674. while(host->rrdset_root)
  675. rrdset_free(host->rrdset_root);
  676. freez(host->exporting_flags);
  677. while(host->alarms)
  678. rrdcalc_unlink_and_free(host, host->alarms);
  679. RRDCALC *rc,*nc;
  680. for(rc = host->alarms_with_foreach; rc ; rc = nc) {
  681. nc = rc->next;
  682. rrdcalc_free(rc);
  683. }
  684. host->alarms_with_foreach = NULL;
  685. while(host->templates)
  686. rrdcalctemplate_unlink_and_free(host, host->templates);
  687. RRDCALCTEMPLATE *rt,*next;
  688. for(rt = host->alarms_template_with_foreach; rt ; rt = next) {
  689. next = rt->next;
  690. rrdcalctemplate_free(rt);
  691. }
  692. host->alarms_template_with_foreach = NULL;
  693. debug(D_RRD_CALLS, "RRDHOST: Cleaning up remaining host variables for host '%s'", host->hostname);
  694. rrdvar_free_remaining_variables(host, &host->rrdvar_root_index);
  695. health_alarm_log_free(host);
  696. #ifdef ENABLE_DBENGINE
  697. if (host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE && host->rrdeng_ctx != &multidb_ctx)
  698. rrdeng_exit(host->rrdeng_ctx);
  699. #endif
  700. // ------------------------------------------------------------------------
  701. // remove it from the indexes
  702. if(rrdhost_index_del(host) != host)
  703. error("RRDHOST '%s' removed from index, deleted the wrong entry.", host->hostname);
  704. // ------------------------------------------------------------------------
  705. // unlink it from the host
  706. if(host == localhost) {
  707. localhost = host->next;
  708. }
  709. else {
  710. // find the previous one
  711. RRDHOST *h;
  712. for(h = localhost; h && h->next != host ; h = h->next) ;
  713. // bypass it
  714. if(h) h->next = host->next;
  715. else error("Request to free RRDHOST '%s': cannot find it", host->hostname);
  716. }
  717. // ------------------------------------------------------------------------
  718. // free it
  719. pthread_mutex_destroy(&host->aclk_state_lock);
  720. freez(host->aclk_state.claimed_id);
  721. freez((void *)host->tags);
  722. free_label_list(host->labels.head);
  723. freez((void *)host->os);
  724. freez((void *)host->timezone);
  725. freez(host->program_version);
  726. freez(host->program_name);
  727. rrdhost_system_info_free(host->system_info);
  728. freez(host->cache_dir);
  729. freez(host->varlib_dir);
  730. freez(host->rrdpush_send_api_key);
  731. freez(host->rrdpush_send_destination);
  732. freez(host->health_default_exec);
  733. freez(host->health_default_recipient);
  734. freez(host->health_log_filename);
  735. freez(host->hostname);
  736. freez(host->registry_hostname);
  737. simple_pattern_free(host->rrdpush_send_charts_matching);
  738. rrdhost_unlock(host);
  739. netdata_rwlock_destroy(&host->labels.labels_rwlock);
  740. netdata_rwlock_destroy(&host->health_log.alarm_log_rwlock);
  741. netdata_rwlock_destroy(&host->rrdhost_rwlock);
  742. freez(host);
  743. rrd_hosts_available--;
  744. }
  745. void rrdhost_free_all(void) {
  746. rrd_wrlock();
  747. /* Make sure child-hosts are released before the localhost. */
  748. while(localhost->next) rrdhost_free(localhost->next);
  749. rrdhost_free(localhost);
  750. rrd_unlock();
  751. }
  752. // ----------------------------------------------------------------------------
  753. // RRDHOST - save host files
  754. void rrdhost_save_charts(RRDHOST *host) {
  755. if(!host) return;
  756. info("Saving/Closing database of host '%s'...", host->hostname);
  757. RRDSET *st;
  758. // we get a write lock
  759. // to ensure only one thread is saving the database
  760. rrdhost_wrlock(host);
  761. rrdset_foreach_write(st, host) {
  762. rrdset_rdlock(st);
  763. rrdset_save(st);
  764. rrdset_unlock(st);
  765. }
  766. rrdhost_unlock(host);
  767. }
  768. static struct label *rrdhost_load_auto_labels(void)
  769. {
  770. struct label *label_list = NULL;
  771. if (localhost->system_info->host_os_name)
  772. label_list =
  773. add_label_to_list(label_list, "_os_name", localhost->system_info->host_os_name, LABEL_SOURCE_AUTO);
  774. if (localhost->system_info->host_os_version)
  775. label_list =
  776. add_label_to_list(label_list, "_os_version", localhost->system_info->host_os_version, LABEL_SOURCE_AUTO);
  777. if (localhost->system_info->kernel_version)
  778. label_list =
  779. add_label_to_list(label_list, "_kernel_version", localhost->system_info->kernel_version, LABEL_SOURCE_AUTO);
  780. if (localhost->system_info->host_cores)
  781. label_list =
  782. add_label_to_list(label_list, "_system_cores", localhost->system_info->host_cores, LABEL_SOURCE_AUTO);
  783. if (localhost->system_info->host_cpu_freq)
  784. label_list =
  785. add_label_to_list(label_list, "_system_cpu_freq", localhost->system_info->host_cpu_freq, LABEL_SOURCE_AUTO);
  786. if (localhost->system_info->host_ram_total)
  787. label_list =
  788. add_label_to_list(label_list, "_system_ram_total", localhost->system_info->host_ram_total, LABEL_SOURCE_AUTO);
  789. if (localhost->system_info->host_disk_space)
  790. label_list =
  791. add_label_to_list(label_list, "_system_disk_space", localhost->system_info->host_disk_space, LABEL_SOURCE_AUTO);
  792. if (localhost->system_info->architecture)
  793. label_list =
  794. add_label_to_list(label_list, "_architecture", localhost->system_info->architecture, LABEL_SOURCE_AUTO);
  795. if (localhost->system_info->virtualization)
  796. label_list =
  797. add_label_to_list(label_list, "_virtualization", localhost->system_info->virtualization, LABEL_SOURCE_AUTO);
  798. if (localhost->system_info->container)
  799. label_list =
  800. add_label_to_list(label_list, "_container", localhost->system_info->container, LABEL_SOURCE_AUTO);
  801. if (localhost->system_info->container_detection)
  802. label_list =
  803. add_label_to_list(label_list, "_container_detection", localhost->system_info->container_detection, LABEL_SOURCE_AUTO);
  804. if (localhost->system_info->virt_detection)
  805. label_list =
  806. add_label_to_list(label_list, "_virt_detection", localhost->system_info->virt_detection, LABEL_SOURCE_AUTO);
  807. if (localhost->system_info->is_k8s_node)
  808. label_list =
  809. add_label_to_list(label_list, "_is_k8s_node", localhost->system_info->is_k8s_node, LABEL_SOURCE_AUTO);
  810. label_list = add_label_to_list(
  811. label_list, "_is_parent", (localhost->next || configured_as_parent()) ? "true" : "false", LABEL_SOURCE_AUTO);
  812. if (localhost->rrdpush_send_destination)
  813. label_list =
  814. add_label_to_list(label_list, "_streams_to", localhost->rrdpush_send_destination, LABEL_SOURCE_AUTO);
  815. return label_list;
  816. }
  /*
   * Decide whether a configuration option can be turned into a host label.
   *
   * Returns 1 when the name passes is_valid_label_key(), the value passes
   * is_valid_label_value(), and the name is neither "from environment" nor
   * "from kubernetes pods"; returns 0 otherwise.
   */
  static inline int rrdhost_is_valid_label_config_option(char *name, char *value)
  {
      if (!is_valid_label_key(name))
          return 0;

      if (!is_valid_label_value(value))
          return 0;

      if (strcmp(name, "from environment") == 0)
          return 0;

      return strcmp(name, "from kubernetes pods") != 0;
  }
  822. static struct label *rrdhost_load_config_labels()
  823. {
  824. int status = config_load(NULL, 1, CONFIG_SECTION_HOST_LABEL);
  825. if(!status) {
  826. char *filename = CONFIG_DIR "/" CONFIG_FILENAME;
  827. error("LABEL: Cannot reload the configuration file '%s', using labels in memory", filename);
  828. }
  829. struct label *l = NULL;
  830. struct section *co = appconfig_get_section(&netdata_config, CONFIG_SECTION_HOST_LABEL);
  831. if(co) {
  832. config_section_wrlock(co);
  833. struct config_option *cv;
  834. for(cv = co->values; cv ; cv = cv->next) {
  835. if(rrdhost_is_valid_label_config_option(cv->name, cv->value)) {
  836. l = add_label_to_list(l, cv->name, cv->value, LABEL_SOURCE_NETDATA_CONF);
  837. cv->flags |= CONFIG_VALUE_USED;
  838. } else {
  839. error("LABELS: It was not possible to create the label '%s' because it contains invalid character(s) or values."
  840. , cv->name);
  841. }
  842. }
  843. config_section_unlock(co);
  844. }
  845. return l;
  846. }
  847. struct label *parse_simple_tags(
  848. struct label *label_list,
  849. const char *tags,
  850. char key_value_separator,
  851. char label_separator,
  852. STRIP_QUOTES_OPTION strip_quotes_from_key,
  853. STRIP_QUOTES_OPTION strip_quotes_from_value,
  854. SKIP_ESCAPED_CHARACTERS_OPTION skip_escaped_characters)
  855. {
  856. const char *end = tags;
  857. while (*end) {
  858. const char *start = end;
  859. char key[CONFIG_MAX_VALUE + 1];
  860. char value[CONFIG_MAX_VALUE + 1];
  861. while (*end && *end != key_value_separator)
  862. end++;
  863. strncpyz(key, start, end - start);
  864. if (*end)
  865. start = ++end;
  866. while (*end && *end != label_separator)
  867. end++;
  868. strncpyz(value, start, end - start);
  869. label_list = add_label_to_list(
  870. label_list,
  871. strip_quotes_from_key ? strip_double_quotes(trim(key), skip_escaped_characters) : trim(key),
  872. strip_quotes_from_value ? strip_double_quotes(trim(value), skip_escaped_characters) : trim(value),
  873. LABEL_SOURCE_NETDATA_CONF);
  874. if (*end)
  875. end++;
  876. }
  877. return label_list;
  878. }
  879. struct label *parse_json_tags(struct label *label_list, const char *tags)
  880. {
  881. char tags_buf[CONFIG_MAX_VALUE + 1];
  882. strncpy(tags_buf, tags, CONFIG_MAX_VALUE);
  883. char *str = tags_buf;
  884. switch (*str) {
  885. case '{':
  886. str++;
  887. strip_last_symbol(str, '}', SKIP_ESCAPED_CHARACTERS);
  888. label_list = parse_simple_tags(label_list, str, ':', ',', STRIP_QUOTES, STRIP_QUOTES, SKIP_ESCAPED_CHARACTERS);
  889. break;
  890. case '[':
  891. str++;
  892. strip_last_symbol(str, ']', SKIP_ESCAPED_CHARACTERS);
  893. char *end = str + strlen(str);
  894. size_t i = 0;
  895. while (str < end) {
  896. char key[CONFIG_MAX_VALUE + 1];
  897. snprintfz(key, CONFIG_MAX_VALUE, "host_tag%zu", i);
  898. str = strip_double_quotes(trim(str), SKIP_ESCAPED_CHARACTERS);
  899. label_list = add_label_to_list(label_list, key, str, LABEL_SOURCE_NETDATA_CONF);
  900. // skip to the next element in the array
  901. str += strlen(str) + 1;
  902. while (*str && *str != ',')
  903. str++;
  904. str++;
  905. i++;
  906. }
  907. break;
  908. case '"':
  909. label_list = add_label_to_list(
  910. label_list, "host_tag", strip_double_quotes(str, SKIP_ESCAPED_CHARACTERS), LABEL_SOURCE_NETDATA_CONF);
  911. break;
  912. default:
  913. label_list = add_label_to_list(label_list, "host_tag", str, LABEL_SOURCE_NETDATA_CONF);
  914. break;
  915. }
  916. return label_list;
  917. }
  918. static struct label *rrdhost_load_labels_from_tags(void)
  919. {
  920. if (!localhost->tags)
  921. return NULL;
  922. struct label *label_list = NULL;
  923. BACKEND_TYPE type = BACKEND_TYPE_UNKNOWN;
  924. if (config_exists(CONFIG_SECTION_BACKEND, "enabled")) {
  925. if (config_get_boolean(CONFIG_SECTION_BACKEND, "enabled", CONFIG_BOOLEAN_NO) != CONFIG_BOOLEAN_NO) {
  926. const char *type_name = config_get(CONFIG_SECTION_BACKEND, "type", "graphite");
  927. type = backend_select_type(type_name);
  928. }
  929. }
  930. switch (type) {
  931. case BACKEND_TYPE_GRAPHITE:
  932. label_list = parse_simple_tags(
  933. label_list, localhost->tags, '=', ';', DO_NOT_STRIP_QUOTES, DO_NOT_STRIP_QUOTES,
  934. DO_NOT_SKIP_ESCAPED_CHARACTERS);
  935. break;
  936. case BACKEND_TYPE_OPENTSDB_USING_TELNET:
  937. label_list = parse_simple_tags(
  938. label_list, localhost->tags, '=', ' ', DO_NOT_STRIP_QUOTES, DO_NOT_STRIP_QUOTES,
  939. DO_NOT_SKIP_ESCAPED_CHARACTERS);
  940. break;
  941. case BACKEND_TYPE_OPENTSDB_USING_HTTP:
  942. label_list = parse_simple_tags(
  943. label_list, localhost->tags, ':', ',', STRIP_QUOTES, STRIP_QUOTES,
  944. DO_NOT_SKIP_ESCAPED_CHARACTERS);
  945. break;
  946. case BACKEND_TYPE_JSON:
  947. label_list = parse_json_tags(label_list, localhost->tags);
  948. break;
  949. default:
  950. label_list = parse_simple_tags(
  951. label_list, localhost->tags, '=', ',', DO_NOT_STRIP_QUOTES, STRIP_QUOTES,
  952. DO_NOT_SKIP_ESCAPED_CHARACTERS);
  953. break;
  954. }
  955. return label_list;
  956. }
  957. static struct label *rrdhost_load_kubernetes_labels(void)
  958. {
  959. struct label *l=NULL;
  960. char *label_script = mallocz(sizeof(char) * (strlen(netdata_configured_primary_plugins_dir) + strlen("get-kubernetes-labels.sh") + 2));
  961. sprintf(label_script, "%s/%s", netdata_configured_primary_plugins_dir, "get-kubernetes-labels.sh");
  962. if (unlikely(access(label_script, R_OK) != 0)) {
  963. error("Kubernetes pod label fetching script %s not found.",label_script);
  964. freez(label_script);
  965. } else {
  966. pid_t command_pid;
  967. debug(D_RRDHOST, "Attempting to fetch external labels via %s", label_script);
  968. FILE *fp = mypopen(label_script, &command_pid);
  969. if(fp) {
  970. int MAX_LINE_SIZE=300;
  971. char buffer[MAX_LINE_SIZE + 1];
  972. while (fgets(buffer, MAX_LINE_SIZE, fp) != NULL) {
  973. char *name=buffer;
  974. char *value=buffer;
  975. while (*value && *value != ':') value++;
  976. if (*value == ':') {
  977. *value = '\0';
  978. value++;
  979. }
  980. char *eos=value;
  981. while (*eos && *eos != '\n') eos++;
  982. if (*eos == '\n') *eos = '\0';
  983. if (strlen(value)>0) {
  984. if (is_valid_label_key(name)){
  985. l = add_label_to_list(l, name, value, LABEL_SOURCE_KUBERNETES);
  986. } else {
  987. info("Ignoring invalid label name '%s'", name);
  988. }
  989. } else {
  990. error("%s outputted unexpected result: '%s'", label_script, name);
  991. }
  992. };
  993. // Non-zero exit code means that all the script output is error messages. We've shown already any message that didn't include a ':'
  994. // Here we'll inform with an ERROR that the script failed, show whatever (if anything) was added to the list of labels, free the memory and set the return to null
  995. int retcode=mypclose(fp, command_pid);
  996. if (retcode) {
  997. error("%s exited abnormally. No kubernetes labels will be added to the host.", label_script);
  998. struct label *ll=l;
  999. while (ll != NULL) {
  1000. info("Ignoring Label [source id=%s]: \"%s\" -> \"%s\"\n", translate_label_source(ll->label_source), ll->key, ll->value);
  1001. ll = ll->next;
  1002. freez(l);
  1003. l=ll;
  1004. }
  1005. }
  1006. }
  1007. freez(label_script);
  1008. }
  1009. return l;
  1010. }
  1011. void reload_host_labels(void)
  1012. {
  1013. struct label *from_auto = rrdhost_load_auto_labels();
  1014. struct label *from_k8s = rrdhost_load_kubernetes_labels();
  1015. struct label *from_config = rrdhost_load_config_labels();
  1016. struct label *from_tags = rrdhost_load_labels_from_tags();
  1017. struct label *new_labels = merge_label_lists(from_auto, from_k8s);
  1018. new_labels = merge_label_lists(new_labels, from_tags);
  1019. new_labels = merge_label_lists(new_labels, from_config);
  1020. rrdhost_rdlock(localhost);
  1021. replace_label_list(&localhost->labels, new_labels);
  1022. health_label_log_save(localhost);
  1023. rrdhost_unlock(localhost);
  1024. /* TODO-GAPS - fix this so that it looks properly at the state and version of the sender
  1025. if(localhost->rrdpush_send_enabled && localhost->rrdpush_sender_buffer){
  1026. localhost->labels.labels_flag |= LABEL_FLAG_UPDATE_STREAM;
  1027. rrdpush_send_labels(localhost);
  1028. }
  1029. */
  1030. health_reload();
  1031. }
  1032. // ----------------------------------------------------------------------------
  1033. // RRDHOST - delete host files
  1034. void rrdhost_delete_charts(RRDHOST *host) {
  1035. if(!host) return;
  1036. info("Deleting database of host '%s'...", host->hostname);
  1037. RRDSET *st;
  1038. // we get a write lock
  1039. // to ensure only one thread is saving the database
  1040. rrdhost_wrlock(host);
  1041. rrdset_foreach_write(st, host) {
  1042. rrdset_rdlock(st);
  1043. rrdset_delete(st);
  1044. rrdset_unlock(st);
  1045. }
  1046. recursively_delete_dir(host->cache_dir, "left over host");
  1047. rrdhost_unlock(host);
  1048. }
  1049. // ----------------------------------------------------------------------------
  1050. // RRDHOST - cleanup host files
  1051. void rrdhost_cleanup_charts(RRDHOST *host) {
  1052. if(!host) return;
  1053. info("Cleaning up database of host '%s'...", host->hostname);
  1054. RRDSET *st;
  1055. uint32_t rrdhost_delete_obsolete_charts = rrdhost_flag_check(host, RRDHOST_FLAG_DELETE_OBSOLETE_CHARTS);
  1056. // we get a write lock
  1057. // to ensure only one thread is saving the database
  1058. rrdhost_wrlock(host);
  1059. rrdset_foreach_write(st, host) {
  1060. rrdset_rdlock(st);
  1061. if(rrdhost_delete_obsolete_charts && rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE))
  1062. rrdset_delete(st);
  1063. else if(rrdhost_delete_obsolete_charts && rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE_DIMENSIONS))
  1064. rrdset_delete_obsolete_dimensions(st);
  1065. else
  1066. rrdset_save(st);
  1067. rrdset_unlock(st);
  1068. }
  1069. rrdhost_unlock(host);
  1070. }
  1071. // ----------------------------------------------------------------------------
  1072. // RRDHOST - save all hosts to disk
  1073. void rrdhost_save_all(void) {
  1074. info("Saving database [%zu hosts(s)]...", rrd_hosts_available);
  1075. rrd_rdlock();
  1076. RRDHOST *host;
  1077. rrdhost_foreach_read(host)
  1078. rrdhost_save_charts(host);
  1079. rrd_unlock();
  1080. }
  1081. // ----------------------------------------------------------------------------
  1082. // RRDHOST - save or delete all hosts from disk
  1083. void rrdhost_cleanup_all(void) {
  1084. info("Cleaning up database [%zu hosts(s)]...", rrd_hosts_available);
  1085. rrd_rdlock();
  1086. RRDHOST *host;
  1087. rrdhost_foreach_read(host) {
  1088. if (host != localhost && rrdhost_flag_check(host, RRDHOST_FLAG_DELETE_ORPHAN_HOST) && !host->receiver
  1089. #ifdef ENABLE_DBENGINE
  1090. /* don't delete multi-host DB host files */
  1091. && !(host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE && host->rrdeng_ctx == &multidb_ctx)
  1092. #endif
  1093. )
  1094. rrdhost_delete_charts(host);
  1095. else
  1096. rrdhost_cleanup_charts(host);
  1097. }
  1098. rrd_unlock();
  1099. }
  1100. // ----------------------------------------------------------------------------
  1101. // RRDHOST - save or delete all the host charts from disk
  1102. void rrdhost_cleanup_obsolete_charts(RRDHOST *host) {
  1103. time_t now = now_realtime_sec();
  1104. RRDSET *st;
  1105. uint32_t rrdhost_delete_obsolete_charts = rrdhost_flag_check(host, RRDHOST_FLAG_DELETE_OBSOLETE_CHARTS);
  1106. restart_after_removal:
  1107. rrdset_foreach_write(st, host) {
  1108. if(unlikely(rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE)
  1109. && st->last_accessed_time + rrdset_free_obsolete_time < now
  1110. && st->last_updated.tv_sec + rrdset_free_obsolete_time < now
  1111. && st->last_collected_time.tv_sec + rrdset_free_obsolete_time < now
  1112. )) {
  1113. #ifdef ENABLE_DBENGINE
  1114. if(st->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) {
  1115. RRDDIM *rd, *last;
  1116. rrdset_flag_set(st, RRDSET_FLAG_ARCHIVED);
  1117. while (st->variables) rrdsetvar_free(st->variables);
  1118. while (st->alarms) rrdsetcalc_unlink(st->alarms);
  1119. rrdset_wrlock(st);
  1120. for (rd = st->dimensions, last = NULL ; likely(rd) ; ) {
  1121. if (rrddim_flag_check(rd, RRDDIM_FLAG_ARCHIVED)) {
  1122. last = rd;
  1123. rd = rd->next;
  1124. continue;
  1125. }
  1126. rrddim_flag_set(rd, RRDDIM_FLAG_ARCHIVED);
  1127. while (rd->variables)
  1128. rrddimvar_free(rd->variables);
  1129. if (rrddim_flag_check(rd, RRDDIM_FLAG_OBSOLETE)) {
  1130. rrddim_flag_clear(rd, RRDDIM_FLAG_OBSOLETE);
  1131. /* only a collector can mark a chart as obsolete, so we must remove the reference */
  1132. uint8_t can_delete_metric = rd->state->collect_ops.finalize(rd);
  1133. if (can_delete_metric) {
  1134. /* This metric has no data and no references */
  1135. delete_dimension_uuid(rd->state->metric_uuid);
  1136. rrddim_free(st, rd);
  1137. if (unlikely(!last)) {
  1138. rd = st->dimensions;
  1139. }
  1140. else {
  1141. rd = last->next;
  1142. }
  1143. continue;
  1144. }
  1145. }
  1146. last = rd;
  1147. rd = rd->next;
  1148. }
  1149. rrdset_unlock(st);
  1150. debug(D_RRD_CALLS, "RRDSET: Cleaning up remaining chart variables for host '%s', chart '%s'", host->hostname, st->id);
  1151. rrdvar_free_remaining_variables(host, &st->rrdvar_root_index);
  1152. rrdset_flag_clear(st, RRDSET_FLAG_OBSOLETE);
  1153. if (st->dimensions) {
  1154. /* If the chart still has dimensions don't delete it from the metadata log */
  1155. continue;
  1156. }
  1157. }
  1158. #endif
  1159. rrdset_rdlock(st);
  1160. if(rrdhost_delete_obsolete_charts)
  1161. rrdset_delete(st);
  1162. else
  1163. rrdset_save(st);
  1164. rrdset_unlock(st);
  1165. rrdset_free(st);
  1166. goto restart_after_removal;
  1167. }
  1168. }
  1169. }
  1170. // ----------------------------------------------------------------------------
  1171. // RRDHOST - set system info from environment variables
  1172. // system_info fields must be heap allocated or NULL
  1173. int rrdhost_set_system_info_variable(struct rrdhost_system_info *system_info, char *name, char *value) {
  1174. int res = 0;
  1175. if (!strcmp(name, "NETDATA_PROTOCOL_VERSION"))
  1176. return res;
  1177. else if(!strcmp(name, "NETDATA_CONTAINER_OS_NAME")){
  1178. freez(system_info->container_os_name);
  1179. system_info->container_os_name = strdupz(value);
  1180. }
  1181. else if(!strcmp(name, "NETDATA_CONTAINER_OS_ID")){
  1182. freez(system_info->container_os_id);
  1183. system_info->container_os_id = strdupz(value);
  1184. }
  1185. else if(!strcmp(name, "NETDATA_CONTAINER_OS_ID_LIKE")){
  1186. freez(system_info->container_os_id_like);
  1187. system_info->container_os_id_like = strdupz(value);
  1188. }
  1189. else if(!strcmp(name, "NETDATA_CONTAINER_OS_VERSION")){
  1190. freez(system_info->container_os_version);
  1191. system_info->container_os_version = strdupz(value);
  1192. }
  1193. else if(!strcmp(name, "NETDATA_CONTAINER_OS_VERSION_ID")){
  1194. freez(system_info->container_os_version_id);
  1195. system_info->container_os_version_id = strdupz(value);
  1196. }
  1197. else if(!strcmp(name, "NETDATA_CONTAINER_OS_DETECTION")){
  1198. freez(system_info->host_os_detection);
  1199. system_info->host_os_detection = strdupz(value);
  1200. }
  1201. else if(!strcmp(name, "NETDATA_HOST_OS_NAME")){
  1202. freez(system_info->host_os_name);
  1203. system_info->host_os_name = strdupz(value);
  1204. }
  1205. else if(!strcmp(name, "NETDATA_HOST_OS_ID")){
  1206. freez(system_info->host_os_id);
  1207. system_info->host_os_id = strdupz(value);
  1208. }
  1209. else if(!strcmp(name, "NETDATA_HOST_OS_ID_LIKE")){
  1210. freez(system_info->host_os_id_like);
  1211. system_info->host_os_id_like = strdupz(value);
  1212. }
  1213. else if(!strcmp(name, "NETDATA_HOST_OS_VERSION")){
  1214. freez(system_info->host_os_version);
  1215. system_info->host_os_version = strdupz(value);
  1216. }
  1217. else if(!strcmp(name, "NETDATA_HOST_OS_VERSION_ID")){
  1218. freez(system_info->host_os_version_id);
  1219. system_info->host_os_version_id = strdupz(value);
  1220. }
  1221. else if(!strcmp(name, "NETDATA_HOST_OS_DETECTION")){
  1222. freez(system_info->host_os_detection);
  1223. system_info->host_os_detection = strdupz(value);
  1224. }
  1225. else if(!strcmp(name, "NETDATA_SYSTEM_KERNEL_NAME")){
  1226. freez(system_info->kernel_name);
  1227. system_info->kernel_name = strdupz(value);
  1228. }
  1229. else if(!strcmp(name, "NETDATA_SYSTEM_CPU_LOGICAL_CPU_COUNT")){
  1230. freez(system_info->host_cores);
  1231. system_info->host_cores = strdupz(value);
  1232. }
  1233. else if(!strcmp(name, "NETDATA_SYSTEM_CPU_FREQ")){
  1234. freez(system_info->host_cpu_freq);
  1235. system_info->host_cpu_freq = strdupz(value);
  1236. }
  1237. else if(!strcmp(name, "NETDATA_SYSTEM_TOTAL_RAM")){
  1238. freez(system_info->host_ram_total);
  1239. system_info->host_ram_total = strdupz(value);
  1240. }
  1241. else if(!strcmp(name, "NETDATA_SYSTEM_TOTAL_DISK_SIZE")){
  1242. freez(system_info->host_disk_space);
  1243. system_info->host_disk_space = strdupz(value);
  1244. }
  1245. else if(!strcmp(name, "NETDATA_SYSTEM_KERNEL_VERSION")){
  1246. freez(system_info->kernel_version);
  1247. system_info->kernel_version = strdupz(value);
  1248. }
  1249. else if(!strcmp(name, "NETDATA_SYSTEM_ARCHITECTURE")){
  1250. freez(system_info->architecture);
  1251. system_info->architecture = strdupz(value);
  1252. }
  1253. else if(!strcmp(name, "NETDATA_SYSTEM_VIRTUALIZATION")){
  1254. freez(system_info->virtualization);
  1255. system_info->virtualization = strdupz(value);
  1256. }
  1257. else if(!strcmp(name, "NETDATA_SYSTEM_VIRT_DETECTION")){
  1258. freez(system_info->virt_detection);
  1259. system_info->virt_detection = strdupz(value);
  1260. }
  1261. else if(!strcmp(name, "NETDATA_SYSTEM_CONTAINER")){
  1262. freez(system_info->container);
  1263. system_info->container = strdupz(value);
  1264. }
  1265. else if(!strcmp(name, "NETDATA_SYSTEM_CONTAINER_DETECTION")){
  1266. freez(system_info->container_detection);
  1267. system_info->container_detection = strdupz(value);
  1268. }
  1269. else if(!strcmp(name, "NETDATA_HOST_IS_K8S_NODE")){
  1270. freez(system_info->is_k8s_node);
  1271. system_info->is_k8s_node = strdupz(value);
  1272. }
  1273. else if (!strcmp(name, "NETDATA_SYSTEM_CPU_VENDOR"))
  1274. return res;
  1275. else if (!strcmp(name, "NETDATA_SYSTEM_CPU_MODEL"))
  1276. return res;
  1277. else if (!strcmp(name, "NETDATA_SYSTEM_CPU_DETECTION"))
  1278. return res;
  1279. else if (!strcmp(name, "NETDATA_SYSTEM_RAM_DETECTION"))
  1280. return res;
  1281. else if (!strcmp(name, "NETDATA_SYSTEM_DISK_DETECTION"))
  1282. return res;
  1283. else {
  1284. res = 1;
  1285. }
  1286. return res;
  1287. }
  1288. /**
  1289. * Alarm Compare ID
  1290. *
  1291. * Callback function used with the binary trees to compare the id of RRDCALC
  1292. *
  1293. * @param a a pointer to the RRDCAL item to insert,compare or update the binary tree
  1294. * @param b the pointer to the binary tree.
  1295. *
  1296. * @return It returns 0 case the values are equal, 1 case a is bigger than b and -1 case a is smaller than b.
  1297. */
  1298. int alarm_compare_id(void *a, void *b) {
  1299. register uint32_t hash1 = ((RRDCALC *)a)->id;
  1300. register uint32_t hash2 = ((RRDCALC *)b)->id;
  1301. if(hash1 < hash2) return -1;
  1302. else if(hash1 > hash2) return 1;
  1303. return 0;
  1304. }
  1305. /**
  1306. * Alarm Compare NAME
  1307. *
  1308. * Callback function used with the binary trees to compare the name of RRDCALC
  1309. *
  1310. * @param a a pointer to the RRDCAL item to insert,compare or update the binary tree
  1311. * @param b the pointer to the binary tree.
  1312. *
  1313. * @return It returns 0 case the values are equal, 1 case a is bigger than b and -1 case a is smaller than b.
  1314. */
  1315. int alarm_compare_name(void *a, void *b) {
  1316. RRDCALC *in1 = (RRDCALC *)a;
  1317. RRDCALC *in2 = (RRDCALC *)b;
  1318. if(in1->hash < in2->hash) return -1;
  1319. else if(in1->hash > in2->hash) return 1;
  1320. return strcmp(in1->name,in2->name);
  1321. }
  1322. // Added for gap-filling, if this proves to be a bottleneck in large-scale systems then we will need to cache
  1323. // the last entry times as the metric updates, but let's see if it is a problem first.
  1324. time_t rrdhost_last_entry_t(RRDHOST *h) {
  1325. rrdhost_rdlock(h);
  1326. RRDSET *st;
  1327. time_t result = 0;
  1328. rrdset_foreach_read(st, h) {
  1329. time_t st_last = rrdset_last_entry_t(st);
  1330. if (st_last > result)
  1331. result = st_last;
  1332. }
  1333. rrdhost_unlock(h);
  1334. return result;
  1335. }