rrdhost.c 55 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611
  1. // SPDX-License-Identifier: GPL-3.0-or-later
  2. #define NETDATA_RRD_INTERNALS
  3. #include "rrd.h"
  4. RRDHOST *localhost = NULL;
  5. size_t rrd_hosts_available = 0;
  6. netdata_rwlock_t rrd_rwlock = NETDATA_RWLOCK_INITIALIZER;
  7. time_t rrdset_free_obsolete_time = 3600;
  8. time_t rrdhost_free_orphan_time = 3600;
  9. // ----------------------------------------------------------------------------
  10. // RRDHOST index
  11. int rrdhost_compare(void* a, void* b) {
  12. if(((RRDHOST *)a)->hash_machine_guid < ((RRDHOST *)b)->hash_machine_guid) return -1;
  13. else if(((RRDHOST *)a)->hash_machine_guid > ((RRDHOST *)b)->hash_machine_guid) return 1;
  14. else return strcmp(((RRDHOST *)a)->machine_guid, ((RRDHOST *)b)->machine_guid);
  15. }
  16. avl_tree_lock rrdhost_root_index = {
  17. .avl_tree = { NULL, rrdhost_compare },
  18. .rwlock = AVL_LOCK_INITIALIZER
  19. };
  20. RRDHOST *rrdhost_find_by_guid(const char *guid, uint32_t hash) {
  21. debug(D_RRDHOST, "Searching in index for host with guid '%s'", guid);
  22. RRDHOST tmp;
  23. strncpyz(tmp.machine_guid, guid, GUID_LEN);
  24. tmp.hash_machine_guid = (hash)?hash:simple_hash(tmp.machine_guid);
  25. return (RRDHOST *)avl_search_lock(&(rrdhost_root_index), (avl *) &tmp);
  26. }
  27. RRDHOST *rrdhost_find_by_hostname(const char *hostname, uint32_t hash) {
  28. if(unlikely(!strcmp(hostname, "localhost")))
  29. return localhost;
  30. if(unlikely(!hash)) hash = simple_hash(hostname);
  31. rrd_rdlock();
  32. RRDHOST *host;
  33. rrdhost_foreach_read(host) {
  34. if(unlikely((hash == host->hash_hostname && !strcmp(hostname, host->hostname)))) {
  35. rrd_unlock();
  36. return host;
  37. }
  38. }
  39. rrd_unlock();
  40. return NULL;
  41. }
  42. #define rrdhost_index_add(rrdhost) (RRDHOST *)avl_insert_lock(&(rrdhost_root_index), (avl *)(rrdhost))
  43. #define rrdhost_index_del(rrdhost) (RRDHOST *)avl_remove_lock(&(rrdhost_root_index), (avl *)(rrdhost))
  44. // ----------------------------------------------------------------------------
  45. // RRDHOST - internal helpers
  46. static inline void rrdhost_init_tags(RRDHOST *host, const char *tags) {
  47. if(host->tags && tags && !strcmp(host->tags, tags))
  48. return;
  49. void *old = (void *)host->tags;
  50. host->tags = (tags && *tags)?strdupz(tags):NULL;
  51. freez(old);
  52. }
  53. static inline void rrdhost_init_hostname(RRDHOST *host, const char *hostname) {
  54. if(host->hostname && hostname && !strcmp(host->hostname, hostname))
  55. return;
  56. void *old = host->hostname;
  57. host->hostname = strdupz(hostname?hostname:"localhost");
  58. host->hash_hostname = simple_hash(host->hostname);
  59. freez(old);
  60. }
  61. static inline void rrdhost_init_os(RRDHOST *host, const char *os) {
  62. if(host->os && os && !strcmp(host->os, os))
  63. return;
  64. void *old = (void *)host->os;
  65. host->os = strdupz(os?os:"unknown");
  66. freez(old);
  67. }
  68. static inline void rrdhost_init_timezone(RRDHOST *host, const char *timezone) {
  69. if(host->timezone && timezone && !strcmp(host->timezone, timezone))
  70. return;
  71. void *old = (void *)host->timezone;
  72. host->timezone = strdupz((timezone && *timezone)?timezone:"unknown");
  73. freez(old);
  74. }
  75. static inline void rrdhost_init_machine_guid(RRDHOST *host, const char *machine_guid) {
  76. strncpy(host->machine_guid, machine_guid, GUID_LEN);
  77. host->machine_guid[GUID_LEN] = '\0';
  78. host->hash_machine_guid = simple_hash(host->machine_guid);
  79. }
  80. // ----------------------------------------------------------------------------
  81. // RRDHOST - add a host
  82. RRDHOST *rrdhost_create(const char *hostname,
  83. const char *registry_hostname,
  84. const char *guid,
  85. const char *os,
  86. const char *timezone,
  87. const char *tags,
  88. const char *program_name,
  89. const char *program_version,
  90. int update_every,
  91. long entries,
  92. RRD_MEMORY_MODE memory_mode,
  93. unsigned int health_enabled,
  94. unsigned int rrdpush_enabled,
  95. char *rrdpush_destination,
  96. char *rrdpush_api_key,
  97. char *rrdpush_send_charts_matching,
  98. struct rrdhost_system_info *system_info,
  99. int is_localhost
  100. ) {
  101. debug(D_RRDHOST, "Host '%s': adding with guid '%s'", hostname, guid);
  102. rrd_check_wrlock();
  103. RRDHOST *host = callocz(1, sizeof(RRDHOST));
  104. host->rrd_update_every = (update_every > 0)?update_every:1;
  105. host->rrd_history_entries = align_entries_to_pagesize(memory_mode, entries);
  106. host->rrd_memory_mode = memory_mode;
  107. #ifdef ENABLE_DBENGINE
  108. host->page_cache_mb = default_rrdeng_page_cache_mb;
  109. host->disk_space_mb = default_rrdeng_disk_quota_mb;
  110. #endif
  111. host->health_enabled = (memory_mode == RRD_MEMORY_MODE_NONE)? 0 : health_enabled;
  112. host->sender = mallocz(sizeof(*host->sender));
  113. sender_init(host->sender, host);
  114. netdata_mutex_init(&host->receiver_lock);
  115. host->rrdpush_send_enabled = (rrdpush_enabled && rrdpush_destination && *rrdpush_destination && rrdpush_api_key && *rrdpush_api_key) ? 1 : 0;
  116. host->rrdpush_send_destination = (host->rrdpush_send_enabled)?strdupz(rrdpush_destination):NULL;
  117. host->rrdpush_send_api_key = (host->rrdpush_send_enabled)?strdupz(rrdpush_api_key):NULL;
  118. host->rrdpush_send_charts_matching = simple_pattern_create(rrdpush_send_charts_matching, NULL, SIMPLE_PATTERN_EXACT);
  119. host->rrdpush_sender_pipe[0] = -1;
  120. host->rrdpush_sender_pipe[1] = -1;
  121. host->rrdpush_sender_socket = -1;
  122. //host->stream_version = STREAMING_PROTOCOL_CURRENT_VERSION; Unused?
  123. #ifdef ENABLE_HTTPS
  124. host->ssl.conn = NULL;
  125. host->ssl.flags = NETDATA_SSL_START;
  126. host->stream_ssl.conn = NULL;
  127. host->stream_ssl.flags = NETDATA_SSL_START;
  128. #endif
  129. netdata_rwlock_init(&host->rrdhost_rwlock);
  130. netdata_rwlock_init(&host->labels_rwlock);
  131. rrdhost_init_hostname(host, hostname);
  132. rrdhost_init_machine_guid(host, guid);
  133. rrdhost_init_os(host, os);
  134. rrdhost_init_timezone(host, timezone);
  135. rrdhost_init_tags(host, tags);
  136. host->program_name = strdupz((program_name && *program_name)?program_name:"unknown");
  137. host->program_version = strdupz((program_version && *program_version)?program_version:"unknown");
  138. host->registry_hostname = strdupz((registry_hostname && *registry_hostname)?registry_hostname:hostname);
  139. host->system_info = system_info;
  140. avl_init_lock(&(host->rrdset_root_index), rrdset_compare);
  141. avl_init_lock(&(host->rrdset_root_index_name), rrdset_compare_name);
  142. avl_init_lock(&(host->rrdfamily_root_index), rrdfamily_compare);
  143. avl_init_lock(&(host->rrdvar_root_index), rrdvar_compare);
  144. if(config_get_boolean(CONFIG_SECTION_GLOBAL, "delete obsolete charts files", 1))
  145. rrdhost_flag_set(host, RRDHOST_FLAG_DELETE_OBSOLETE_CHARTS);
  146. if(config_get_boolean(CONFIG_SECTION_GLOBAL, "delete orphan hosts files", 1) && !is_localhost)
  147. rrdhost_flag_set(host, RRDHOST_FLAG_DELETE_ORPHAN_HOST);
  148. host->health_default_warn_repeat_every = config_get_duration(CONFIG_SECTION_HEALTH, "default repeat warning", "never");
  149. host->health_default_crit_repeat_every = config_get_duration(CONFIG_SECTION_HEALTH, "default repeat critical", "never");
  150. avl_init_lock(&(host->alarms_idx_health_log), alarm_compare_id);
  151. avl_init_lock(&(host->alarms_idx_name), alarm_compare_name);
  152. // ------------------------------------------------------------------------
  153. // initialize health variables
  154. host->health_log.next_log_id = 1;
  155. host->health_log.next_alarm_id = 1;
  156. host->health_log.max = 1000;
  157. host->health_log.next_log_id =
  158. host->health_log.next_alarm_id = (uint32_t)now_realtime_sec();
  159. long n = config_get_number(CONFIG_SECTION_HEALTH, "in memory max health log entries", host->health_log.max);
  160. if(n < 10) {
  161. error("Host '%s': health configuration has invalid max log entries %ld. Using default %u", host->hostname, n, host->health_log.max);
  162. config_set_number(CONFIG_SECTION_HEALTH, "in memory max health log entries", (long)host->health_log.max);
  163. }
  164. else
  165. host->health_log.max = (unsigned int)n;
  166. netdata_rwlock_init(&host->health_log.alarm_log_rwlock);
  167. char filename[FILENAME_MAX + 1];
  168. if(is_localhost) {
  169. host->cache_dir = strdupz(netdata_configured_cache_dir);
  170. host->varlib_dir = strdupz(netdata_configured_varlib_dir);
  171. }
  172. else {
  173. // this is not localhost - append our GUID to localhost path
  174. snprintfz(filename, FILENAME_MAX, "%s/%s", netdata_configured_cache_dir, host->machine_guid);
  175. host->cache_dir = strdupz(filename);
  176. if(host->rrd_memory_mode == RRD_MEMORY_MODE_MAP || host->rrd_memory_mode == RRD_MEMORY_MODE_SAVE ||
  177. host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) {
  178. int r = mkdir(host->cache_dir, 0775);
  179. if(r != 0 && errno != EEXIST)
  180. error("Host '%s': cannot create directory '%s'", host->hostname, host->cache_dir);
  181. }
  182. snprintfz(filename, FILENAME_MAX, "%s/%s", netdata_configured_varlib_dir, host->machine_guid);
  183. host->varlib_dir = strdupz(filename);
  184. if(host->health_enabled) {
  185. int r = mkdir(host->varlib_dir, 0775);
  186. if(r != 0 && errno != EEXIST)
  187. error("Host '%s': cannot create directory '%s'", host->hostname, host->varlib_dir);
  188. }
  189. }
  190. if (host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) {
  191. #ifdef ENABLE_DBENGINE
  192. if (unlikely(-1 == uuid_parse(host->machine_guid, host->host_uuid))) {
  193. error("Host machine GUID is not valid.");
  194. }
  195. host->objects_nr = 1;
  196. host->compaction_id = 0;
  197. char dbenginepath[FILENAME_MAX + 1];
  198. int ret;
  199. snprintfz(dbenginepath, FILENAME_MAX, "%s/dbengine", host->cache_dir);
  200. ret = mkdir(dbenginepath, 0775);
  201. if(ret != 0 && errno != EEXIST)
  202. error("Host '%s': cannot create directory '%s'", host->hostname, dbenginepath);
  203. else
  204. ret = rrdeng_init(host, &host->rrdeng_ctx, dbenginepath, host->page_cache_mb, host->disk_space_mb);
  205. if(ret) {
  206. error("Host '%s': cannot initialize host with machine guid '%s'. Failed to initialize DB engine at '%s'.",
  207. host->hostname, host->machine_guid, host->cache_dir);
  208. rrdhost_free(host);
  209. host = NULL;
  210. //rrd_hosts_available++; //TODO: maybe we want this?
  211. return host;
  212. }
  213. #else
  214. fatal("RRD_MEMORY_MODE_DBENGINE is not supported in this platform.");
  215. #endif
  216. }
  217. if(host->health_enabled) {
  218. snprintfz(filename, FILENAME_MAX, "%s/health", host->varlib_dir);
  219. int r = mkdir(filename, 0775);
  220. if(r != 0 && errno != EEXIST)
  221. error("Host '%s': cannot create directory '%s'", host->hostname, filename);
  222. }
  223. snprintfz(filename, FILENAME_MAX, "%s/health/health-log.db", host->varlib_dir);
  224. host->health_log_filename = strdupz(filename);
  225. snprintfz(filename, FILENAME_MAX, "%s/alarm-notify.sh", netdata_configured_primary_plugins_dir);
  226. host->health_default_exec = strdupz(config_get(CONFIG_SECTION_HEALTH, "script to execute on alarm", filename));
  227. host->health_default_recipient = strdupz("root");
  228. // ------------------------------------------------------------------------
  229. // load health configuration
  230. if(host->health_enabled) {
  231. rrdhost_wrlock(host);
  232. health_readdir(host, health_user_config_dir(), health_stock_config_dir(), NULL);
  233. rrdhost_unlock(host);
  234. health_alarm_log_load(host);
  235. health_alarm_log_open(host);
  236. }
  237. // ------------------------------------------------------------------------
  238. // link it and add it to the index
  239. if(is_localhost) {
  240. host->next = localhost;
  241. localhost = host;
  242. }
  243. else {
  244. if(localhost) {
  245. host->next = localhost->next;
  246. localhost->next = host;
  247. }
  248. else localhost = host;
  249. }
  250. RRDHOST *t = rrdhost_index_add(host);
  251. if(t != host) {
  252. error("Host '%s': cannot add host with machine guid '%s' to index. It already exists as host '%s' with machine guid '%s'.", host->hostname, host->machine_guid, t->hostname, t->machine_guid);
  253. rrdhost_free(host);
  254. host = NULL;
  255. }
  256. else {
  257. info("Host '%s' (at registry as '%s') with guid '%s' initialized"
  258. ", os '%s'"
  259. ", timezone '%s'"
  260. ", tags '%s'"
  261. ", program_name '%s'"
  262. ", program_version '%s'"
  263. ", update every %d"
  264. ", memory mode %s"
  265. ", history entries %ld"
  266. ", streaming %s"
  267. " (to '%s' with api key '%s')"
  268. ", health %s"
  269. ", cache_dir '%s'"
  270. ", varlib_dir '%s'"
  271. ", health_log '%s'"
  272. ", alarms default handler '%s'"
  273. ", alarms default recipient '%s'"
  274. , host->hostname
  275. , host->registry_hostname
  276. , host->machine_guid
  277. , host->os
  278. , host->timezone
  279. , (host->tags)?host->tags:""
  280. , host->program_name
  281. , host->program_version
  282. , host->rrd_update_every
  283. , rrd_memory_mode_name(host->rrd_memory_mode)
  284. , host->rrd_history_entries
  285. , host->rrdpush_send_enabled?"enabled":"disabled"
  286. , host->rrdpush_send_destination?host->rrdpush_send_destination:""
  287. , host->rrdpush_send_api_key?host->rrdpush_send_api_key:""
  288. , host->health_enabled?"enabled":"disabled"
  289. , host->cache_dir
  290. , host->varlib_dir
  291. , host->health_log_filename
  292. , host->health_default_exec
  293. , host->health_default_recipient
  294. );
  295. }
  296. rrd_hosts_available++;
  297. #ifdef ENABLE_DBENGINE
  298. if (likely(!is_localhost && host && host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE))
  299. metalog_commit_update_host(host);
  300. #endif
  301. return host;
  302. }
  303. void rrdhost_update(RRDHOST *host
  304. , const char *hostname
  305. , const char *registry_hostname
  306. , const char *guid
  307. , const char *os
  308. , const char *timezone
  309. , const char *tags
  310. , const char *program_name
  311. , const char *program_version
  312. , int update_every
  313. , long history
  314. , RRD_MEMORY_MODE mode
  315. , unsigned int health_enabled
  316. , unsigned int rrdpush_enabled
  317. , char *rrdpush_destination
  318. , char *rrdpush_api_key
  319. , char *rrdpush_send_charts_matching
  320. , struct rrdhost_system_info *system_info
  321. )
  322. {
  323. UNUSED(guid);
  324. UNUSED(rrdpush_enabled);
  325. UNUSED(rrdpush_destination);
  326. UNUSED(rrdpush_api_key);
  327. UNUSED(rrdpush_send_charts_matching);
  328. host->health_enabled = health_enabled;
  329. //host->stream_version = STREAMING_PROTOCOL_CURRENT_VERSION; Unused?
  330. rrdhost_system_info_free(host->system_info);
  331. host->system_info = system_info;
  332. rrdhost_init_os(host, os);
  333. rrdhost_init_timezone(host, timezone);
  334. freez(host->registry_hostname);
  335. host->registry_hostname = strdupz((registry_hostname && *registry_hostname)?registry_hostname:hostname);
  336. if(strcmp(host->hostname, hostname) != 0) {
  337. info("Host '%s' has been renamed to '%s'. If this is not intentional it may mean multiple hosts are using the same machine_guid.", host->hostname, hostname);
  338. char *t = host->hostname;
  339. host->hostname = strdupz(hostname);
  340. host->hash_hostname = simple_hash(host->hostname);
  341. freez(t);
  342. }
  343. if(strcmp(host->program_name, program_name) != 0) {
  344. info("Host '%s' switched program name from '%s' to '%s'", host->hostname, host->program_name, program_name);
  345. char *t = host->program_name;
  346. host->program_name = strdupz(program_name);
  347. freez(t);
  348. }
  349. if(strcmp(host->program_version, program_version) != 0) {
  350. info("Host '%s' switched program version from '%s' to '%s'", host->hostname, host->program_version, program_version);
  351. char *t = host->program_version;
  352. host->program_version = strdupz(program_version);
  353. freez(t);
  354. }
  355. if(host->rrd_update_every != update_every)
  356. error("Host '%s' has an update frequency of %d seconds, but the wanted one is %d seconds. Restart netdata here to apply the new settings.", host->hostname, host->rrd_update_every, update_every);
  357. if(host->rrd_history_entries < history)
  358. error("Host '%s' has history of %ld entries, but the wanted one is %ld entries. Restart netdata here to apply the new settings.", host->hostname, host->rrd_history_entries, history);
  359. if(host->rrd_memory_mode != mode)
  360. error("Host '%s' has memory mode '%s', but the wanted one is '%s'. Restart netdata here to apply the new settings.", host->hostname, rrd_memory_mode_name(host->rrd_memory_mode), rrd_memory_mode_name(mode));
  361. // update host tags
  362. rrdhost_init_tags(host, tags);
  363. #ifdef ENABLE_DBENGINE
  364. if (likely(host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE))
  365. metalog_commit_update_host(host);
  366. #endif
  367. return;
  368. }
  369. RRDHOST *rrdhost_find_or_create(
  370. const char *hostname
  371. , const char *registry_hostname
  372. , const char *guid
  373. , const char *os
  374. , const char *timezone
  375. , const char *tags
  376. , const char *program_name
  377. , const char *program_version
  378. , int update_every
  379. , long history
  380. , RRD_MEMORY_MODE mode
  381. , unsigned int health_enabled
  382. , unsigned int rrdpush_enabled
  383. , char *rrdpush_destination
  384. , char *rrdpush_api_key
  385. , char *rrdpush_send_charts_matching
  386. , struct rrdhost_system_info *system_info
  387. ) {
  388. debug(D_RRDHOST, "Searching for host '%s' with guid '%s'", hostname, guid);
  389. rrd_wrlock();
  390. RRDHOST *host = rrdhost_find_by_guid(guid, 0);
  391. if(!host) {
  392. host = rrdhost_create(
  393. hostname
  394. , registry_hostname
  395. , guid
  396. , os
  397. , timezone
  398. , tags
  399. , program_name
  400. , program_version
  401. , update_every
  402. , history
  403. , mode
  404. , health_enabled
  405. , rrdpush_enabled
  406. , rrdpush_destination
  407. , rrdpush_api_key
  408. , rrdpush_send_charts_matching
  409. , system_info
  410. , 0
  411. );
  412. }
  413. else {
  414. rrdhost_update(host
  415. , hostname
  416. , registry_hostname
  417. , guid
  418. , os
  419. , timezone
  420. , tags
  421. , program_name
  422. , program_version
  423. , update_every
  424. , history
  425. , mode
  426. , health_enabled
  427. , rrdpush_enabled
  428. , rrdpush_destination
  429. , rrdpush_api_key
  430. , rrdpush_send_charts_matching
  431. , system_info);
  432. }
  433. rrdhost_cleanup_orphan_hosts_nolock(host);
  434. rrd_unlock();
  435. return host;
  436. }
  437. inline int rrdhost_should_be_removed(RRDHOST *host, RRDHOST *protected, time_t now) {
  438. if(host != protected
  439. && host != localhost
  440. && rrdhost_flag_check(host, RRDHOST_FLAG_ORPHAN)
  441. && host->receiver
  442. && host->senders_disconnected_time
  443. && host->senders_disconnected_time + rrdhost_free_orphan_time < now)
  444. return 1;
  445. return 0;
  446. }
  447. void rrdhost_cleanup_orphan_hosts_nolock(RRDHOST *protected) {
  448. time_t now = now_realtime_sec();
  449. RRDHOST *host;
  450. restart_after_removal:
  451. rrdhost_foreach_write(host) {
  452. if(rrdhost_should_be_removed(host, protected, now)) {
  453. info("Host '%s' with machine guid '%s' is obsolete - cleaning up.", host->hostname, host->machine_guid);
  454. if(rrdhost_flag_check(host, RRDHOST_FLAG_DELETE_ORPHAN_HOST))
  455. rrdhost_delete_charts(host);
  456. else
  457. rrdhost_save_charts(host);
  458. rrdhost_free(host);
  459. goto restart_after_removal;
  460. }
  461. }
  462. }
  463. // ----------------------------------------------------------------------------
  464. // RRDHOST global / startup initialization
  465. int rrd_init(char *hostname, struct rrdhost_system_info *system_info) {
  466. rrdset_free_obsolete_time = config_get_number(CONFIG_SECTION_GLOBAL, "cleanup obsolete charts after seconds", rrdset_free_obsolete_time);
  467. gap_when_lost_iterations_above = (int)config_get_number(CONFIG_SECTION_GLOBAL, "gap when lost iterations above", gap_when_lost_iterations_above);
  468. if (gap_when_lost_iterations_above < 1)
  469. gap_when_lost_iterations_above = 1;
  470. health_init();
  471. registry_init();
  472. rrdpush_init();
  473. debug(D_RRDHOST, "Initializing localhost with hostname '%s'", hostname);
  474. rrd_wrlock();
  475. localhost = rrdhost_create(
  476. hostname
  477. , registry_get_this_machine_hostname()
  478. , registry_get_this_machine_guid()
  479. , os_type
  480. , netdata_configured_timezone
  481. , config_get(CONFIG_SECTION_BACKEND, "host tags", "")
  482. , program_name
  483. , program_version
  484. , default_rrd_update_every
  485. , default_rrd_history_entries
  486. , default_rrd_memory_mode
  487. , default_health_enabled
  488. , default_rrdpush_enabled
  489. , default_rrdpush_destination
  490. , default_rrdpush_api_key
  491. , default_rrdpush_send_charts_matching
  492. , system_info
  493. , 1
  494. );
  495. rrd_unlock();
  496. web_client_api_v1_management_init();
  497. return localhost==NULL;
  498. }
  499. // ----------------------------------------------------------------------------
  500. // RRDHOST - lock validations
  501. // these are only used when NETDATA_INTERNAL_CHECKS is set
  502. void __rrdhost_check_rdlock(RRDHOST *host, const char *file, const char *function, const unsigned long line) {
  503. debug(D_RRDHOST, "Checking read lock on host '%s'", host->hostname);
  504. int ret = netdata_rwlock_trywrlock(&host->rrdhost_rwlock);
  505. if(ret == 0)
  506. fatal("RRDHOST '%s' should be read-locked, but it is not, at function %s() at line %lu of file '%s'", host->hostname, function, line, file);
  507. }
  508. void __rrdhost_check_wrlock(RRDHOST *host, const char *file, const char *function, const unsigned long line) {
  509. debug(D_RRDHOST, "Checking write lock on host '%s'", host->hostname);
  510. int ret = netdata_rwlock_tryrdlock(&host->rrdhost_rwlock);
  511. if(ret == 0)
  512. fatal("RRDHOST '%s' should be write-locked, but it is not, at function %s() at line %lu of file '%s'", host->hostname, function, line, file);
  513. }
  514. void __rrd_check_rdlock(const char *file, const char *function, const unsigned long line) {
  515. debug(D_RRDHOST, "Checking read lock on all RRDs");
  516. int ret = netdata_rwlock_trywrlock(&rrd_rwlock);
  517. if(ret == 0)
  518. fatal("RRDs should be read-locked, but it are not, at function %s() at line %lu of file '%s'", function, line, file);
  519. }
  520. void __rrd_check_wrlock(const char *file, const char *function, const unsigned long line) {
  521. debug(D_RRDHOST, "Checking write lock on all RRDs");
  522. int ret = netdata_rwlock_tryrdlock(&rrd_rwlock);
  523. if(ret == 0)
  524. fatal("RRDs should be write-locked, but it are not, at function %s() at line %lu of file '%s'", function, line, file);
  525. }
  526. // ----------------------------------------------------------------------------
  527. // RRDHOST - free
  528. void rrdhost_system_info_free(struct rrdhost_system_info *system_info) {
  529. info("SYSTEM_INFO: free %p", system_info);
  530. if(likely(system_info)) {
  531. freez(system_info->host_os_name);
  532. freez(system_info->host_os_id);
  533. freez(system_info->host_os_id_like);
  534. freez(system_info->host_os_version);
  535. freez(system_info->host_os_version_id);
  536. freez(system_info->host_os_detection);
  537. freez(system_info->host_cores);
  538. freez(system_info->host_cpu_freq);
  539. freez(system_info->host_ram_total);
  540. freez(system_info->host_disk_space);
  541. freez(system_info->container_os_name);
  542. freez(system_info->container_os_id);
  543. freez(system_info->container_os_id_like);
  544. freez(system_info->container_os_version);
  545. freez(system_info->container_os_version_id);
  546. freez(system_info->container_os_detection);
  547. freez(system_info->kernel_name);
  548. freez(system_info->kernel_version);
  549. freez(system_info->architecture);
  550. freez(system_info->virtualization);
  551. freez(system_info->virt_detection);
  552. freez(system_info->container);
  553. freez(system_info->container_detection);
  554. freez(system_info);
  555. }
  556. }
  557. void destroy_receiver_state(struct receiver_state *rpt);
  558. void rrdhost_free(RRDHOST *host) {
  559. if(!host) return;
  560. info("Freeing all memory for host '%s'...", host->hostname);
  561. rrd_check_wrlock(); // make sure the RRDs are write locked
  562. // ------------------------------------------------------------------------
  563. // clean up streaming
  564. rrdpush_sender_thread_stop(host); // stop a possibly running thread
  565. cbuffer_free(host->sender->buffer);
  566. buffer_free(host->sender->build);
  567. freez(host->sender);
  568. host->sender = NULL;
  569. if (netdata_exit) {
  570. netdata_mutex_lock(&host->receiver_lock);
  571. if (host->receiver) {
  572. if (!host->receiver->exited)
  573. netdata_thread_cancel(host->receiver->thread);
  574. netdata_mutex_unlock(&host->receiver_lock);
  575. struct receiver_state *rpt = host->receiver;
  576. while (host->receiver && !rpt->exited)
  577. sleep_usec(50 * USEC_PER_MS);
  578. // If the receiver detached from the host then its thread will destroy the state
  579. if (host->receiver == rpt)
  580. destroy_receiver_state(host->receiver);
  581. }
  582. else
  583. netdata_mutex_unlock(&host->receiver_lock);
  584. }
  585. rrdhost_wrlock(host); // lock this RRDHOST
  586. // ------------------------------------------------------------------------
  587. // release its children resources
  588. #ifdef ENABLE_DBENGINE
  589. rrdeng_prepare_exit(host->rrdeng_ctx);
  590. #endif
  591. while(host->rrdset_root)
  592. rrdset_free(host->rrdset_root);
  593. freez(host->exporting_flags);
  594. while(host->alarms)
  595. rrdcalc_unlink_and_free(host, host->alarms);
  596. RRDCALC *rc,*nc;
  597. for(rc = host->alarms_with_foreach; rc ; rc = nc) {
  598. nc = rc->next;
  599. rrdcalc_free(rc);
  600. }
  601. host->alarms_with_foreach = NULL;
  602. while(host->templates)
  603. rrdcalctemplate_unlink_and_free(host, host->templates);
  604. RRDCALCTEMPLATE *rt,*next;
  605. for(rt = host->alarms_template_with_foreach; rt ; rt = next) {
  606. next = rt->next;
  607. rrdcalctemplate_free(rt);
  608. }
  609. host->alarms_template_with_foreach = NULL;
  610. debug(D_RRD_CALLS, "RRDHOST: Cleaning up remaining host variables for host '%s'", host->hostname);
  611. rrdvar_free_remaining_variables(host, &host->rrdvar_root_index);
  612. health_alarm_log_free(host);
  613. if (host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) {
  614. #ifdef ENABLE_DBENGINE
  615. rrdeng_exit(host->rrdeng_ctx);
  616. #endif
  617. }
  618. // ------------------------------------------------------------------------
  619. // remove it from the indexes
  620. if(rrdhost_index_del(host) != host)
  621. error("RRDHOST '%s' removed from index, deleted the wrong entry.", host->hostname);
  622. // ------------------------------------------------------------------------
  623. // unlink it from the host
  624. if(host == localhost) {
  625. localhost = host->next;
  626. }
  627. else {
  628. // find the previous one
  629. RRDHOST *h;
  630. for(h = localhost; h && h->next != host ; h = h->next) ;
  631. // bypass it
  632. if(h) h->next = host->next;
  633. else error("Request to free RRDHOST '%s': cannot find it", host->hostname);
  634. }
  635. // ------------------------------------------------------------------------
  636. // free it
  637. freez((void *)host->tags);
  638. free_host_labels(host->labels);
  639. freez((void *)host->os);
  640. freez((void *)host->timezone);
  641. freez(host->program_version);
  642. freez(host->program_name);
  643. rrdhost_system_info_free(host->system_info);
  644. freez(host->cache_dir);
  645. freez(host->varlib_dir);
  646. freez(host->rrdpush_send_api_key);
  647. freez(host->rrdpush_send_destination);
  648. freez(host->health_default_exec);
  649. freez(host->health_default_recipient);
  650. freez(host->health_log_filename);
  651. freez(host->hostname);
  652. freez(host->registry_hostname);
  653. simple_pattern_free(host->rrdpush_send_charts_matching);
  654. rrdhost_unlock(host);
  655. netdata_rwlock_destroy(&host->labels_rwlock);
  656. netdata_rwlock_destroy(&host->health_log.alarm_log_rwlock);
  657. netdata_rwlock_destroy(&host->rrdhost_rwlock);
  658. freez(host);
  659. rrd_hosts_available--;
  660. }
  661. void rrdhost_free_all(void) {
  662. rrd_wrlock();
  663. while(localhost) rrdhost_free(localhost);
  664. rrd_unlock();
  665. }
  666. // ----------------------------------------------------------------------------
  667. // RRDHOST - save host files
  668. void rrdhost_save_charts(RRDHOST *host) {
  669. if(!host) return;
  670. info("Saving/Closing database of host '%s'...", host->hostname);
  671. RRDSET *st;
  672. // we get a write lock
  673. // to ensure only one thread is saving the database
  674. rrdhost_wrlock(host);
  675. rrdset_foreach_write(st, host) {
  676. rrdset_rdlock(st);
  677. rrdset_save(st);
  678. rrdset_unlock(st);
  679. }
  680. rrdhost_unlock(host);
  681. }
  // Check a label value: returns 0 when it contains a double quote, a single
  // quote, '*' or '!', and 1 otherwise (including for the empty string).
  static int is_valid_label_value(char *value) {
      for (const char *p = value; *p != '\0'; p++) {
          switch (*p) {
              case '"':
              case '\'':
              case '*':
              case '!':
                  return 0;

              default:
                  break;
          }
      }

      return 1;
  }
  /**
   * Check whether `key` may be used as a label name.
   *
   * A key is rejected (0) when it is empty, when it is one of the names used by
   * the Prometheus exporter ("chart", "family", "dimension"), when it starts
   * with '_' (reserved for Netdata and Prometheus internals), or when it
   * contains anything other than alphanumerics, '.', '_' and '-'.
   *
   * @param key NUL-terminated candidate name (must not be NULL).
   * @return 1 when the key is acceptable, 0 otherwise.
   */
  static int is_valid_label_key(char *key) {
      // an empty string cannot name a label
      if (*key == '\0')
          return 0;

      //Prometheus exporter
      if(!strcmp(key, "chart") || !strcmp(key, "family") || !strcmp(key, "dimension"))
          return 0;

      //Netdata and Prometheus internal
      if (*key == '_')
          return 0;

      // <ctype.h> classifiers are only defined for unsigned char values (or EOF),
      // so walk the key as unsigned bytes; bytes >= 0x80 are rejected instead of
      // being passed to the classifier as negative ints.
      for (const unsigned char *p = (const unsigned char *)key; *p; p++) {
          if (!(isalnum(*p) || *p == '.' || *p == '_' || *p == '-'))
              return 0;
      }

      return 1;
  }
  705. char *translate_label_source(LABEL_SOURCE l) {
  706. switch (l) {
  707. case LABEL_SOURCE_AUTO:
  708. return "AUTO";
  709. case LABEL_SOURCE_NETDATA_CONF:
  710. return "NETDATA.CONF";
  711. case LABEL_SOURCE_DOCKER :
  712. return "DOCKER";
  713. case LABEL_SOURCE_ENVIRONMENT :
  714. return "ENVIRONMENT";
  715. case LABEL_SOURCE_KUBERNETES :
  716. return "KUBERNETES";
  717. default:
  718. return "Invalid label source";
  719. }
  720. }
  721. struct label *load_auto_labels()
  722. {
  723. struct label *label_list = NULL;
  724. if (localhost->system_info->host_os_name)
  725. label_list =
  726. add_label_to_list(label_list, "_os_name", localhost->system_info->host_os_name, LABEL_SOURCE_AUTO);
  727. if (localhost->system_info->host_os_version)
  728. label_list =
  729. add_label_to_list(label_list, "_os_version", localhost->system_info->host_os_version, LABEL_SOURCE_AUTO);
  730. if (localhost->system_info->kernel_version)
  731. label_list =
  732. add_label_to_list(label_list, "_kernel_version", localhost->system_info->kernel_version, LABEL_SOURCE_AUTO);
  733. if (localhost->system_info->host_cores)
  734. label_list =
  735. add_label_to_list(label_list, "_system_cores", localhost->system_info->host_cores, LABEL_SOURCE_AUTO);
  736. if (localhost->system_info->host_cpu_freq)
  737. label_list =
  738. add_label_to_list(label_list, "_system_cpu_freq", localhost->system_info->host_cpu_freq, LABEL_SOURCE_AUTO);
  739. if (localhost->system_info->host_ram_total)
  740. label_list =
  741. add_label_to_list(label_list, "_system_ram_total", localhost->system_info->host_ram_total, LABEL_SOURCE_AUTO);
  742. if (localhost->system_info->host_disk_space)
  743. label_list =
  744. add_label_to_list(label_list, "_system_disk_space", localhost->system_info->host_disk_space, LABEL_SOURCE_AUTO);
  745. if (localhost->system_info->architecture)
  746. label_list =
  747. add_label_to_list(label_list, "_architecture", localhost->system_info->architecture, LABEL_SOURCE_AUTO);
  748. if (localhost->system_info->virtualization)
  749. label_list =
  750. add_label_to_list(label_list, "_virtualization", localhost->system_info->virtualization, LABEL_SOURCE_AUTO);
  751. if (localhost->system_info->container)
  752. label_list =
  753. add_label_to_list(label_list, "_container", localhost->system_info->container, LABEL_SOURCE_AUTO);
  754. if (localhost->system_info->container_detection)
  755. label_list =
  756. add_label_to_list(label_list, "_container_detection", localhost->system_info->container_detection, LABEL_SOURCE_AUTO);
  757. if (localhost->system_info->virt_detection)
  758. label_list =
  759. add_label_to_list(label_list, "_virt_detection", localhost->system_info->virt_detection, LABEL_SOURCE_AUTO);
  760. label_list = add_label_to_list(
  761. label_list, "_is_parent", (localhost->next || configured_as_parent()) ? "true" : "false", LABEL_SOURCE_AUTO);
  762. if (localhost->rrdpush_send_destination)
  763. label_list =
  764. add_label_to_list(label_list, "_streams_to", localhost->rrdpush_send_destination, LABEL_SOURCE_AUTO);
  765. return label_list;
  766. }
  // A config option is usable as a label when its name is a valid label key,
  // its value is a valid label value, and its name is neither
  // "from environment" nor "from kubernetes pods". Returns 1 or 0.
  static inline int is_valid_label_config_option(char *name, char *value) {
      if (!is_valid_label_key(name))
          return 0;

      if (!is_valid_label_value(value))
          return 0;

      if (strcmp(name, "from environment") == 0)
          return 0;

      if (strcmp(name, "from kubernetes pods") == 0)
          return 0;

      return 1;
  }
  770. struct label *load_config_labels()
  771. {
  772. int status = config_load(NULL, 1, CONFIG_SECTION_HOST_LABEL);
  773. if(!status) {
  774. char *filename = CONFIG_DIR "/" CONFIG_FILENAME;
  775. error("LABEL: Cannot reload the configuration file '%s', using labels in memory", filename);
  776. }
  777. struct label *l = NULL;
  778. struct section *co = appconfig_get_section(&netdata_config, CONFIG_SECTION_HOST_LABEL);
  779. if(co) {
  780. config_section_wrlock(co);
  781. struct config_option *cv;
  782. for(cv = co->values; cv ; cv = cv->next) {
  783. if( is_valid_label_config_option(cv->name, cv->value)) {
  784. l = add_label_to_list(l, cv->name, cv->value, LABEL_SOURCE_NETDATA_CONF);
  785. cv->flags |= CONFIG_VALUE_USED;
  786. } else {
  787. error("LABELS: It was not possible to create the label '%s' because it contains invalid character(s) or values."
  788. , cv->name);
  789. }
  790. }
  791. config_section_unlock(co);
  792. }
  793. return l;
  794. }
  // Tells the tag parsers whether surrounding double quotes are removed from a
  // key or a value. The values are used as booleans by the parsers.
  typedef enum strip_quotes {
      DO_NOT_STRIP_QUOTES = 0,    // keep the text as is
      STRIP_QUOTES        = 1     // remove surrounding double quotes
  } STRIP_QUOTES_OPTION;
  // Tells the tag parsers whether a backslash escapes the character after it
  // while searching for a closing symbol. The values are used as booleans.
  typedef enum skip_escaped_characters {
      DO_NOT_SKIP_ESCAPED_CHARACTERS = 0,    // a backslash is an ordinary character
      SKIP_ESCAPED_CHARACTERS        = 1     // a backslash hides the next character
  } SKIP_ESCAPED_CHARACTERS_OPTION;
  803. static inline void strip_last_symbol(
  804. char *str,
  805. char symbol,
  806. SKIP_ESCAPED_CHARACTERS_OPTION skip_escaped_characters)
  807. {
  808. char *end = str;
  809. while (*end && *end != symbol) {
  810. if (unlikely(skip_escaped_characters && *end == '\\')) {
  811. end++;
  812. if (unlikely(!*end))
  813. break;
  814. }
  815. end++;
  816. }
  817. if (likely(*end == symbol))
  818. *end = '\0';
  819. }
  820. static inline char *strip_double_quotes(char *str, SKIP_ESCAPED_CHARACTERS_OPTION skip_escaped_characters)
  821. {
  822. if (*str == '"') {
  823. str++;
  824. strip_last_symbol(str, '"', skip_escaped_characters);
  825. }
  826. return str;
  827. }
  828. struct label *parse_simple_tags(
  829. struct label *label_list,
  830. const char *tags,
  831. char key_value_separator,
  832. char label_separator,
  833. STRIP_QUOTES_OPTION strip_quotes_from_key,
  834. STRIP_QUOTES_OPTION strip_quotes_from_value,
  835. SKIP_ESCAPED_CHARACTERS_OPTION skip_escaped_characters)
  836. {
  837. const char *end = tags;
  838. while (*end) {
  839. const char *start = end;
  840. char key[CONFIG_MAX_VALUE + 1];
  841. char value[CONFIG_MAX_VALUE + 1];
  842. while (*end && *end != key_value_separator)
  843. end++;
  844. strncpyz(key, start, end - start);
  845. if (*end)
  846. start = ++end;
  847. while (*end && *end != label_separator)
  848. end++;
  849. strncpyz(value, start, end - start);
  850. label_list = add_label_to_list(
  851. label_list,
  852. strip_quotes_from_key ? strip_double_quotes(trim(key), skip_escaped_characters) : trim(key),
  853. strip_quotes_from_value ? strip_double_quotes(trim(value), skip_escaped_characters) : trim(value),
  854. LABEL_SOURCE_NETDATA_CONF);
  855. if (*end)
  856. end++;
  857. }
  858. return label_list;
  859. }
  860. struct label *parse_json_tags(struct label *label_list, const char *tags)
  861. {
  862. char tags_buf[CONFIG_MAX_VALUE + 1];
  863. strncpy(tags_buf, tags, CONFIG_MAX_VALUE);
  864. char *str = tags_buf;
  865. switch (*str) {
  866. case '{':
  867. str++;
  868. strip_last_symbol(str, '}', SKIP_ESCAPED_CHARACTERS);
  869. label_list = parse_simple_tags(label_list, str, ':', ',', STRIP_QUOTES, STRIP_QUOTES, SKIP_ESCAPED_CHARACTERS);
  870. break;
  871. case '[':
  872. str++;
  873. strip_last_symbol(str, ']', SKIP_ESCAPED_CHARACTERS);
  874. char *end = str + strlen(str);
  875. size_t i = 0;
  876. while (str < end) {
  877. char key[CONFIG_MAX_VALUE + 1];
  878. snprintfz(key, CONFIG_MAX_VALUE, "host_tag%zu", i);
  879. str = strip_double_quotes(trim(str), SKIP_ESCAPED_CHARACTERS);
  880. label_list = add_label_to_list(label_list, key, str, LABEL_SOURCE_NETDATA_CONF);
  881. // skip to the next element in the array
  882. str += strlen(str) + 1;
  883. while (*str && *str != ',')
  884. str++;
  885. str++;
  886. i++;
  887. }
  888. break;
  889. case '"':
  890. label_list = add_label_to_list(
  891. label_list, "host_tag", strip_double_quotes(str, SKIP_ESCAPED_CHARACTERS), LABEL_SOURCE_NETDATA_CONF);
  892. break;
  893. default:
  894. label_list = add_label_to_list(label_list, "host_tag", str, LABEL_SOURCE_NETDATA_CONF);
  895. break;
  896. }
  897. return label_list;
  898. }
  899. struct label *load_labels_from_tags()
  900. {
  901. if (!localhost->tags)
  902. return NULL;
  903. struct label *label_list = NULL;
  904. BACKEND_TYPE type = BACKEND_TYPE_UNKNOWN;
  905. if (config_exists(CONFIG_SECTION_BACKEND, "enabled")) {
  906. if (config_get_boolean(CONFIG_SECTION_BACKEND, "enabled", CONFIG_BOOLEAN_NO) != CONFIG_BOOLEAN_NO) {
  907. const char *type_name = config_get(CONFIG_SECTION_BACKEND, "type", "graphite");
  908. type = backend_select_type(type_name);
  909. }
  910. }
  911. switch (type) {
  912. case BACKEND_TYPE_GRAPHITE:
  913. label_list = parse_simple_tags(
  914. label_list, localhost->tags, '=', ';', DO_NOT_STRIP_QUOTES, DO_NOT_STRIP_QUOTES,
  915. DO_NOT_SKIP_ESCAPED_CHARACTERS);
  916. break;
  917. case BACKEND_TYPE_OPENTSDB_USING_TELNET:
  918. label_list = parse_simple_tags(
  919. label_list, localhost->tags, '=', ' ', DO_NOT_STRIP_QUOTES, DO_NOT_STRIP_QUOTES,
  920. DO_NOT_SKIP_ESCAPED_CHARACTERS);
  921. break;
  922. case BACKEND_TYPE_OPENTSDB_USING_HTTP:
  923. label_list = parse_simple_tags(
  924. label_list, localhost->tags, ':', ',', STRIP_QUOTES, STRIP_QUOTES,
  925. DO_NOT_SKIP_ESCAPED_CHARACTERS);
  926. break;
  927. case BACKEND_TYPE_JSON:
  928. label_list = parse_json_tags(label_list, localhost->tags);
  929. break;
  930. default:
  931. label_list = parse_simple_tags(
  932. label_list, localhost->tags, '=', ',', DO_NOT_STRIP_QUOTES, STRIP_QUOTES,
  933. DO_NOT_SKIP_ESCAPED_CHARACTERS);
  934. break;
  935. }
  936. return label_list;
  937. }
  938. struct label *load_kubernetes_labels()
  939. {
  940. struct label *l=NULL;
  941. char *label_script = mallocz(sizeof(char) * (strlen(netdata_configured_primary_plugins_dir) + strlen("get-kubernetes-labels.sh") + 2));
  942. sprintf(label_script, "%s/%s", netdata_configured_primary_plugins_dir, "get-kubernetes-labels.sh");
  943. if (unlikely(access(label_script, R_OK) != 0)) {
  944. error("Kubernetes pod label fetching script %s not found.",label_script);
  945. freez(label_script);
  946. } else {
  947. pid_t command_pid;
  948. debug(D_RRDHOST, "Attempting to fetch external labels via %s", label_script);
  949. FILE *fp = mypopen(label_script, &command_pid);
  950. if(fp) {
  951. int MAX_LINE_SIZE=300;
  952. char buffer[MAX_LINE_SIZE + 1];
  953. while (fgets(buffer, MAX_LINE_SIZE, fp) != NULL) {
  954. char *name=buffer;
  955. char *value=buffer;
  956. while (*value && *value != ':') value++;
  957. if (*value == ':') {
  958. *value = '\0';
  959. value++;
  960. }
  961. char *eos=value;
  962. while (*eos && *eos != '\n') eos++;
  963. if (*eos == '\n') *eos = '\0';
  964. if (strlen(value)>0) {
  965. if (is_valid_label_key(name)){
  966. l = add_label_to_list(l, name, value, LABEL_SOURCE_KUBERNETES);
  967. } else {
  968. info("Ignoring invalid label name '%s'", name);
  969. }
  970. } else {
  971. error("%s outputted unexpected result: '%s'", label_script, name);
  972. }
  973. };
  974. // Non-zero exit code means that all the script output is error messages. We've shown already any message that didn't include a ':'
  975. // Here we'll inform with an ERROR that the script failed, show whatever (if anything) was added to the list of labels, free the memory and set the return to null
  976. int retcode=mypclose(fp, command_pid);
  977. if (retcode) {
  978. error("%s exited abnormally. No kubernetes labels will be added to the host.", label_script);
  979. struct label *ll=l;
  980. while (ll != NULL) {
  981. info("Ignoring Label [source id=%s]: \"%s\" -> \"%s\"\n", translate_label_source(ll->label_source), ll->key, ll->value);
  982. ll = ll->next;
  983. freez(l);
  984. l=ll;
  985. }
  986. }
  987. }
  988. freez(label_script);
  989. }
  990. return l;
  991. }
  992. struct label *create_label(char *key, char *value, LABEL_SOURCE label_source)
  993. {
  994. size_t key_len = strlen(key), value_len = strlen(value);
  995. size_t n = sizeof(struct label) + key_len + 1 + value_len + 1;
  996. struct label *result = callocz(1,n);
  997. if (result != NULL) {
  998. char *c = (char *)result;
  999. c += sizeof(struct label);
  1000. strcpy(c, key);
  1001. result->key = c;
  1002. c += key_len + 1;
  1003. strcpy(c, value);
  1004. result->value = c;
  1005. result->label_source = label_source;
  1006. result->key_hash = simple_hash(result->key);
  1007. }
  1008. return result;
  1009. }
  1010. void free_host_labels(struct label *labels)
  1011. {
  1012. while (labels != NULL)
  1013. {
  1014. struct label *current = labels;
  1015. labels = labels->next;
  1016. freez(current);
  1017. }
  1018. }
  1019. void replace_label_list(RRDHOST *host, struct label *new_labels)
  1020. {
  1021. rrdhost_check_rdlock(host);
  1022. netdata_rwlock_wrlock(&host->labels_rwlock);
  1023. struct label *old_labels = host->labels;
  1024. host->labels = new_labels;
  1025. netdata_rwlock_unlock(&host->labels_rwlock);
  1026. free_host_labels(old_labels);
  1027. }
  1028. struct label *add_label_to_list(struct label *l, char *key, char *value, LABEL_SOURCE label_source)
  1029. {
  1030. struct label *lab = create_label(key, value, label_source);
  1031. lab->next = l;
  1032. return lab;
  1033. }
  1034. int label_list_contains(struct label *head, struct label *check)
  1035. {
  1036. while (head != NULL)
  1037. {
  1038. if (head->key_hash == check->key_hash && !strcmp(head->key, check->key))
  1039. return 1;
  1040. head = head->next;
  1041. }
  1042. return 0;
  1043. }
  1044. /* Create a list with entries from both lists.
  1045. If any entry in the low priority list is masked by an entry in the high priorty list then delete it.
  1046. */
  1047. struct label *merge_label_lists(struct label *lo_pri, struct label *hi_pri)
  1048. {
  1049. struct label *result = hi_pri;
  1050. while (lo_pri != NULL)
  1051. {
  1052. struct label *current = lo_pri;
  1053. lo_pri = lo_pri->next;
  1054. if (!label_list_contains(result, current)) {
  1055. current->next = result;
  1056. result = current;
  1057. }
  1058. else
  1059. freez(current);
  1060. }
  1061. return result;
  1062. }
  1063. void reload_host_labels()
  1064. {
  1065. struct label *from_auto = load_auto_labels();
  1066. struct label *from_k8s = load_kubernetes_labels();
  1067. struct label *from_config = load_config_labels();
  1068. struct label *from_tags = load_labels_from_tags();
  1069. struct label *new_labels = merge_label_lists(from_auto, from_k8s);
  1070. new_labels = merge_label_lists(new_labels, from_tags);
  1071. new_labels = merge_label_lists(new_labels, from_config);
  1072. rrdhost_rdlock(localhost);
  1073. replace_label_list(localhost, new_labels);
  1074. health_label_log_save(localhost);
  1075. rrdhost_unlock(localhost);
  1076. /* TODO-GAPS - fix this so that it looks properly at the state and version of the sender
  1077. if(localhost->rrdpush_send_enabled && localhost->rrdpush_sender_buffer){
  1078. localhost->labels_flag |= LABEL_FLAG_UPDATE_STREAM;
  1079. rrdpush_send_labels(localhost);
  1080. }
  1081. */
  1082. health_reload();
  1083. }
  1084. // ----------------------------------------------------------------------------
  1085. // RRDHOST - delete host files
  1086. void rrdhost_delete_charts(RRDHOST *host) {
  1087. if(!host) return;
  1088. info("Deleting database of host '%s'...", host->hostname);
  1089. RRDSET *st;
  1090. // we get a write lock
  1091. // to ensure only one thread is saving the database
  1092. rrdhost_wrlock(host);
  1093. rrdset_foreach_write(st, host) {
  1094. rrdset_rdlock(st);
  1095. rrdset_delete(st);
  1096. rrdset_unlock(st);
  1097. }
  1098. recursively_delete_dir(host->cache_dir, "left over host");
  1099. rrdhost_unlock(host);
  1100. }
  1101. // ----------------------------------------------------------------------------
  1102. // RRDHOST - cleanup host files
  1103. void rrdhost_cleanup_charts(RRDHOST *host) {
  1104. if(!host) return;
  1105. info("Cleaning up database of host '%s'...", host->hostname);
  1106. RRDSET *st;
  1107. uint32_t rrdhost_delete_obsolete_charts = rrdhost_flag_check(host, RRDHOST_FLAG_DELETE_OBSOLETE_CHARTS);
  1108. // we get a write lock
  1109. // to ensure only one thread is saving the database
  1110. rrdhost_wrlock(host);
  1111. rrdset_foreach_write(st, host) {
  1112. rrdset_rdlock(st);
  1113. if(rrdhost_delete_obsolete_charts && rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE))
  1114. rrdset_delete(st);
  1115. else if(rrdhost_delete_obsolete_charts && rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE_DIMENSIONS))
  1116. rrdset_delete_obsolete_dimensions(st);
  1117. else
  1118. rrdset_save(st);
  1119. rrdset_unlock(st);
  1120. }
  1121. rrdhost_unlock(host);
  1122. }
  1123. // ----------------------------------------------------------------------------
  1124. // RRDHOST - save all hosts to disk
  1125. void rrdhost_save_all(void) {
  1126. info("Saving database [%zu hosts(s)]...", rrd_hosts_available);
  1127. rrd_rdlock();
  1128. RRDHOST *host;
  1129. rrdhost_foreach_read(host)
  1130. rrdhost_save_charts(host);
  1131. rrd_unlock();
  1132. }
  1133. // ----------------------------------------------------------------------------
  1134. // RRDHOST - save or delete all hosts from disk
  1135. void rrdhost_cleanup_all(void) {
  1136. info("Cleaning up database [%zu hosts(s)]...", rrd_hosts_available);
  1137. rrd_rdlock();
  1138. RRDHOST *host;
  1139. rrdhost_foreach_read(host) {
  1140. if(host != localhost && rrdhost_flag_check(host, RRDHOST_FLAG_DELETE_OBSOLETE_CHARTS) && !host->receiver)
  1141. rrdhost_delete_charts(host);
  1142. else
  1143. rrdhost_cleanup_charts(host);
  1144. }
  1145. rrd_unlock();
  1146. }
  1147. // ----------------------------------------------------------------------------
  1148. // RRDHOST - save or delete all the host charts from disk
  1149. void rrdhost_cleanup_obsolete_charts(RRDHOST *host) {
  1150. time_t now = now_realtime_sec();
  1151. RRDSET *st;
  1152. uint32_t rrdhost_delete_obsolete_charts = rrdhost_flag_check(host, RRDHOST_FLAG_DELETE_OBSOLETE_CHARTS);
  1153. restart_after_removal:
  1154. rrdset_foreach_write(st, host) {
  1155. if(unlikely(rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE)
  1156. && st->last_accessed_time + rrdset_free_obsolete_time < now
  1157. && st->last_updated.tv_sec + rrdset_free_obsolete_time < now
  1158. && st->last_collected_time.tv_sec + rrdset_free_obsolete_time < now
  1159. )) {
  1160. #ifdef ENABLE_DBENGINE
  1161. if(st->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) {
  1162. rrdset_flag_set(st, RRDSET_FLAG_ARCHIVED);
  1163. rrdset_flag_clear(st, RRDSET_FLAG_OBSOLETE);
  1164. if (st->dimensions) {
  1165. /* If the chart still has dimensions don't delete it from the metadata log */
  1166. continue;
  1167. }
  1168. metalog_commit_delete_chart(st);
  1169. }
  1170. #endif
  1171. rrdset_rdlock(st);
  1172. if(rrdhost_delete_obsolete_charts)
  1173. rrdset_delete(st);
  1174. else
  1175. rrdset_save(st);
  1176. rrdset_unlock(st);
  1177. rrdset_free(st);
  1178. goto restart_after_removal;
  1179. }
  1180. }
  1181. }
  1182. // ----------------------------------------------------------------------------
  1183. // RRDHOST - set system info from environment variables
  1184. // system_info fields must be heap allocated or NULL
  1185. int rrdhost_set_system_info_variable(struct rrdhost_system_info *system_info, char *name, char *value) {
  1186. int res = 0;
  1187. if (!strcmp(name, "NETDATA_PROTOCOL_VERSION"))
  1188. return res;
  1189. else if(!strcmp(name, "NETDATA_CONTAINER_OS_NAME")){
  1190. freez(system_info->container_os_name);
  1191. system_info->container_os_name = strdupz(value);
  1192. }
  1193. else if(!strcmp(name, "NETDATA_CONTAINER_OS_ID")){
  1194. freez(system_info->container_os_id);
  1195. system_info->container_os_id = strdupz(value);
  1196. }
  1197. else if(!strcmp(name, "NETDATA_CONTAINER_OS_ID_LIKE")){
  1198. freez(system_info->container_os_id_like);
  1199. system_info->container_os_id_like = strdupz(value);
  1200. }
  1201. else if(!strcmp(name, "NETDATA_CONTAINER_OS_VERSION")){
  1202. freez(system_info->container_os_version);
  1203. system_info->container_os_version = strdupz(value);
  1204. }
  1205. else if(!strcmp(name, "NETDATA_CONTAINER_OS_VERSION_ID")){
  1206. freez(system_info->container_os_version_id);
  1207. system_info->container_os_version_id = strdupz(value);
  1208. }
  1209. else if(!strcmp(name, "NETDATA_CONTAINER_OS_DETECTION")){
  1210. freez(system_info->host_os_detection);
  1211. system_info->host_os_detection = strdupz(value);
  1212. }
  1213. else if(!strcmp(name, "NETDATA_HOST_OS_NAME")){
  1214. freez(system_info->host_os_name);
  1215. system_info->host_os_name = strdupz(value);
  1216. }
  1217. else if(!strcmp(name, "NETDATA_HOST_OS_ID")){
  1218. freez(system_info->host_os_id);
  1219. system_info->host_os_id = strdupz(value);
  1220. }
  1221. else if(!strcmp(name, "NETDATA_HOST_OS_ID_LIKE")){
  1222. freez(system_info->host_os_id_like);
  1223. system_info->host_os_id_like = strdupz(value);
  1224. }
  1225. else if(!strcmp(name, "NETDATA_HOST_OS_VERSION")){
  1226. freez(system_info->host_os_version);
  1227. system_info->host_os_version = strdupz(value);
  1228. }
  1229. else if(!strcmp(name, "NETDATA_HOST_OS_VERSION_ID")){
  1230. freez(system_info->host_os_version_id);
  1231. system_info->host_os_version_id = strdupz(value);
  1232. }
  1233. else if(!strcmp(name, "NETDATA_HOST_OS_DETECTION")){
  1234. freez(system_info->host_os_detection);
  1235. system_info->host_os_detection = strdupz(value);
  1236. }
  1237. else if(!strcmp(name, "NETDATA_SYSTEM_KERNEL_NAME")){
  1238. freez(system_info->kernel_name);
  1239. system_info->kernel_name = strdupz(value);
  1240. }
  1241. else if(!strcmp(name, "NETDATA_SYSTEM_CPU_LOGICAL_CPU_COUNT")){
  1242. freez(system_info->host_cores);
  1243. system_info->host_cores = strdupz(value);
  1244. }
  1245. else if(!strcmp(name, "NETDATA_SYSTEM_CPU_FREQ")){
  1246. freez(system_info->host_cpu_freq);
  1247. system_info->host_cpu_freq = strdupz(value);
  1248. }
  1249. else if(!strcmp(name, "NETDATA_SYSTEM_TOTAL_RAM")){
  1250. freez(system_info->host_ram_total);
  1251. system_info->host_ram_total = strdupz(value);
  1252. }
  1253. else if(!strcmp(name, "NETDATA_SYSTEM_TOTAL_DISK_SIZE")){
  1254. freez(system_info->host_disk_space);
  1255. system_info->host_disk_space = strdupz(value);
  1256. }
  1257. else if(!strcmp(name, "NETDATA_SYSTEM_KERNEL_VERSION")){
  1258. freez(system_info->kernel_version);
  1259. system_info->kernel_version = strdupz(value);
  1260. }
  1261. else if(!strcmp(name, "NETDATA_SYSTEM_ARCHITECTURE")){
  1262. freez(system_info->architecture);
  1263. system_info->architecture = strdupz(value);
  1264. }
  1265. else if(!strcmp(name, "NETDATA_SYSTEM_VIRTUALIZATION")){
  1266. freez(system_info->virtualization);
  1267. system_info->virtualization = strdupz(value);
  1268. }
  1269. else if(!strcmp(name, "NETDATA_SYSTEM_VIRT_DETECTION")){
  1270. freez(system_info->virt_detection);
  1271. system_info->virt_detection = strdupz(value);
  1272. }
  1273. else if(!strcmp(name, "NETDATA_SYSTEM_CONTAINER")){
  1274. freez(system_info->container);
  1275. system_info->container = strdupz(value);
  1276. }
  1277. else if(!strcmp(name, "NETDATA_SYSTEM_CONTAINER_DETECTION")){
  1278. freez(system_info->container_detection);
  1279. system_info->container_detection = strdupz(value);
  1280. }
  1281. else if (!strcmp(name, "NETDATA_SYSTEM_CPU_VENDOR"))
  1282. return res;
  1283. else if (!strcmp(name, "NETDATA_SYSTEM_CPU_MODEL"))
  1284. return res;
  1285. else if (!strcmp(name, "NETDATA_SYSTEM_CPU_DETECTION"))
  1286. return res;
  1287. else if (!strcmp(name, "NETDATA_SYSTEM_RAM_DETECTION"))
  1288. return res;
  1289. else if (!strcmp(name, "NETDATA_SYSTEM_DISK_DETECTION"))
  1290. return res;
  1291. else {
  1292. res = 1;
  1293. }
  1294. return res;
  1295. }
  1296. /**
  1297. * Alarm Compare ID
  1298. *
  1299. * Callback function used with the binary trees to compare the id of RRDCALC
  1300. *
  1301. * @param a a pointer to the RRDCAL item to insert,compare or update the binary tree
  1302. * @param b the pointer to the binary tree.
  1303. *
  1304. * @return It returns 0 case the values are equal, 1 case a is bigger than b and -1 case a is smaller than b.
  1305. */
  1306. int alarm_compare_id(void *a, void *b) {
  1307. register uint32_t hash1 = ((RRDCALC *)a)->id;
  1308. register uint32_t hash2 = ((RRDCALC *)b)->id;
  1309. if(hash1 < hash2) return -1;
  1310. else if(hash1 > hash2) return 1;
  1311. return 0;
  1312. }
  1313. /**
  1314. * Alarm Compare NAME
  1315. *
  1316. * Callback function used with the binary trees to compare the name of RRDCALC
  1317. *
  1318. * @param a a pointer to the RRDCAL item to insert,compare or update the binary tree
  1319. * @param b the pointer to the binary tree.
  1320. *
  1321. * @return It returns 0 case the values are equal, 1 case a is bigger than b and -1 case a is smaller than b.
  1322. */
  1323. int alarm_compare_name(void *a, void *b) {
  1324. RRDCALC *in1 = (RRDCALC *)a;
  1325. RRDCALC *in2 = (RRDCALC *)b;
  1326. if(in1->hash < in2->hash) return -1;
  1327. else if(in1->hash > in2->hash) return 1;
  1328. return strcmp(in1->name,in2->name);
  1329. }
  1330. // Added for gap-filling, if this proves to be a bottleneck in large-scale systems then we will need to cache
  1331. // the last entry times as the metric updates, but let's see if it is a problem first.
  1332. time_t rrdhost_last_entry_t(RRDHOST *h) {
  1333. rrdhost_rdlock(h);
  1334. RRDSET *st;
  1335. time_t result = 0;
  1336. rrdset_foreach_read(st, h) {
  1337. time_t st_last = rrdset_last_entry_t(st);
  1338. if (st_last > result)
  1339. result = st_last;
  1340. }
  1341. rrdhost_unlock(h);
  1342. return result;
  1343. }