main.c 87 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107210821092110211121122113
  1. // SPDX-License-Identifier: GPL-3.0-or-later
  2. #include "common.h"
  3. #include "buildinfo.h"
  4. #include "static_threads.h"
  5. #if defined(ENV32BIT)
  6. #warning COMPILING 32BIT NETDATA
  7. #endif
  8. bool unittest_running = false;
  9. int netdata_zero_metrics_enabled;
  10. int netdata_anonymous_statistics_enabled;
  11. int libuv_worker_threads = MIN_LIBUV_WORKER_THREADS;
  12. struct netdata_static_thread *static_threads;
  13. struct config netdata_config = {
  14. .first_section = NULL,
  15. .last_section = NULL,
  16. .mutex = NETDATA_MUTEX_INITIALIZER,
  17. .index = {
  18. .avl_tree = {
  19. .root = NULL,
  20. .compar = appconfig_section_compare
  21. },
  22. .rwlock = AVL_LOCK_INITIALIZER
  23. }
  24. };
  25. typedef struct service_thread {
  26. pid_t tid;
  27. SERVICE_THREAD_TYPE type;
  28. SERVICE_TYPE services;
  29. char name[NETDATA_THREAD_NAME_MAX + 1];
  30. bool cancelled;
  31. union {
  32. netdata_thread_t netdata_thread;
  33. uv_thread_t uv_thread;
  34. };
  35. force_quit_t force_quit_callback;
  36. request_quit_t request_quit_callback;
  37. void *data;
  38. } SERVICE_THREAD;
  39. struct service_globals {
  40. SERVICE_TYPE running;
  41. SPINLOCK lock;
  42. Pvoid_t pid_judy;
  43. } service_globals = {
  44. .running = ~0,
  45. .pid_judy = NULL,
  46. };
  47. SERVICE_THREAD *service_register(SERVICE_THREAD_TYPE thread_type, request_quit_t request_quit_callback, force_quit_t force_quit_callback, void *data, bool update __maybe_unused) {
  48. SERVICE_THREAD *sth = NULL;
  49. pid_t tid = gettid();
  50. netdata_spinlock_lock(&service_globals.lock);
  51. Pvoid_t *PValue = JudyLIns(&service_globals.pid_judy, tid, PJE0);
  52. if(!*PValue) {
  53. sth = callocz(1, sizeof(SERVICE_THREAD));
  54. sth->tid = tid;
  55. sth->type = thread_type;
  56. sth->request_quit_callback = request_quit_callback;
  57. sth->force_quit_callback = force_quit_callback;
  58. sth->data = data;
  59. os_thread_get_current_name_np(sth->name);
  60. *PValue = sth;
  61. switch(thread_type) {
  62. default:
  63. case SERVICE_THREAD_TYPE_NETDATA:
  64. sth->netdata_thread = netdata_thread_self();
  65. break;
  66. case SERVICE_THREAD_TYPE_EVENT_LOOP:
  67. case SERVICE_THREAD_TYPE_LIBUV:
  68. sth->uv_thread = uv_thread_self();
  69. break;
  70. }
  71. }
  72. else {
  73. sth = *PValue;
  74. }
  75. netdata_spinlock_unlock(&service_globals.lock);
  76. return sth;
  77. }
  78. void service_exits(void) {
  79. pid_t tid = gettid();
  80. netdata_spinlock_lock(&service_globals.lock);
  81. Pvoid_t *PValue = JudyLGet(service_globals.pid_judy, tid, PJE0);
  82. if(PValue) {
  83. freez(*PValue);
  84. JudyLDel(&service_globals.pid_judy, tid, PJE0);
  85. }
  86. netdata_spinlock_unlock(&service_globals.lock);
  87. }
  88. bool service_running(SERVICE_TYPE service) {
  89. static __thread SERVICE_THREAD *sth = NULL;
  90. if(unlikely(!sth))
  91. sth = service_register(SERVICE_THREAD_TYPE_NETDATA, NULL, NULL, NULL, false);
  92. if(netdata_exit)
  93. __atomic_store_n(&service_globals.running, 0, __ATOMIC_RELAXED);
  94. if(service == 0)
  95. service = sth->services;
  96. sth->services |= service;
  97. return ((__atomic_load_n(&service_globals.running, __ATOMIC_RELAXED) & service) == service);
  98. }
  99. void service_signal_exit(SERVICE_TYPE service) {
  100. __atomic_and_fetch(&service_globals.running, ~(service), __ATOMIC_RELAXED);
  101. netdata_spinlock_lock(&service_globals.lock);
  102. Pvoid_t *PValue;
  103. Word_t tid = 0;
  104. bool first = true;
  105. while((PValue = JudyLFirstThenNext(service_globals.pid_judy, &tid, &first))) {
  106. SERVICE_THREAD *sth = *PValue;
  107. if((sth->services & service) && sth->request_quit_callback) {
  108. netdata_spinlock_unlock(&service_globals.lock);
  109. sth->request_quit_callback(sth->data);
  110. netdata_spinlock_lock(&service_globals.lock);
  111. continue;
  112. }
  113. }
  114. netdata_spinlock_unlock(&service_globals.lock);
  115. }
  116. static void service_to_buffer(BUFFER *wb, SERVICE_TYPE service) {
  117. if(service & SERVICE_MAINTENANCE)
  118. buffer_strcat(wb, "MAINTENANCE ");
  119. if(service & SERVICE_COLLECTORS)
  120. buffer_strcat(wb, "COLLECTORS ");
  121. if(service & SERVICE_ML_TRAINING)
  122. buffer_strcat(wb, "ML_TRAINING ");
  123. if(service & SERVICE_ML_PREDICTION)
  124. buffer_strcat(wb, "ML_PREDICTION ");
  125. if(service & SERVICE_REPLICATION)
  126. buffer_strcat(wb, "REPLICATION ");
  127. if(service & ABILITY_DATA_QUERIES)
  128. buffer_strcat(wb, "DATA_QUERIES ");
  129. if(service & ABILITY_WEB_REQUESTS)
  130. buffer_strcat(wb, "WEB_REQUESTS ");
  131. if(service & SERVICE_WEB_SERVER)
  132. buffer_strcat(wb, "WEB_SERVER ");
  133. if(service & SERVICE_ACLK)
  134. buffer_strcat(wb, "ACLK ");
  135. if(service & SERVICE_HEALTH)
  136. buffer_strcat(wb, "HEALTH ");
  137. if(service & SERVICE_STREAMING)
  138. buffer_strcat(wb, "STREAMING ");
  139. if(service & ABILITY_STREAMING_CONNECTIONS)
  140. buffer_strcat(wb, "STREAMING_CONNECTIONS ");
  141. if(service & SERVICE_CONTEXT)
  142. buffer_strcat(wb, "CONTEXT ");
  143. if(service & SERVICE_ANALYTICS)
  144. buffer_strcat(wb, "ANALYTICS ");
  145. if(service & SERVICE_EXPORTERS)
  146. buffer_strcat(wb, "EXPORTERS ");
  147. }
  148. static bool service_wait_exit(SERVICE_TYPE service, usec_t timeout_ut) {
  149. BUFFER *service_list = buffer_create(1024, NULL);
  150. BUFFER *thread_list = buffer_create(1024, NULL);
  151. usec_t started_ut = now_monotonic_usec(), ended_ut;
  152. size_t running;
  153. SERVICE_TYPE running_services = 0;
  154. // cancel the threads
  155. running = 0;
  156. running_services = 0;
  157. {
  158. buffer_flush(thread_list);
  159. netdata_spinlock_lock(&service_globals.lock);
  160. Pvoid_t *PValue;
  161. Word_t tid = 0;
  162. bool first = true;
  163. while((PValue = JudyLFirstThenNext(service_globals.pid_judy, &tid, &first))) {
  164. SERVICE_THREAD *sth = *PValue;
  165. if(sth->services & service && sth->tid != gettid() && !sth->cancelled) {
  166. sth->cancelled = true;
  167. switch(sth->type) {
  168. default:
  169. case SERVICE_THREAD_TYPE_NETDATA:
  170. netdata_thread_cancel(sth->netdata_thread);
  171. break;
  172. case SERVICE_THREAD_TYPE_EVENT_LOOP:
  173. case SERVICE_THREAD_TYPE_LIBUV:
  174. break;
  175. }
  176. if(running)
  177. buffer_strcat(thread_list, ", ");
  178. buffer_sprintf(thread_list, "'%s' (%d)", sth->name, sth->tid);
  179. running++;
  180. running_services |= sth->services & service;
  181. if(sth->force_quit_callback) {
  182. netdata_spinlock_unlock(&service_globals.lock);
  183. sth->force_quit_callback(sth->data);
  184. netdata_spinlock_lock(&service_globals.lock);
  185. continue;
  186. }
  187. }
  188. }
  189. netdata_spinlock_unlock(&service_globals.lock);
  190. }
  191. service_signal_exit(service);
  192. // signal them to stop
  193. size_t last_running = 0;
  194. size_t stale_time_ut = 0;
  195. usec_t sleep_ut = 50 * USEC_PER_MS;
  196. size_t log_countdown_ut = sleep_ut;
  197. do {
  198. if(running != last_running)
  199. stale_time_ut = 0;
  200. last_running = running;
  201. running = 0;
  202. running_services = 0;
  203. buffer_flush(thread_list);
  204. netdata_spinlock_lock(&service_globals.lock);
  205. Pvoid_t *PValue;
  206. Word_t tid = 0;
  207. bool first = true;
  208. while((PValue = JudyLFirstThenNext(service_globals.pid_judy, &tid, &first))) {
  209. SERVICE_THREAD *sth = *PValue;
  210. if(sth->services & service && sth->tid != gettid()) {
  211. if(running)
  212. buffer_strcat(thread_list, ", ");
  213. buffer_sprintf(thread_list, "'%s' (%d)", sth->name, sth->tid);
  214. running_services |= sth->services & service;
  215. running++;
  216. }
  217. }
  218. netdata_spinlock_unlock(&service_globals.lock);
  219. if(running) {
  220. log_countdown_ut -= (log_countdown_ut >= sleep_ut) ? sleep_ut : log_countdown_ut;
  221. if(log_countdown_ut == 0 || running != last_running) {
  222. log_countdown_ut = 20 * sleep_ut;
  223. buffer_flush(service_list);
  224. service_to_buffer(service_list, running_services);
  225. info("SERVICE CONTROL: waiting for the following %zu services [ %s] to exit: %s",
  226. running, buffer_tostring(service_list),
  227. running <= 10 ? buffer_tostring(thread_list) : "");
  228. }
  229. sleep_usec(sleep_ut);
  230. stale_time_ut += sleep_ut;
  231. }
  232. ended_ut = now_monotonic_usec();
  233. } while(running && (ended_ut - started_ut < timeout_ut || stale_time_ut < timeout_ut));
  234. if(running) {
  235. buffer_flush(service_list);
  236. service_to_buffer(service_list, running_services);
  237. info("SERVICE CONTROL: "
  238. "the following %zu service(s) [ %s] take too long to exit: %s; "
  239. "giving up on them...",
  240. running, buffer_tostring(service_list),
  241. buffer_tostring(thread_list));
  242. }
  243. buffer_free(thread_list);
  244. buffer_free(service_list);
  245. return (running == 0);
  246. }
  /*
   * Log the time taken by the previous shutdown step and announce the next one.
   *
   * Expects these locals in the calling scope: 'last_ut' (usec_t, timestamp of
   * the previous call), 'prev_msg' (const char *, NULL before the first call)
   * and 'timeout' (bool, reported as "(TIMEOUT) " and then reset to false).
   *
   * Wrapped in do { } while(0) so that an invocation followed by ';' is exactly
   * one statement, also inside an unbraced if/else.
   */
  #define delta_shutdown_time(msg)                                                                   \
      do {                                                                                           \
          usec_t now_ut = now_monotonic_usec();                                                      \
          if(prev_msg)                                                                               \
              info("NETDATA SHUTDOWN: in %7llu ms, %s%s - next: %s", (now_ut - last_ut) / USEC_PER_MS, (timeout)?"(TIMEOUT) ":"", prev_msg, (msg)); \
          else                                                                                       \
              info("NETDATA SHUTDOWN: next: %s", (msg));                                             \
          last_ut = now_ut;                                                                          \
          prev_msg = (msg);                                                                          \
          timeout = false;                                                                           \
      } while(0)
  258. void netdata_cleanup_and_exit(int ret) {
  259. usec_t started_ut = now_monotonic_usec();
  260. usec_t last_ut = started_ut;
  261. const char *prev_msg = NULL;
  262. bool timeout = false;
  263. error_log_limit_unlimited();
  264. info("NETDATA SHUTDOWN: initializing shutdown with code %d...", ret);
  265. send_statistics("EXIT", ret?"ERROR":"OK","-");
  266. delta_shutdown_time("create shutdown file");
  267. char agent_crash_file[FILENAME_MAX + 1];
  268. char agent_incomplete_shutdown_file[FILENAME_MAX + 1];
  269. snprintfz(agent_crash_file, FILENAME_MAX, "%s/.agent_crash", netdata_configured_varlib_dir);
  270. snprintfz(agent_incomplete_shutdown_file, FILENAME_MAX, "%s/.agent_incomplete_shutdown", netdata_configured_varlib_dir);
  271. (void) rename(agent_crash_file, agent_incomplete_shutdown_file);
  272. #ifdef ENABLE_DBENGINE
  273. if(dbengine_enabled) {
  274. delta_shutdown_time("dbengine exit mode");
  275. for (size_t tier = 0; tier < storage_tiers; tier++)
  276. rrdeng_exit_mode(multidb_ctx[tier]);
  277. }
  278. #endif
  279. delta_shutdown_time("disable maintenance, new queries, new web requests, new streaming connections and aclk");
  280. service_signal_exit(
  281. SERVICE_MAINTENANCE
  282. | ABILITY_DATA_QUERIES
  283. | ABILITY_WEB_REQUESTS
  284. | ABILITY_STREAMING_CONNECTIONS
  285. | SERVICE_ACLK
  286. );
  287. delta_shutdown_time("stop replication, exporters, ML training, health and web servers threads");
  288. timeout = !service_wait_exit(
  289. SERVICE_REPLICATION
  290. | SERVICE_EXPORTERS
  291. | SERVICE_ML_TRAINING
  292. | SERVICE_HEALTH
  293. | SERVICE_WEB_SERVER
  294. , 3 * USEC_PER_SEC);
  295. delta_shutdown_time("stop collectors and streaming threads");
  296. timeout = !service_wait_exit(
  297. SERVICE_COLLECTORS
  298. | SERVICE_STREAMING
  299. , 3 * USEC_PER_SEC);
  300. delta_shutdown_time("stop ML prediction and context threads");
  301. timeout = !service_wait_exit(
  302. SERVICE_ML_PREDICTION
  303. | SERVICE_CONTEXT
  304. , 3 * USEC_PER_SEC);
  305. delta_shutdown_time("stop maintenance thread");
  306. timeout = !service_wait_exit(
  307. SERVICE_MAINTENANCE
  308. , 3 * USEC_PER_SEC);
  309. delta_shutdown_time("clean rrdhost database");
  310. rrdhost_cleanup_all();
  311. delta_shutdown_time("prepare metasync shutdown");
  312. metadata_sync_shutdown_prepare();
  313. #ifdef ENABLE_ACLK
  314. delta_shutdown_time("signal aclk sync to stop");
  315. aclk_sync_exit_all();
  316. #endif
  317. delta_shutdown_time("stop aclk threads");
  318. timeout = !service_wait_exit(
  319. SERVICE_ACLK
  320. , 3 * USEC_PER_SEC);
  321. delta_shutdown_time("stop all remaining worker threads");
  322. timeout = !service_wait_exit(~0, 10 * USEC_PER_SEC);
  323. delta_shutdown_time("cancel main threads");
  324. cancel_main_threads();
  325. if(!ret) {
  326. // exit cleanly
  327. #ifdef ENABLE_DBENGINE
  328. if(dbengine_enabled) {
  329. delta_shutdown_time("flush dbengine tiers");
  330. for (size_t tier = 0; tier < storage_tiers; tier++)
  331. rrdeng_prepare_exit(multidb_ctx[tier]);
  332. }
  333. #endif
  334. // free the database
  335. delta_shutdown_time("stop collection for all hosts");
  336. // rrdhost_free_all();
  337. rrd_finalize_collection_for_all_hosts();
  338. delta_shutdown_time("stop metasync threads");
  339. metadata_sync_shutdown();
  340. #ifdef ENABLE_DBENGINE
  341. if(dbengine_enabled) {
  342. delta_shutdown_time("wait for dbengine collectors to finish");
  343. size_t running = 1;
  344. while(running) {
  345. running = 0;
  346. for (size_t tier = 0; tier < storage_tiers; tier++)
  347. running += rrdeng_collectors_running(multidb_ctx[tier]);
  348. if(running)
  349. sleep_usec(100 * USEC_PER_MS);
  350. }
  351. delta_shutdown_time("wait for dbengine main cache to finish flushing");
  352. while (pgc_hot_and_dirty_entries(main_cache)) {
  353. pgc_flush_all_hot_and_dirty_pages(main_cache, PGC_SECTION_ALL);
  354. sleep_usec(100 * USEC_PER_MS);
  355. }
  356. delta_shutdown_time("stop dbengine tiers");
  357. for (size_t tier = 0; tier < storage_tiers; tier++)
  358. rrdeng_exit(multidb_ctx[tier]);
  359. }
  360. #endif
  361. }
  362. delta_shutdown_time("close SQL context db");
  363. sql_close_context_database();
  364. delta_shutdown_time("closed SQL main db");
  365. sql_close_database();
  366. // unlink the pid
  367. if(pidfile[0]) {
  368. delta_shutdown_time("remove pid file");
  369. if(unlink(pidfile) != 0)
  370. error("EXIT: cannot unlink pidfile '%s'.", pidfile);
  371. }
  372. #ifdef ENABLE_HTTPS
  373. delta_shutdown_time("free openssl structures");
  374. security_clean_openssl();
  375. #endif
  376. delta_shutdown_time("remove incomplete shutdown file");
  377. (void) unlink(agent_incomplete_shutdown_file);
  378. delta_shutdown_time("exit");
  379. usec_t ended_ut = now_monotonic_usec();
  380. info("NETDATA SHUTDOWN: completed in %llu ms - netdata is now exiting - bye bye...", (ended_ut - started_ut) / USEC_PER_MS);
  381. exit(ret);
  382. }
  383. void web_server_threading_selection(void) {
  384. web_server_mode = web_server_mode_id(config_get(CONFIG_SECTION_WEB, "mode", web_server_mode_name(web_server_mode)));
  385. int static_threaded = (web_server_mode == WEB_SERVER_MODE_STATIC_THREADED);
  386. int i;
  387. for (i = 0; static_threads[i].name; i++) {
  388. if (static_threads[i].start_routine == socket_listen_main_static_threaded)
  389. static_threads[i].enabled = static_threaded;
  390. }
  391. }
  392. int make_dns_decision(const char *section_name, const char *config_name, const char *default_value, SIMPLE_PATTERN *p)
  393. {
  394. char *value = config_get(section_name,config_name,default_value);
  395. if(!strcmp("yes",value))
  396. return 1;
  397. if(!strcmp("no",value))
  398. return 0;
  399. if(strcmp("heuristic",value))
  400. error("Invalid configuration option '%s' for '%s'/'%s'. Valid options are 'yes', 'no' and 'heuristic'. Proceeding with 'heuristic'",
  401. value, section_name, config_name);
  402. return simple_pattern_is_potential_name(p);
  403. }
  404. void web_server_config_options(void)
  405. {
  406. web_client_timeout =
  407. (int)config_get_number(CONFIG_SECTION_WEB, "disconnect idle clients after seconds", web_client_timeout);
  408. web_client_first_request_timeout =
  409. (int)config_get_number(CONFIG_SECTION_WEB, "timeout for first request", web_client_first_request_timeout);
  410. web_client_streaming_rate_t =
  411. config_get_number(CONFIG_SECTION_WEB, "accept a streaming request every seconds", web_client_streaming_rate_t);
  412. respect_web_browser_do_not_track_policy =
  413. config_get_boolean(CONFIG_SECTION_WEB, "respect do not track policy", respect_web_browser_do_not_track_policy);
  414. web_x_frame_options = config_get(CONFIG_SECTION_WEB, "x-frame-options response header", "");
  415. if(!*web_x_frame_options)
  416. web_x_frame_options = NULL;
  417. web_allow_connections_from =
  418. simple_pattern_create(config_get(CONFIG_SECTION_WEB, "allow connections from", "localhost *"),
  419. NULL, SIMPLE_PATTERN_EXACT);
  420. web_allow_connections_dns =
  421. make_dns_decision(CONFIG_SECTION_WEB, "allow connections by dns", "heuristic", web_allow_connections_from);
  422. web_allow_dashboard_from =
  423. simple_pattern_create(config_get(CONFIG_SECTION_WEB, "allow dashboard from", "localhost *"),
  424. NULL, SIMPLE_PATTERN_EXACT);
  425. web_allow_dashboard_dns =
  426. make_dns_decision(CONFIG_SECTION_WEB, "allow dashboard by dns", "heuristic", web_allow_dashboard_from);
  427. web_allow_badges_from =
  428. simple_pattern_create(config_get(CONFIG_SECTION_WEB, "allow badges from", "*"), NULL, SIMPLE_PATTERN_EXACT);
  429. web_allow_badges_dns =
  430. make_dns_decision(CONFIG_SECTION_WEB, "allow badges by dns", "heuristic", web_allow_badges_from);
  431. web_allow_registry_from =
  432. simple_pattern_create(config_get(CONFIG_SECTION_REGISTRY, "allow from", "*"), NULL, SIMPLE_PATTERN_EXACT);
  433. web_allow_registry_dns = make_dns_decision(CONFIG_SECTION_REGISTRY, "allow by dns", "heuristic",
  434. web_allow_registry_from);
  435. web_allow_streaming_from = simple_pattern_create(config_get(CONFIG_SECTION_WEB, "allow streaming from", "*"),
  436. NULL, SIMPLE_PATTERN_EXACT);
  437. web_allow_streaming_dns = make_dns_decision(CONFIG_SECTION_WEB, "allow streaming by dns", "heuristic",
  438. web_allow_streaming_from);
  439. // Note the default is not heuristic, the wildcards could match DNS but the intent is ip-addresses.
  440. web_allow_netdataconf_from = simple_pattern_create(config_get(CONFIG_SECTION_WEB, "allow netdata.conf from",
  441. "localhost fd* 10.* 192.168.* 172.16.* 172.17.* 172.18.*"
  442. " 172.19.* 172.20.* 172.21.* 172.22.* 172.23.* 172.24.*"
  443. " 172.25.* 172.26.* 172.27.* 172.28.* 172.29.* 172.30.*"
  444. " 172.31.* UNKNOWN"), NULL, SIMPLE_PATTERN_EXACT);
  445. web_allow_netdataconf_dns =
  446. make_dns_decision(CONFIG_SECTION_WEB, "allow netdata.conf by dns", "no", web_allow_netdataconf_from);
  447. web_allow_mgmt_from =
  448. simple_pattern_create(config_get(CONFIG_SECTION_WEB, "allow management from", "localhost"),
  449. NULL, SIMPLE_PATTERN_EXACT);
  450. web_allow_mgmt_dns =
  451. make_dns_decision(CONFIG_SECTION_WEB, "allow management by dns","heuristic",web_allow_mgmt_from);
  452. #ifdef NETDATA_WITH_ZLIB
  453. web_enable_gzip = config_get_boolean(CONFIG_SECTION_WEB, "enable gzip compression", web_enable_gzip);
  454. char *s = config_get(CONFIG_SECTION_WEB, "gzip compression strategy", "default");
  455. if(!strcmp(s, "default"))
  456. web_gzip_strategy = Z_DEFAULT_STRATEGY;
  457. else if(!strcmp(s, "filtered"))
  458. web_gzip_strategy = Z_FILTERED;
  459. else if(!strcmp(s, "huffman only"))
  460. web_gzip_strategy = Z_HUFFMAN_ONLY;
  461. else if(!strcmp(s, "rle"))
  462. web_gzip_strategy = Z_RLE;
  463. else if(!strcmp(s, "fixed"))
  464. web_gzip_strategy = Z_FIXED;
  465. else {
  466. error("Invalid compression strategy '%s'. Valid strategies are 'default', 'filtered', 'huffman only', 'rle' and 'fixed'. Proceeding with 'default'.", s);
  467. web_gzip_strategy = Z_DEFAULT_STRATEGY;
  468. }
  469. web_gzip_level = (int)config_get_number(CONFIG_SECTION_WEB, "gzip compression level", 3);
  470. if(web_gzip_level < 1) {
  471. error("Invalid compression level %d. Valid levels are 1 (fastest) to 9 (best ratio). Proceeding with level 1 (fastest compression).", web_gzip_level);
  472. web_gzip_level = 1;
  473. }
  474. else if(web_gzip_level > 9) {
  475. error("Invalid compression level %d. Valid levels are 1 (fastest) to 9 (best ratio). Proceeding with level 9 (best compression).", web_gzip_level);
  476. web_gzip_level = 9;
  477. }
  478. #endif /* NETDATA_WITH_ZLIB */
  479. }
  480. // killpid kills pid with SIGTERM.
  481. int killpid(pid_t pid) {
  482. int ret;
  483. debug(D_EXIT, "Request to kill pid %d", pid);
  484. errno = 0;
  485. ret = kill(pid, SIGTERM);
  486. if (ret == -1) {
  487. switch(errno) {
  488. case ESRCH:
  489. // We wanted the process to exit so just let the caller handle.
  490. return ret;
  491. case EPERM:
  492. error("Cannot kill pid %d, but I do not have enough permissions.", pid);
  493. break;
  494. default:
  495. error("Cannot kill pid %d, but I received an error.", pid);
  496. break;
  497. }
  498. }
  499. return ret;
  500. }
  /*
   * Raise the soft limit of open file descriptors to the hard limit.
   *
   * rl receives the limits as re-read after the attempt. A failed setrlimit()
   * is logged but does not stop the final check, which logs an error when
   * fewer than 1024 descriptors are available.
   */
  static void set_nofile_limit(struct rlimit *rl) {
      const rlim_t minimum_fds = 1024;

      // read the limits currently in effect
      if(getrlimit(RLIMIT_NOFILE, rl) != 0) {
          error("getrlimit(RLIMIT_NOFILE) failed");
          return;
      }

      size_t soft = (size_t) rl->rlim_cur;
      size_t hard = (size_t) rl->rlim_max;
      info("resources control: allowed file descriptors: soft = %zu, max = %zu", soft, hard);

      // ask for soft == hard
      rl->rlim_cur = rl->rlim_max;
      if(setrlimit(RLIMIT_NOFILE, rl) != 0)
          error("setrlimit(RLIMIT_NOFILE, { %zu, %zu }) failed", (size_t)rl->rlim_cur, (size_t)rl->rlim_max);

      // read back what we really got and check that it is enough
      if(getrlimit(RLIMIT_NOFILE, rl) != 0) {
          error("getrlimit(RLIMIT_NOFILE) failed");
          return;
      }

      if(rl->rlim_cur < minimum_fds)
          error("Number of open file descriptors allowed for this process is too low (RLIMIT_NOFILE=%zu)", (size_t)rl->rlim_cur);
  }
  522. void cancel_main_threads() {
  523. error_log_limit_unlimited();
  524. int i, found = 0;
  525. usec_t max = 5 * USEC_PER_SEC, step = 100000;
  526. for (i = 0; static_threads[i].name != NULL ; i++) {
  527. if(static_threads[i].enabled == NETDATA_MAIN_THREAD_RUNNING) {
  528. info("EXIT: Stopping main thread: %s", static_threads[i].name);
  529. netdata_thread_cancel(*static_threads[i].thread);
  530. found++;
  531. }
  532. }
  533. netdata_exit = 1;
  534. while(found && max > 0) {
  535. max -= step;
  536. info("Waiting %d threads to finish...", found);
  537. sleep_usec(step);
  538. found = 0;
  539. for (i = 0; static_threads[i].name != NULL ; i++) {
  540. if (static_threads[i].enabled != NETDATA_MAIN_THREAD_EXITED)
  541. found++;
  542. }
  543. }
  544. if(found) {
  545. for (i = 0; static_threads[i].name != NULL ; i++) {
  546. if (static_threads[i].enabled != NETDATA_MAIN_THREAD_EXITED)
  547. error("Main thread %s takes too long to exit. Giving up...", static_threads[i].name);
  548. }
  549. }
  550. else
  551. info("All threads finished.");
  552. for (i = 0; static_threads[i].name != NULL ; i++)
  553. freez(static_threads[i].thread);
  554. freez(static_threads);
  555. }
  556. struct option_def option_definitions[] = {
  557. // opt description arg name default value
  558. { 'c', "Configuration file to load.", "filename", CONFIG_DIR "/" CONFIG_FILENAME},
  559. { 'D', "Do not fork. Run in the foreground.", NULL, "run in the background"},
  560. { 'd', "Fork. Run in the background.", NULL, "run in the background"},
  561. { 'h', "Display this help message.", NULL, NULL},
  562. { 'P', "File to save a pid while running.", "filename", "do not save pid to a file"},
  563. { 'i', "The IP address to listen to.", "IP", "all IP addresses IPv4 and IPv6"},
  564. { 'p', "API/Web port to use.", "port", "19999"},
  565. { 's', "Prefix for /proc and /sys (for containers).", "path", "no prefix"},
  566. { 't', "The internal clock of netdata.", "seconds", "1"},
  567. { 'u', "Run as user.", "username", "netdata"},
  568. { 'v', "Print netdata version and exit.", NULL, NULL},
  569. { 'V', "Print netdata version and exit.", NULL, NULL},
  570. { 'W', "See Advanced options below.", "options", NULL},
  571. };
  572. int help(int exitcode) {
  573. FILE *stream;
  574. if(exitcode == 0)
  575. stream = stdout;
  576. else
  577. stream = stderr;
  578. int num_opts = sizeof(option_definitions) / sizeof(struct option_def);
  579. int i;
  580. int max_len_arg = 0;
  581. // Compute maximum argument length
  582. for( i = 0; i < num_opts; i++ ) {
  583. if(option_definitions[i].arg_name) {
  584. int len_arg = (int)strlen(option_definitions[i].arg_name);
  585. if(len_arg > max_len_arg) max_len_arg = len_arg;
  586. }
  587. }
  588. if(max_len_arg > 30) max_len_arg = 30;
  589. if(max_len_arg < 20) max_len_arg = 20;
  590. fprintf(stream, "%s", "\n"
  591. " ^\n"
  592. " |.-. .-. .-. .-. . netdata \n"
  593. " | '-' '-' '-' '-' real-time performance monitoring, done right! \n"
  594. " +----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+--->\n"
  595. "\n"
  596. " Copyright (C) 2016-2022, Netdata, Inc. <info@netdata.cloud>\n"
  597. " Released under GNU General Public License v3 or later.\n"
  598. " All rights reserved.\n"
  599. "\n"
  600. " Home Page : https://netdata.cloud\n"
  601. " Source Code: https://github.com/netdata/netdata\n"
  602. " Docs : https://learn.netdata.cloud\n"
  603. " Support : https://github.com/netdata/netdata/issues\n"
  604. " License : https://github.com/netdata/netdata/blob/master/LICENSE.md\n"
  605. "\n"
  606. " Twitter : https://twitter.com/linuxnetdata\n"
  607. " LinkedIn : https://linkedin.com/company/netdata-cloud/\n"
  608. " Facebook : https://facebook.com/linuxnetdata/\n"
  609. "\n"
  610. "\n"
  611. );
  612. fprintf(stream, " SYNOPSIS: netdata [options]\n");
  613. fprintf(stream, "\n");
  614. fprintf(stream, " Options:\n\n");
  615. // Output options description.
  616. for( i = 0; i < num_opts; i++ ) {
  617. fprintf(stream, " -%c %-*s %s", option_definitions[i].val, max_len_arg, option_definitions[i].arg_name ? option_definitions[i].arg_name : "", option_definitions[i].description);
  618. if(option_definitions[i].default_value) {
  619. fprintf(stream, "\n %c %-*s Default: %s\n", ' ', max_len_arg, "", option_definitions[i].default_value);
  620. } else {
  621. fprintf(stream, "\n");
  622. }
  623. fprintf(stream, "\n");
  624. }
  625. fprintf(stream, "\n Advanced options:\n\n"
  626. " -W stacksize=N Set the stacksize (in bytes).\n\n"
  627. " -W debug_flags=N Set runtime tracing to debug.log.\n\n"
  628. " -W unittest Run internal unittests and exit.\n\n"
  629. " -W sqlite-check Check metadata database integrity and exit.\n\n"
  630. " -W sqlite-fix Check metadata database integrity, fix if needed and exit.\n\n"
  631. " -W sqlite-compact Reclaim metadata database unused space and exit.\n\n"
  632. #ifdef ENABLE_DBENGINE
  633. " -W createdataset=N Create a DB engine dataset of N seconds and exit.\n\n"
  634. " -W stresstest=A,B,C,D,E,F,G\n"
  635. " Run a DB engine stress test for A seconds,\n"
  636. " with B writers and C readers, with a ramp up\n"
  637. " time of D seconds for writers, a page cache\n"
  638. " size of E MiB, an optional disk space limit\n"
  639. " of F MiB, G libuv workers (default 16) and exit.\n\n"
  640. #endif
  641. " -W set section option value\n"
  642. " set netdata.conf option from the command line.\n\n"
  643. " -W buildinfo Print the version, the configure options,\n"
  644. " a list of optional features, and whether they\n"
  645. " are enabled or not.\n\n"
  646. " -W buildinfojson Print the version, the configure options,\n"
  647. " a list of optional features, and whether they\n"
  648. " are enabled or not, in JSON format.\n\n"
  649. " -W simple-pattern pattern string\n"
  650. " Check if string matches pattern and exit.\n\n"
  651. " -W \"claim -token=TOKEN -rooms=ROOM1,ROOM2\"\n"
  652. " Claim the agent to the workspace rooms pointed to by TOKEN and ROOM*.\n\n"
  653. );
  654. fprintf(stream, "\n Signals netdata handles:\n\n"
  655. " - HUP Close and reopen log files.\n"
  656. " - USR1 Save internal DB to disk.\n"
  657. " - USR2 Reload health configuration.\n"
  658. "\n"
  659. );
  660. fflush(stream);
  661. return exitcode;
  662. }
  663. #ifdef ENABLE_HTTPS
  664. static void security_init(){
  665. char filename[FILENAME_MAX + 1];
  666. snprintfz(filename, FILENAME_MAX, "%s/ssl/key.pem",netdata_configured_user_config_dir);
  667. netdata_ssl_security_key = config_get(CONFIG_SECTION_WEB, "ssl key", filename);
  668. snprintfz(filename, FILENAME_MAX, "%s/ssl/cert.pem",netdata_configured_user_config_dir);
  669. netdata_ssl_security_cert = config_get(CONFIG_SECTION_WEB, "ssl certificate", filename);
  670. tls_version = config_get(CONFIG_SECTION_WEB, "tls version", "1.3");
  671. tls_ciphers = config_get(CONFIG_SECTION_WEB, "tls ciphers", "none");
  672. security_openssl_library();
  673. }
  674. #endif
  675. static void log_init(void) {
  676. char filename[FILENAME_MAX + 1];
  677. snprintfz(filename, FILENAME_MAX, "%s/debug.log", netdata_configured_log_dir);
  678. stdout_filename = config_get(CONFIG_SECTION_LOGS, "debug", filename);
  679. snprintfz(filename, FILENAME_MAX, "%s/error.log", netdata_configured_log_dir);
  680. stderr_filename = config_get(CONFIG_SECTION_LOGS, "error", filename);
  681. snprintfz(filename, FILENAME_MAX, "%s/collector.log", netdata_configured_log_dir);
  682. stdcollector_filename = config_get(CONFIG_SECTION_LOGS, "collector", filename);
  683. snprintfz(filename, FILENAME_MAX, "%s/access.log", netdata_configured_log_dir);
  684. stdaccess_filename = config_get(CONFIG_SECTION_LOGS, "access", filename);
  685. snprintfz(filename, FILENAME_MAX, "%s/health.log", netdata_configured_log_dir);
  686. stdhealth_filename = config_get(CONFIG_SECTION_LOGS, "health", filename);
  687. #ifdef ENABLE_ACLK
  688. aclklog_enabled = config_get_boolean(CONFIG_SECTION_CLOUD, "conversation log", CONFIG_BOOLEAN_NO);
  689. if (aclklog_enabled) {
  690. snprintfz(filename, FILENAME_MAX, "%s/aclk.log", netdata_configured_log_dir);
  691. aclklog_filename = config_get(CONFIG_SECTION_CLOUD, "conversation log file", filename);
  692. }
  693. #endif
  694. char deffacility[8];
  695. snprintfz(deffacility,7,"%s","daemon");
  696. facility_log = config_get(CONFIG_SECTION_LOGS, "facility", deffacility);
  697. error_log_throttle_period = config_get_number(CONFIG_SECTION_LOGS, "errors flood protection period", error_log_throttle_period);
  698. error_log_errors_per_period = (unsigned long)config_get_number(CONFIG_SECTION_LOGS, "errors to trigger flood protection", (long long int)error_log_errors_per_period);
  699. error_log_errors_per_period_backup = error_log_errors_per_period;
  700. setenv("NETDATA_ERRORS_THROTTLE_PERIOD", config_get(CONFIG_SECTION_LOGS, "errors flood protection period" , ""), 1);
  701. setenv("NETDATA_ERRORS_PER_PERIOD", config_get(CONFIG_SECTION_LOGS, "errors to trigger flood protection", ""), 1);
  702. }
  703. char *initialize_lock_directory_path(char *prefix)
  704. {
  705. char filename[FILENAME_MAX + 1];
  706. snprintfz(filename, FILENAME_MAX, "%s/lock", prefix);
  707. return config_get(CONFIG_SECTION_DIRECTORIES, "lock", filename);
  708. }
  709. static void backwards_compatible_config() {
  710. // move [global] options to the [web] section
  711. config_move(CONFIG_SECTION_GLOBAL, "http port listen backlog",
  712. CONFIG_SECTION_WEB, "listen backlog");
  713. config_move(CONFIG_SECTION_GLOBAL, "bind socket to IP",
  714. CONFIG_SECTION_WEB, "bind to");
  715. config_move(CONFIG_SECTION_GLOBAL, "bind to",
  716. CONFIG_SECTION_WEB, "bind to");
  717. config_move(CONFIG_SECTION_GLOBAL, "port",
  718. CONFIG_SECTION_WEB, "default port");
  719. config_move(CONFIG_SECTION_GLOBAL, "default port",
  720. CONFIG_SECTION_WEB, "default port");
  721. config_move(CONFIG_SECTION_GLOBAL, "disconnect idle web clients after seconds",
  722. CONFIG_SECTION_WEB, "disconnect idle clients after seconds");
  723. config_move(CONFIG_SECTION_GLOBAL, "respect web browser do not track policy",
  724. CONFIG_SECTION_WEB, "respect do not track policy");
  725. config_move(CONFIG_SECTION_GLOBAL, "web x-frame-options header",
  726. CONFIG_SECTION_WEB, "x-frame-options response header");
  727. config_move(CONFIG_SECTION_GLOBAL, "enable web responses gzip compression",
  728. CONFIG_SECTION_WEB, "enable gzip compression");
  729. config_move(CONFIG_SECTION_GLOBAL, "web compression strategy",
  730. CONFIG_SECTION_WEB, "gzip compression strategy");
  731. config_move(CONFIG_SECTION_GLOBAL, "web compression level",
  732. CONFIG_SECTION_WEB, "gzip compression level");
  733. config_move(CONFIG_SECTION_GLOBAL, "config directory",
  734. CONFIG_SECTION_DIRECTORIES, "config");
  735. config_move(CONFIG_SECTION_GLOBAL, "stock config directory",
  736. CONFIG_SECTION_DIRECTORIES, "stock config");
  737. config_move(CONFIG_SECTION_GLOBAL, "log directory",
  738. CONFIG_SECTION_DIRECTORIES, "log");
  739. config_move(CONFIG_SECTION_GLOBAL, "web files directory",
  740. CONFIG_SECTION_DIRECTORIES, "web");
  741. config_move(CONFIG_SECTION_GLOBAL, "cache directory",
  742. CONFIG_SECTION_DIRECTORIES, "cache");
  743. config_move(CONFIG_SECTION_GLOBAL, "lib directory",
  744. CONFIG_SECTION_DIRECTORIES, "lib");
  745. config_move(CONFIG_SECTION_GLOBAL, "home directory",
  746. CONFIG_SECTION_DIRECTORIES, "home");
  747. config_move(CONFIG_SECTION_GLOBAL, "lock directory",
  748. CONFIG_SECTION_DIRECTORIES, "lock");
  749. config_move(CONFIG_SECTION_GLOBAL, "plugins directory",
  750. CONFIG_SECTION_DIRECTORIES, "plugins");
  751. config_move(CONFIG_SECTION_HEALTH, "health configuration directory",
  752. CONFIG_SECTION_DIRECTORIES, "health config");
  753. config_move(CONFIG_SECTION_HEALTH, "stock health configuration directory",
  754. CONFIG_SECTION_DIRECTORIES, "stock health config");
  755. config_move(CONFIG_SECTION_REGISTRY, "registry db directory",
  756. CONFIG_SECTION_DIRECTORIES, "registry");
  757. config_move(CONFIG_SECTION_GLOBAL, "debug log",
  758. CONFIG_SECTION_LOGS, "debug");
  759. config_move(CONFIG_SECTION_GLOBAL, "error log",
  760. CONFIG_SECTION_LOGS, "error");
  761. config_move(CONFIG_SECTION_GLOBAL, "access log",
  762. CONFIG_SECTION_LOGS, "access");
  763. config_move(CONFIG_SECTION_GLOBAL, "facility log",
  764. CONFIG_SECTION_LOGS, "facility");
  765. config_move(CONFIG_SECTION_GLOBAL, "errors flood protection period",
  766. CONFIG_SECTION_LOGS, "errors flood protection period");
  767. config_move(CONFIG_SECTION_GLOBAL, "errors to trigger flood protection",
  768. CONFIG_SECTION_LOGS, "errors to trigger flood protection");
  769. config_move(CONFIG_SECTION_GLOBAL, "debug flags",
  770. CONFIG_SECTION_LOGS, "debug flags");
  771. config_move(CONFIG_SECTION_GLOBAL, "TZ environment variable",
  772. CONFIG_SECTION_ENV_VARS, "TZ");
  773. config_move(CONFIG_SECTION_PLUGINS, "PATH environment variable",
  774. CONFIG_SECTION_ENV_VARS, "PATH");
  775. config_move(CONFIG_SECTION_PLUGINS, "PYTHONPATH environment variable",
  776. CONFIG_SECTION_ENV_VARS, "PYTHONPATH");
  777. config_move(CONFIG_SECTION_STATSD, "enabled",
  778. CONFIG_SECTION_PLUGINS, "statsd");
  779. config_move(CONFIG_SECTION_GLOBAL, "memory mode",
  780. CONFIG_SECTION_DB, "mode");
  781. config_move(CONFIG_SECTION_GLOBAL, "history",
  782. CONFIG_SECTION_DB, "retention");
  783. config_move(CONFIG_SECTION_GLOBAL, "update every",
  784. CONFIG_SECTION_DB, "update every");
  785. config_move(CONFIG_SECTION_GLOBAL, "page cache size",
  786. CONFIG_SECTION_DB, "dbengine page cache size MB");
  787. config_move(CONFIG_SECTION_DB, "page cache size",
  788. CONFIG_SECTION_DB, "dbengine page cache size MB");
  789. config_move(CONFIG_SECTION_GLOBAL, "page cache uses malloc",
  790. CONFIG_SECTION_DB, "dbengine page cache with malloc");
  791. config_move(CONFIG_SECTION_DB, "page cache with malloc",
  792. CONFIG_SECTION_DB, "dbengine page cache with malloc");
  793. config_move(CONFIG_SECTION_GLOBAL, "dbengine disk space",
  794. CONFIG_SECTION_DB, "dbengine disk space MB");
  795. config_move(CONFIG_SECTION_GLOBAL, "dbengine multihost disk space",
  796. CONFIG_SECTION_DB, "dbengine multihost disk space MB");
  797. config_move(CONFIG_SECTION_GLOBAL, "memory deduplication (ksm)",
  798. CONFIG_SECTION_DB, "memory deduplication (ksm)");
  799. config_move(CONFIG_SECTION_GLOBAL, "dbengine page fetch timeout",
  800. CONFIG_SECTION_DB, "dbengine page fetch timeout secs");
  801. config_move(CONFIG_SECTION_GLOBAL, "dbengine page fetch retries",
  802. CONFIG_SECTION_DB, "dbengine page fetch retries");
  803. config_move(CONFIG_SECTION_GLOBAL, "dbengine extent pages",
  804. CONFIG_SECTION_DB, "dbengine pages per extent");
  805. config_move(CONFIG_SECTION_GLOBAL, "cleanup obsolete charts after seconds",
  806. CONFIG_SECTION_DB, "cleanup obsolete charts after secs");
  807. config_move(CONFIG_SECTION_GLOBAL, "gap when lost iterations above",
  808. CONFIG_SECTION_DB, "gap when lost iterations above");
  809. config_move(CONFIG_SECTION_GLOBAL, "cleanup orphan hosts after seconds",
  810. CONFIG_SECTION_DB, "cleanup orphan hosts after secs");
  811. config_move(CONFIG_SECTION_GLOBAL, "delete obsolete charts files",
  812. CONFIG_SECTION_DB, "delete obsolete charts files");
  813. config_move(CONFIG_SECTION_GLOBAL, "delete orphan hosts files",
  814. CONFIG_SECTION_DB, "delete orphan hosts files");
  815. config_move(CONFIG_SECTION_GLOBAL, "enable zero metrics",
  816. CONFIG_SECTION_DB, "enable zero metrics");
  817. }
  818. static void get_netdata_configured_variables() {
  819. backwards_compatible_config();
  820. // ------------------------------------------------------------------------
  821. // get the hostname
  822. char buf[HOSTNAME_MAX + 1];
  823. if(gethostname(buf, HOSTNAME_MAX) == -1){
  824. error("Cannot get machine hostname.");
  825. }
  826. netdata_configured_hostname = config_get(CONFIG_SECTION_GLOBAL, "hostname", buf);
  827. debug(D_OPTIONS, "hostname set to '%s'", netdata_configured_hostname);
  828. // ------------------------------------------------------------------------
  829. // get default database update frequency
  830. default_rrd_update_every = (int) config_get_number(CONFIG_SECTION_DB, "update every", UPDATE_EVERY);
  831. if(default_rrd_update_every < 1 || default_rrd_update_every > 600) {
  832. error("Invalid data collection frequency (update every) %d given. Defaulting to %d.", default_rrd_update_every, UPDATE_EVERY);
  833. default_rrd_update_every = UPDATE_EVERY;
  834. config_set_number(CONFIG_SECTION_DB, "update every", default_rrd_update_every);
  835. }
  836. // ------------------------------------------------------------------------
  837. // get default memory mode for the database
  838. {
  839. const char *mode = config_get(CONFIG_SECTION_DB, "mode", rrd_memory_mode_name(default_rrd_memory_mode));
  840. default_rrd_memory_mode = rrd_memory_mode_id(mode);
  841. if(strcmp(mode, rrd_memory_mode_name(default_rrd_memory_mode)) != 0) {
  842. error("Invalid memory mode '%s' given. Using '%s'", mode, rrd_memory_mode_name(default_rrd_memory_mode));
  843. config_set(CONFIG_SECTION_DB, "mode", rrd_memory_mode_name(default_rrd_memory_mode));
  844. }
  845. }
  846. // ------------------------------------------------------------------------
  847. // get default database size
  848. if(default_rrd_memory_mode != RRD_MEMORY_MODE_DBENGINE && default_rrd_memory_mode != RRD_MEMORY_MODE_NONE) {
  849. default_rrd_history_entries = (int)config_get_number(
  850. CONFIG_SECTION_DB, "retention",
  851. align_entries_to_pagesize(default_rrd_memory_mode, RRD_DEFAULT_HISTORY_ENTRIES));
  852. long h = align_entries_to_pagesize(default_rrd_memory_mode, default_rrd_history_entries);
  853. if (h != default_rrd_history_entries) {
  854. config_set_number(CONFIG_SECTION_DB, "retention", h);
  855. default_rrd_history_entries = (int)h;
  856. }
  857. }
  858. // ------------------------------------------------------------------------
  859. // get system paths
  860. netdata_configured_user_config_dir = config_get(CONFIG_SECTION_DIRECTORIES, "config", netdata_configured_user_config_dir);
  861. netdata_configured_stock_config_dir = config_get(CONFIG_SECTION_DIRECTORIES, "stock config", netdata_configured_stock_config_dir);
  862. netdata_configured_log_dir = config_get(CONFIG_SECTION_DIRECTORIES, "log", netdata_configured_log_dir);
  863. netdata_configured_web_dir = config_get(CONFIG_SECTION_DIRECTORIES, "web", netdata_configured_web_dir);
  864. netdata_configured_cache_dir = config_get(CONFIG_SECTION_DIRECTORIES, "cache", netdata_configured_cache_dir);
  865. netdata_configured_varlib_dir = config_get(CONFIG_SECTION_DIRECTORIES, "lib", netdata_configured_varlib_dir);
  866. char *env_home=getenv("HOME");
  867. netdata_configured_home_dir = config_get(CONFIG_SECTION_DIRECTORIES, "home", env_home?env_home:netdata_configured_home_dir);
  868. netdata_configured_lock_dir = initialize_lock_directory_path(netdata_configured_varlib_dir);
  869. {
  870. pluginsd_initialize_plugin_directories();
  871. netdata_configured_primary_plugins_dir = plugin_directories[PLUGINSD_STOCK_PLUGINS_DIRECTORY_PATH];
  872. }
  873. #ifdef ENABLE_DBENGINE
  874. // ------------------------------------------------------------------------
  875. // get default Database Engine page cache size in MiB
  876. default_rrdeng_page_cache_mb = (int) config_get_number(CONFIG_SECTION_DB, "dbengine page cache size MB", default_rrdeng_page_cache_mb);
  877. db_engine_journal_check = config_get_boolean(CONFIG_SECTION_DB, "dbengine enable journal integrity check", CONFIG_BOOLEAN_NO);
  878. if(default_rrdeng_page_cache_mb < RRDENG_MIN_PAGE_CACHE_SIZE_MB) {
  879. error("Invalid page cache size %d given. Defaulting to %d.", default_rrdeng_page_cache_mb, RRDENG_MIN_PAGE_CACHE_SIZE_MB);
  880. default_rrdeng_page_cache_mb = RRDENG_MIN_PAGE_CACHE_SIZE_MB;
  881. config_set_number(CONFIG_SECTION_DB, "dbengine page cache size MB", default_rrdeng_page_cache_mb);
  882. }
  883. // ------------------------------------------------------------------------
  884. // get default Database Engine disk space quota in MiB
  885. default_rrdeng_disk_quota_mb = (int) config_get_number(CONFIG_SECTION_DB, "dbengine disk space MB", default_rrdeng_disk_quota_mb);
  886. if(default_rrdeng_disk_quota_mb < RRDENG_MIN_DISK_SPACE_MB) {
  887. error("Invalid dbengine disk space %d given. Defaulting to %d.", default_rrdeng_disk_quota_mb, RRDENG_MIN_DISK_SPACE_MB);
  888. default_rrdeng_disk_quota_mb = RRDENG_MIN_DISK_SPACE_MB;
  889. config_set_number(CONFIG_SECTION_DB, "dbengine disk space MB", default_rrdeng_disk_quota_mb);
  890. }
  891. default_multidb_disk_quota_mb = (int) config_get_number(CONFIG_SECTION_DB, "dbengine multihost disk space MB", compute_multidb_diskspace());
  892. if(default_multidb_disk_quota_mb < RRDENG_MIN_DISK_SPACE_MB) {
  893. error("Invalid multidb disk space %d given. Defaulting to %d.", default_multidb_disk_quota_mb, default_rrdeng_disk_quota_mb);
  894. default_multidb_disk_quota_mb = default_rrdeng_disk_quota_mb;
  895. config_set_number(CONFIG_SECTION_DB, "dbengine multihost disk space MB", default_multidb_disk_quota_mb);
  896. }
  897. #else
  898. if (default_rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) {
  899. error_report("RRD_MEMORY_MODE_DBENGINE is not supported in this platform. The agent will use db mode 'save' instead.");
  900. default_rrd_memory_mode = RRD_MEMORY_MODE_SAVE;
  901. }
  902. #endif
  903. // ------------------------------------------------------------------------
  904. netdata_configured_host_prefix = config_get(CONFIG_SECTION_GLOBAL, "host access prefix", "");
  905. verify_netdata_host_prefix();
  906. // --------------------------------------------------------------------
  907. // get KSM settings
  908. #ifdef MADV_MERGEABLE
  909. enable_ksm = config_get_boolean(CONFIG_SECTION_DB, "memory deduplication (ksm)", enable_ksm);
  910. #endif
  911. // --------------------------------------------------------------------
  912. // metric correlations
  913. enable_metric_correlations = config_get_boolean(CONFIG_SECTION_GLOBAL, "enable metric correlations", enable_metric_correlations);
  914. default_metric_correlations_method = weights_string_to_method(config_get(
  915. CONFIG_SECTION_GLOBAL, "metric correlations method",
  916. weights_method_to_string(default_metric_correlations_method)));
  917. // --------------------------------------------------------------------
  918. rrdset_free_obsolete_time_s = config_get_number(CONFIG_SECTION_DB, "cleanup obsolete charts after secs", rrdset_free_obsolete_time_s);
  919. // Current chart locking and invalidation scheme doesn't prevent Netdata from segmentation faults if a short
  920. // cleanup delay is set. Extensive stress tests showed that 10 seconds is quite a safe delay. Look at
  921. // https://github.com/netdata/netdata/pull/11222#issuecomment-868367920 for more information.
  922. if (rrdset_free_obsolete_time_s < 10) {
  923. rrdset_free_obsolete_time_s = 10;
  924. info("The \"cleanup obsolete charts after seconds\" option was set to 10 seconds.");
  925. config_set_number(CONFIG_SECTION_DB, "cleanup obsolete charts after secs", rrdset_free_obsolete_time_s);
  926. }
  927. gap_when_lost_iterations_above = (int)config_get_number(CONFIG_SECTION_DB, "gap when lost iterations above", gap_when_lost_iterations_above);
  928. if (gap_when_lost_iterations_above < 1) {
  929. gap_when_lost_iterations_above = 1;
  930. config_set_number(CONFIG_SECTION_DB, "gap when lost iterations above", gap_when_lost_iterations_above);
  931. }
  932. gap_when_lost_iterations_above += 2;
  933. // --------------------------------------------------------------------
  934. // get various system parameters
  935. get_system_HZ();
  936. get_system_cpus_uncached();
  937. get_system_pid_max();
  938. }
  939. int load_netdata_conf(char *filename, char overwrite_used) {
  940. errno = 0;
  941. int ret = 0;
  942. if(filename && *filename) {
  943. ret = config_load(filename, overwrite_used, NULL);
  944. if(!ret)
  945. error("CONFIG: cannot load config file '%s'.", filename);
  946. }
  947. else {
  948. filename = strdupz_path_subpath(netdata_configured_user_config_dir, "netdata.conf");
  949. ret = config_load(filename, overwrite_used, NULL);
  950. if(!ret) {
  951. info("CONFIG: cannot load user config '%s'. Will try the stock version.", filename);
  952. freez(filename);
  953. filename = strdupz_path_subpath(netdata_configured_stock_config_dir, "netdata.conf");
  954. ret = config_load(filename, overwrite_used, NULL);
  955. if(!ret)
  956. info("CONFIG: cannot load stock config '%s'. Running with internal defaults.", filename);
  957. }
  958. freez(filename);
  959. }
  960. return ret;
  961. }
// Marker for the Coverity static analyzer: the function does nothing at
// runtime (its argument is only cast to void). The annotation on the next line
// is what matters - presumably it tells Coverity to treat the string passed as
// the first argument as sanitized; keep it directly above the definition.
// coverity[ +tainted_string_sanitize_content : arg-0 ]
static inline void coverity_remove_taint(char *s)
{
    (void)s; // intentionally unused
}
  967. int get_system_info(struct rrdhost_system_info *system_info) {
  968. char *script;
  969. script = mallocz(sizeof(char) * (strlen(netdata_configured_primary_plugins_dir) + strlen("system-info.sh") + 2));
  970. sprintf(script, "%s/%s", netdata_configured_primary_plugins_dir, "system-info.sh");
  971. if (unlikely(access(script, R_OK) != 0)) {
  972. info("System info script %s not found.",script);
  973. freez(script);
  974. return 1;
  975. }
  976. pid_t command_pid;
  977. info("Executing %s", script);
  978. FILE *fp_child_input;
  979. FILE *fp_child_output = netdata_popen(script, &command_pid, &fp_child_input);
  980. if(fp_child_output) {
  981. char line[200 + 1];
  982. // Removed the double strlens, if the Coverity tainted string warning reappears I'll revert.
  983. // One time init code, but I'm curious about the warning...
  984. while (fgets(line, 200, fp_child_output) != NULL) {
  985. char *value=line;
  986. while (*value && *value != '=') value++;
  987. if (*value=='=') {
  988. *value='\0';
  989. value++;
  990. char *end = value;
  991. while (*end && *end != '\n') end++;
  992. *end = '\0'; // Overwrite newline if present
  993. coverity_remove_taint(line); // I/O is controlled result of system_info.sh - not tainted
  994. coverity_remove_taint(value);
  995. if(unlikely(rrdhost_set_system_info_variable(system_info, line, value))) {
  996. info("Unexpected environment variable %s=%s", line, value);
  997. }
  998. else {
  999. info("%s=%s", line, value);
  1000. setenv(line, value, 1);
  1001. }
  1002. }
  1003. }
  1004. netdata_pclose(fp_child_input, fp_child_output, command_pid);
  1005. }
  1006. freez(script);
  1007. return 0;
  1008. }
  1009. void set_silencers_filename() {
  1010. char filename[FILENAME_MAX + 1];
  1011. snprintfz(filename, FILENAME_MAX, "%s/health.silencers.json", netdata_configured_varlib_dir);
  1012. silencers_filename = config_get(CONFIG_SECTION_HEALTH, "silencers file", filename);
  1013. }
  1014. /* Any config setting that can be accessed without a default value i.e. configget(...,...,NULL) *MUST*
  1015. be set in this procedure to be called in all the relevant code paths.
  1016. */
  1017. void post_conf_load(char **user)
  1018. {
  1019. // --------------------------------------------------------------------
  1020. // get the user we should run
  1021. // IMPORTANT: this is required before web_files_uid()
  1022. if(getuid() == 0) {
  1023. *user = config_get(CONFIG_SECTION_GLOBAL, "run as user", NETDATA_USER);
  1024. }
  1025. else {
  1026. struct passwd *passwd = getpwuid(getuid());
  1027. *user = config_get(CONFIG_SECTION_GLOBAL, "run as user", (passwd && passwd->pw_name)?passwd->pw_name:"");
  1028. }
  1029. // --------------------------------------------------------------------
  1030. // Check if the cloud is enabled
  1031. #if defined( DISABLE_CLOUD ) || !defined( ENABLE_ACLK )
  1032. netdata_cloud_setting = 0;
  1033. #else
  1034. netdata_cloud_setting = appconfig_get_boolean(&cloud_config, CONFIG_SECTION_GLOBAL, "enabled", 1);
  1035. #endif
  1036. // This must be set before any point in the code that accesses it. Do not move it from this function.
  1037. appconfig_get(&cloud_config, CONFIG_SECTION_GLOBAL, "cloud base url", DEFAULT_CLOUD_BASE_URL);
  1038. }
  1039. #define delta_startup_time(msg) \
  1040. { \
  1041. usec_t now_ut = now_monotonic_usec(); \
  1042. if(prev_msg) \
  1043. info("NETDATA STARTUP: in %7llu ms, %s - next: %s", (now_ut - last_ut) / USEC_PER_MS, prev_msg, msg); \
  1044. else \
  1045. info("NETDATA STARTUP: next: %s", msg); \
  1046. last_ut = now_ut; \
  1047. prev_msg = msg; \
  1048. }
// Forward declarations of unit test entry points that are defined outside
// this file; main() calls them for the "pgctest", "mrgtest" and "julytest"
// values of the -W option and returns their result.
int pgc_unittest(void);
int mrg_unittest(void);
int julytest(void);
  1052. int main(int argc, char **argv) {
  1053. // initialize the system clocks
  1054. clocks_init();
  1055. usec_t started_ut = now_monotonic_usec();
  1056. usec_t last_ut = started_ut;
  1057. const char *prev_msg = NULL;
  1058. // Initialize stderror avoiding coredump when info() or error() is called
  1059. stderror = stderr;
  1060. int i;
  1061. int config_loaded = 0;
  1062. int dont_fork = 0;
  1063. bool close_open_fds = true;
  1064. size_t default_stacksize;
  1065. char *user = NULL;
  1066. static_threads = static_threads_get();
  1067. netdata_ready=0;
  1068. // set the name for logging
  1069. program_name = "netdata";
  1070. if (argc > 1 && strcmp(argv[1], SPAWN_SERVER_COMMAND_LINE_ARGUMENT) == 0) {
  1071. // don't run netdata, this is the spawn server
  1072. spawn_server();
  1073. exit(0);
  1074. }
  1075. // parse options
  1076. {
  1077. int num_opts = sizeof(option_definitions) / sizeof(struct option_def);
  1078. char optstring[(num_opts * 2) + 1];
  1079. int string_i = 0;
  1080. for( i = 0; i < num_opts; i++ ) {
  1081. optstring[string_i] = option_definitions[i].val;
  1082. string_i++;
  1083. if(option_definitions[i].arg_name) {
  1084. optstring[string_i] = ':';
  1085. string_i++;
  1086. }
  1087. }
  1088. // terminate optstring
  1089. optstring[string_i] ='\0';
  1090. optstring[(num_opts *2)] ='\0';
  1091. int opt;
  1092. while( (opt = getopt(argc, argv, optstring)) != -1 ) {
  1093. switch(opt) {
  1094. case 'c':
  1095. if(load_netdata_conf(optarg, 1) != 1) {
  1096. error("Cannot load configuration file %s.", optarg);
  1097. return 1;
  1098. }
  1099. else {
  1100. debug(D_OPTIONS, "Configuration loaded from %s.", optarg);
  1101. post_conf_load(&user);
  1102. load_cloud_conf(1);
  1103. config_loaded = 1;
  1104. }
  1105. break;
  1106. case 'D':
  1107. dont_fork = 1;
  1108. break;
  1109. case 'd':
  1110. dont_fork = 0;
  1111. break;
  1112. case 'h':
  1113. return help(0);
  1114. case 'i':
  1115. config_set(CONFIG_SECTION_WEB, "bind to", optarg);
  1116. break;
  1117. case 'P':
  1118. strncpy(pidfile, optarg, FILENAME_MAX);
  1119. pidfile[FILENAME_MAX] = '\0';
  1120. break;
  1121. case 'p':
  1122. config_set(CONFIG_SECTION_GLOBAL, "default port", optarg);
  1123. break;
  1124. case 's':
  1125. config_set(CONFIG_SECTION_GLOBAL, "host access prefix", optarg);
  1126. break;
  1127. case 't':
  1128. config_set(CONFIG_SECTION_GLOBAL, "update every", optarg);
  1129. break;
  1130. case 'u':
  1131. config_set(CONFIG_SECTION_GLOBAL, "run as user", optarg);
  1132. break;
  1133. case 'v':
  1134. case 'V':
  1135. printf("%s %s\n", program_name, program_version);
  1136. return 0;
  1137. case 'W':
  1138. {
  1139. char* stacksize_string = "stacksize=";
  1140. char* debug_flags_string = "debug_flags=";
  1141. char* claim_string = "claim";
  1142. #ifdef ENABLE_DBENGINE
  1143. char* createdataset_string = "createdataset=";
  1144. char* stresstest_string = "stresstest=";
  1145. #endif
  1146. if(strcmp(optarg, "sqlite-check") == 0) {
  1147. sql_init_database(DB_CHECK_INTEGRITY, 0);
  1148. return 0;
  1149. }
  1150. if(strcmp(optarg, "sqlite-fix") == 0) {
  1151. sql_init_database(DB_CHECK_FIX_DB, 0);
  1152. return 0;
  1153. }
  1154. if(strcmp(optarg, "sqlite-compact") == 0) {
  1155. sql_init_database(DB_CHECK_RECLAIM_SPACE, 0);
  1156. return 0;
  1157. }
  1158. if(strcmp(optarg, "unittest") == 0) {
  1159. unittest_running = true;
  1160. if (unit_test_static_threads())
  1161. return 1;
  1162. if (unit_test_buffer())
  1163. return 1;
  1164. if (unit_test_str2ld())
  1165. return 1;
  1166. if (unit_test_bitmap256())
  1167. return 1;
  1168. // No call to load the config file on this code-path
  1169. post_conf_load(&user);
  1170. get_netdata_configured_variables();
  1171. default_rrd_update_every = 1;
  1172. default_rrd_memory_mode = RRD_MEMORY_MODE_RAM;
  1173. default_health_enabled = 0;
  1174. storage_tiers = 1;
  1175. registry_init();
  1176. if(rrd_init("unittest", NULL, true)) {
  1177. fprintf(stderr, "rrd_init failed for unittest\n");
  1178. return 1;
  1179. }
  1180. default_rrdpush_enabled = 0;
  1181. if(run_all_mockup_tests()) return 1;
  1182. if(unit_test_storage()) return 1;
  1183. #ifdef ENABLE_DBENGINE
  1184. if(test_dbengine()) return 1;
  1185. #endif
  1186. if(test_sqlite()) return 1;
  1187. if(string_unittest(10000)) return 1;
  1188. if (dictionary_unittest(10000))
  1189. return 1;
  1190. if(aral_unittest(10000))
  1191. return 1;
  1192. if (rrdlabels_unittest())
  1193. return 1;
  1194. if (ctx_unittest())
  1195. return 1;
  1196. fprintf(stderr, "\n\nALL TESTS PASSED\n\n");
  1197. return 0;
  1198. }
  1199. else if(strcmp(optarg, "escapetest") == 0) {
  1200. return command_argument_sanitization_tests();
  1201. }
  1202. #ifdef ENABLE_DBENGINE
  1203. else if(strcmp(optarg, "mctest") == 0) {
  1204. unittest_running = true;
  1205. return mc_unittest();
  1206. }
  1207. else if(strcmp(optarg, "ctxtest") == 0) {
  1208. unittest_running = true;
  1209. return ctx_unittest();
  1210. }
  1211. else if(strcmp(optarg, "dicttest") == 0) {
  1212. unittest_running = true;
  1213. return dictionary_unittest(10000);
  1214. }
  1215. else if(strcmp(optarg, "araltest") == 0) {
  1216. unittest_running = true;
  1217. return aral_unittest(10000);
  1218. }
  1219. else if(strcmp(optarg, "stringtest") == 0) {
  1220. unittest_running = true;
  1221. return string_unittest(10000);
  1222. }
  1223. else if(strcmp(optarg, "rrdlabelstest") == 0) {
  1224. unittest_running = true;
  1225. return rrdlabels_unittest();
  1226. }
  1227. else if(strcmp(optarg, "metatest") == 0) {
  1228. unittest_running = true;
  1229. return metadata_unittest();
  1230. }
  1231. else if(strcmp(optarg, "pgctest") == 0) {
  1232. unittest_running = true;
  1233. return pgc_unittest();
  1234. }
  1235. else if(strcmp(optarg, "mrgtest") == 0) {
  1236. unittest_running = true;
  1237. return mrg_unittest();
  1238. }
  1239. else if(strcmp(optarg, "julytest") == 0) {
  1240. unittest_running = true;
  1241. return julytest();
  1242. }
  1243. else if(strncmp(optarg, createdataset_string, strlen(createdataset_string)) == 0) {
  1244. optarg += strlen(createdataset_string);
  1245. unsigned history_seconds = strtoul(optarg, NULL, 0);
  1246. generate_dbengine_dataset(history_seconds);
  1247. return 0;
  1248. }
  1249. else if(strncmp(optarg, stresstest_string, strlen(stresstest_string)) == 0) {
  1250. char *endptr;
  1251. unsigned test_duration_sec = 0, dset_charts = 0, query_threads = 0, ramp_up_seconds = 0,
  1252. page_cache_mb = 0, disk_space_mb = 0, workers = 16;
  1253. optarg += strlen(stresstest_string);
  1254. test_duration_sec = (unsigned)strtoul(optarg, &endptr, 0);
  1255. if (',' == *endptr)
  1256. dset_charts = (unsigned)strtoul(endptr + 1, &endptr, 0);
  1257. if (',' == *endptr)
  1258. query_threads = (unsigned)strtoul(endptr + 1, &endptr, 0);
  1259. if (',' == *endptr)
  1260. ramp_up_seconds = (unsigned)strtoul(endptr + 1, &endptr, 0);
  1261. if (',' == *endptr)
  1262. page_cache_mb = (unsigned)strtoul(endptr + 1, &endptr, 0);
  1263. if (',' == *endptr)
  1264. disk_space_mb = (unsigned)strtoul(endptr + 1, &endptr, 0);
  1265. if (',' == *endptr)
  1266. workers = (unsigned)strtoul(endptr + 1, &endptr, 0);
  1267. if (workers > 1024)
  1268. workers = 1024;
  1269. char workers_str[16];
  1270. snprintf(workers_str, 15, "%u", workers);
  1271. setenv("UV_THREADPOOL_SIZE", workers_str, 1);
  1272. dbengine_stress_test(test_duration_sec, dset_charts, query_threads, ramp_up_seconds,
  1273. page_cache_mb, disk_space_mb);
  1274. return 0;
  1275. }
  1276. #endif
  1277. else if(strcmp(optarg, "simple-pattern") == 0) {
  1278. if(optind + 2 > argc) {
  1279. fprintf(stderr, "%s", "\nUSAGE: -W simple-pattern 'pattern' 'string'\n\n"
  1280. " Checks if 'pattern' matches the given 'string'.\n"
  1281. " - 'pattern' can be one or more space separated words.\n"
  1282. " - each 'word' can contain one or more asterisks.\n"
  1283. " - words starting with '!' give negative matches.\n"
  1284. " - words are processed left to right\n"
  1285. "\n"
  1286. "Examples:\n"
  1287. "\n"
  1288. " > match all veth interfaces, except veth0:\n"
  1289. "\n"
  1290. " -W simple-pattern '!veth0 veth*' 'veth12'\n"
  1291. "\n"
  1292. "\n"
  1293. " > match all *.ext files directly in /path/:\n"
  1294. " (this will not match *.ext files in a subdir of /path/)\n"
  1295. "\n"
  1296. " -W simple-pattern '!/path/*/*.ext /path/*.ext' '/path/test.ext'\n"
  1297. "\n"
  1298. );
  1299. return 1;
  1300. }
  1301. const char *haystack = argv[optind];
  1302. const char *needle = argv[optind + 1];
  1303. size_t len = strlen(needle) + 1;
  1304. char wildcarded[len];
  1305. SIMPLE_PATTERN *p = simple_pattern_create(haystack, NULL, SIMPLE_PATTERN_EXACT);
  1306. int ret = simple_pattern_matches_extract(p, needle, wildcarded, len);
  1307. simple_pattern_free(p);
  1308. if(ret) {
  1309. fprintf(stdout, "RESULT: MATCHED - pattern '%s' matches '%s', wildcarded '%s'\n", haystack, needle, wildcarded);
  1310. return 0;
  1311. }
  1312. else {
  1313. fprintf(stdout, "RESULT: NOT MATCHED - pattern '%s' does not match '%s', wildcarded '%s'\n", haystack, needle, wildcarded);
  1314. return 1;
  1315. }
  1316. }
  1317. else if(strncmp(optarg, stacksize_string, strlen(stacksize_string)) == 0) {
  1318. optarg += strlen(stacksize_string);
  1319. config_set(CONFIG_SECTION_GLOBAL, "pthread stack size", optarg);
  1320. }
  1321. else if(strncmp(optarg, debug_flags_string, strlen(debug_flags_string)) == 0) {
  1322. optarg += strlen(debug_flags_string);
  1323. config_set(CONFIG_SECTION_LOGS, "debug flags", optarg);
  1324. debug_flags = strtoull(optarg, NULL, 0);
  1325. }
  1326. else if(strcmp(optarg, "set") == 0) {
  1327. if(optind + 3 > argc) {
  1328. fprintf(stderr, "%s", "\nUSAGE: -W set 'section' 'key' 'value'\n\n"
  1329. " Overwrites settings of netdata.conf.\n"
  1330. "\n"
  1331. " These options interact with: -c netdata.conf\n"
  1332. " If -c netdata.conf is given on the command line,\n"
  1333. " before -W set... the user may overwrite command\n"
  1334. " line parameters at netdata.conf\n"
  1335. " If -c netdata.conf is given after (or missing)\n"
  1336. " -W set... the user cannot overwrite the command line\n"
  1337. " parameters."
  1338. "\n"
  1339. );
  1340. return 1;
  1341. }
  1342. const char *section = argv[optind];
  1343. const char *key = argv[optind + 1];
  1344. const char *value = argv[optind + 2];
  1345. optind += 3;
  1346. // set this one as the default
  1347. // only if it is not already set in the config file
  1348. // so the caller can use -c netdata.conf before or
  1349. // after this parameter to prevent or allow overwriting
  1350. // variables at netdata.conf
  1351. config_set_default(section, key, value);
  1352. // fprintf(stderr, "SET section '%s', key '%s', value '%s'\n", section, key, value);
  1353. }
  1354. else if(strcmp(optarg, "set2") == 0) {
  1355. if(optind + 4 > argc) {
  1356. fprintf(stderr, "%s", "\nUSAGE: -W set 'conf_file' 'section' 'key' 'value'\n\n"
  1357. " Overwrites settings of netdata.conf or cloud.conf\n"
  1358. "\n"
  1359. " These options interact with: -c netdata.conf\n"
  1360. " If -c netdata.conf is given on the command line,\n"
  1361. " before -W set... the user may overwrite command\n"
  1362. " line parameters at netdata.conf\n"
  1363. " If -c netdata.conf is given after (or missing)\n"
  1364. " -W set... the user cannot overwrite the command line\n"
  1365. " parameters."
  1366. " conf_file can be \"cloud\" or \"netdata\".\n"
  1367. "\n"
  1368. );
  1369. return 1;
  1370. }
  1371. const char *conf_file = argv[optind]; /* "cloud" is cloud.conf, otherwise netdata.conf */
  1372. struct config *tmp_config = strcmp(conf_file, "cloud") ? &netdata_config : &cloud_config;
  1373. const char *section = argv[optind + 1];
  1374. const char *key = argv[optind + 2];
  1375. const char *value = argv[optind + 3];
  1376. optind += 4;
  1377. // set this one as the default
  1378. // only if it is not already set in the config file
  1379. // so the caller can use -c netdata.conf before or
  1380. // after this parameter to prevent or allow overwriting
  1381. // variables at netdata.conf
  1382. appconfig_set_default(tmp_config, section, key, value);
  1383. // fprintf(stderr, "SET section '%s', key '%s', value '%s'\n", section, key, value);
  1384. }
  1385. else if(strcmp(optarg, "get") == 0) {
  1386. if(optind + 3 > argc) {
  1387. fprintf(stderr, "%s", "\nUSAGE: -W get 'section' 'key' 'value'\n\n"
  1388. " Prints settings of netdata.conf.\n"
  1389. "\n"
  1390. " These options interact with: -c netdata.conf\n"
  1391. " -c netdata.conf has to be given before -W get.\n"
  1392. "\n"
  1393. );
  1394. return 1;
  1395. }
  1396. if(!config_loaded) {
  1397. fprintf(stderr, "warning: no configuration file has been loaded. Use -c CONFIG_FILE, before -W get. Using default config.\n");
  1398. load_netdata_conf(NULL, 0);
  1399. post_conf_load(&user);
  1400. }
  1401. get_netdata_configured_variables();
  1402. const char *section = argv[optind];
  1403. const char *key = argv[optind + 1];
  1404. const char *def = argv[optind + 2];
  1405. const char *value = config_get(section, key, def);
  1406. printf("%s\n", value);
  1407. return 0;
  1408. }
  1409. else if(strcmp(optarg, "get2") == 0) {
  1410. if(optind + 4 > argc) {
  1411. fprintf(stderr, "%s", "\nUSAGE: -W get2 'conf_file' 'section' 'key' 'value'\n\n"
  1412. " Prints settings of netdata.conf or cloud.conf\n"
  1413. "\n"
  1414. " These options interact with: -c netdata.conf\n"
  1415. " -c netdata.conf has to be given before -W get2.\n"
  1416. " conf_file can be \"cloud\" or \"netdata\".\n"
  1417. "\n"
  1418. );
  1419. return 1;
  1420. }
  1421. if(!config_loaded) {
  1422. fprintf(stderr, "warning: no configuration file has been loaded. Use -c CONFIG_FILE, before -W get. Using default config.\n");
  1423. load_netdata_conf(NULL, 0);
  1424. post_conf_load(&user);
  1425. load_cloud_conf(1);
  1426. }
  1427. get_netdata_configured_variables();
  1428. const char *conf_file = argv[optind]; /* "cloud" is cloud.conf, otherwise netdata.conf */
  1429. struct config *tmp_config = strcmp(conf_file, "cloud") ? &netdata_config : &cloud_config;
  1430. const char *section = argv[optind + 1];
  1431. const char *key = argv[optind + 2];
  1432. const char *def = argv[optind + 3];
  1433. const char *value = appconfig_get(tmp_config, section, key, def);
  1434. printf("%s\n", value);
  1435. return 0;
  1436. }
  1437. else if(strncmp(optarg, claim_string, strlen(claim_string)) == 0) {
  1438. /* will trigger a claiming attempt when the agent is initialized */
  1439. claiming_pending_arguments = optarg + strlen(claim_string);
  1440. }
  1441. else if(strcmp(optarg, "buildinfo") == 0) {
  1442. printf("Version: %s %s\n", program_name, program_version);
  1443. print_build_info();
  1444. return 0;
  1445. }
  1446. else if(strcmp(optarg, "buildinfojson") == 0) {
  1447. print_build_info_json();
  1448. return 0;
  1449. }
  1450. else if(strcmp(optarg, "keepopenfds") == 0) {
  1451. // Internal dev option to skip closing inherited
  1452. // open FDs. Useful, when we want to run the agent
  1453. // under profiling tools that open/maintain their
  1454. // own FDs.
  1455. close_open_fds = false;
  1456. } else {
  1457. fprintf(stderr, "Unknown -W parameter '%s'\n", optarg);
  1458. return help(1);
  1459. }
  1460. }
  1461. break;
  1462. default: /* ? */
  1463. fprintf(stderr, "Unknown parameter '%c'\n", opt);
  1464. return help(1);
  1465. }
  1466. }
  1467. }
  1468. if (close_open_fds == true) {
  1469. // close all open file descriptors, except the standard ones
  1470. // the caller may have left open files (lxc-attach has this issue)
  1471. for_each_open_fd(OPEN_FD_ACTION_CLOSE, OPEN_FD_EXCLUDE_STDIN | OPEN_FD_EXCLUDE_STDOUT | OPEN_FD_EXCLUDE_STDERR);
  1472. }
  1473. if(!config_loaded) {
  1474. load_netdata_conf(NULL, 0);
  1475. post_conf_load(&user);
  1476. load_cloud_conf(0);
  1477. }
  1478. char *nd_disable_cloud = getenv("NETDATA_DISABLE_CLOUD");
  1479. if (nd_disable_cloud && !strncmp(nd_disable_cloud, "1", 1)) {
  1480. appconfig_set(&cloud_config, CONFIG_SECTION_GLOBAL, "enabled", "false");
  1481. }
  1482. // ------------------------------------------------------------------------
  1483. // initialize netdata
  1484. {
  1485. char *pmax = config_get(CONFIG_SECTION_GLOBAL, "glibc malloc arena max for plugins", "1");
  1486. if(pmax && *pmax)
  1487. setenv("MALLOC_ARENA_MAX", pmax, 1);
  1488. #if defined(HAVE_C_MALLOPT)
  1489. i = (int)config_get_number(CONFIG_SECTION_GLOBAL, "glibc malloc arena max for netdata", 1);
  1490. if(i > 0)
  1491. mallopt(M_ARENA_MAX, 1);
  1492. #ifdef NETDATA_INTERNAL_CHECKS
  1493. mallopt(M_PERTURB, 0x5A);
  1494. // mallopt(M_MXFAST, 0);
  1495. #endif
  1496. #endif
  1497. // set libuv worker threads
  1498. libuv_worker_threads = (int)get_netdata_cpus() * 2;
  1499. if(libuv_worker_threads < MIN_LIBUV_WORKER_THREADS)
  1500. libuv_worker_threads = MIN_LIBUV_WORKER_THREADS;
  1501. if(libuv_worker_threads > MAX_LIBUV_WORKER_THREADS)
  1502. libuv_worker_threads = MAX_LIBUV_WORKER_THREADS;
  1503. libuv_worker_threads = config_get_number(CONFIG_SECTION_GLOBAL, "libuv worker threads", libuv_worker_threads);
  1504. if(libuv_worker_threads < MIN_LIBUV_WORKER_THREADS) {
  1505. libuv_worker_threads = MIN_LIBUV_WORKER_THREADS;
  1506. config_set_number(CONFIG_SECTION_GLOBAL, "libuv worker threads", libuv_worker_threads);
  1507. }
  1508. {
  1509. char buf[20 + 1];
  1510. snprintfz(buf, 20, "%d", libuv_worker_threads);
  1511. setenv("UV_THREADPOOL_SIZE", buf, 1);
  1512. }
  1513. // prepare configuration environment variables for the plugins
  1514. get_netdata_configured_variables();
  1515. set_global_environment();
  1516. // work while we are cd into config_dir
  1517. // to allow the plugins refer to their config
  1518. // files using relative filenames
  1519. if(chdir(netdata_configured_user_config_dir) == -1)
  1520. fatal("Cannot cd to '%s'", netdata_configured_user_config_dir);
  1521. // Get execution path before switching user to avoid permission issues
  1522. get_netdata_execution_path();
  1523. }
  1524. {
  1525. // --------------------------------------------------------------------
  1526. // get the debugging flags from the configuration file
  1527. char *flags = config_get(CONFIG_SECTION_LOGS, "debug flags", "0x0000000000000000");
  1528. setenv("NETDATA_DEBUG_FLAGS", flags, 1);
  1529. debug_flags = strtoull(flags, NULL, 0);
  1530. debug(D_OPTIONS, "Debug flags set to '0x%" PRIX64 "'.", debug_flags);
  1531. if(debug_flags != 0) {
  1532. struct rlimit rl = { RLIM_INFINITY, RLIM_INFINITY };
  1533. if(setrlimit(RLIMIT_CORE, &rl) != 0)
  1534. error("Cannot request unlimited core dumps for debugging... Proceeding anyway...");
  1535. #ifdef HAVE_SYS_PRCTL_H
  1536. prctl(PR_SET_DUMPABLE, 1, 0, 0, 0);
  1537. #endif
  1538. }
  1539. // --------------------------------------------------------------------
  1540. // get log filenames and settings
  1541. log_init();
  1542. error_log_limit_unlimited();
  1543. // initialize the log files
  1544. open_all_log_files();
  1545. aral_judy_init();
  1546. get_system_timezone();
  1547. // --------------------------------------------------------------------
  1548. // get the certificate and start security
  1549. #ifdef ENABLE_HTTPS
  1550. security_init();
  1551. #endif
  1552. // --------------------------------------------------------------------
  1553. // This is the safest place to start the SILENCERS structure
  1554. set_silencers_filename();
  1555. health_initialize_global_silencers();
  1556. // --------------------------------------------------------------------
  1557. // Initialize ML configuration
  1558. delta_startup_time("initialize ML");
  1559. ml_init();
  1560. // --------------------------------------------------------------------
  1561. // setup process signals
  1562. // block signals while initializing threads.
  1563. // this causes the threads to block signals.
  1564. delta_startup_time("initialize signals");
  1565. signals_block();
  1566. signals_init(); // setup the signals we want to use
  1567. // --------------------------------------------------------------------
  1568. // check which threads are enabled and initialize them
  1569. delta_startup_time("initialize static threads");
  1570. // setup threads configs
  1571. default_stacksize = netdata_threads_init();
  1572. for (i = 0; static_threads[i].name != NULL ; i++) {
  1573. struct netdata_static_thread *st = &static_threads[i];
  1574. if(st->config_name)
  1575. st->enabled = config_get_boolean(st->config_section, st->config_name, st->enabled);
  1576. if(st->enabled && st->init_routine)
  1577. st->init_routine();
  1578. if(st->env_name)
  1579. setenv(st->env_name, st->enabled?"YES":"NO", 1);
  1580. if(st->global_variable)
  1581. *st->global_variable = (st->enabled) ? true : false;
  1582. }
  1583. // --------------------------------------------------------------------
  1584. // create the listening sockets
  1585. delta_startup_time("initialize web server");
  1586. web_client_api_v1_init();
  1587. web_server_threading_selection();
  1588. if(web_server_mode != WEB_SERVER_MODE_NONE)
  1589. api_listen_sockets_setup();
  1590. }
  1591. delta_startup_time("set resource limits");
  1592. #ifdef NETDATA_INTERNAL_CHECKS
  1593. if(debug_flags != 0) {
  1594. struct rlimit rl = { RLIM_INFINITY, RLIM_INFINITY };
  1595. if(setrlimit(RLIMIT_CORE, &rl) != 0)
  1596. error("Cannot request unlimited core dumps for debugging... Proceeding anyway...");
  1597. #ifdef HAVE_SYS_PRCTL_H
  1598. prctl(PR_SET_DUMPABLE, 1, 0, 0, 0);
  1599. #endif
  1600. }
  1601. #endif /* NETDATA_INTERNAL_CHECKS */
  1602. set_nofile_limit(&rlimit_nofile);
  1603. delta_startup_time("become daemon");
  1604. // fork, switch user, create pid file, set process priority
  1605. if(become_daemon(dont_fork, user) == -1)
  1606. fatal("Cannot daemonize myself.");
  1607. info("netdata started on pid %d.", getpid());
  1608. delta_startup_time("initialize threads after fork");
  1609. netdata_threads_init_after_fork((size_t)config_get_number(CONFIG_SECTION_GLOBAL, "pthread stack size", (long)default_stacksize));
  1610. // initialize internal registry
  1611. delta_startup_time("initialize registry");
  1612. registry_init();
  1613. // fork the spawn server
  1614. delta_startup_time("fork the spawn server");
  1615. spawn_init();
  1616. /*
  1617. * Libuv uv_spawn() uses SIGCHLD internally:
  1618. * https://github.com/libuv/libuv/blob/cc51217a317e96510fbb284721d5e6bc2af31e33/src/unix/process.c#L485
  1619. * and inadvertently replaces the netdata signal handler which was setup during initialization.
  1620. * Thusly, we must explicitly restore the signal handler for SIGCHLD.
  1621. * Warning: extreme care is needed when mixing and matching POSIX and libuv.
  1622. */
  1623. signals_restore_SIGCHLD();
  1624. // ------------------------------------------------------------------------
  1625. // initialize rrd, registry, health, rrdpush, etc.
  1626. delta_startup_time("collecting system info");
  1627. netdata_anonymous_statistics_enabled=-1;
  1628. struct rrdhost_system_info *system_info = callocz(1, sizeof(struct rrdhost_system_info));
  1629. __atomic_sub_fetch(&netdata_buffers_statistics.rrdhost_allocations_size, sizeof(struct rrdhost_system_info), __ATOMIC_RELAXED);
  1630. get_system_info(system_info);
  1631. system_info->hops = 0;
  1632. get_install_type(&system_info->install_type, &system_info->prebuilt_arch, &system_info->prebuilt_dist);
  1633. delta_startup_time("initialize RRD structures");
  1634. if(rrd_init(netdata_configured_hostname, system_info, false))
  1635. fatal("Cannot initialize localhost instance with name '%s'.", netdata_configured_hostname);
  1636. delta_startup_time("check for incomplete shutdown");
  1637. char agent_crash_file[FILENAME_MAX + 1];
  1638. char agent_incomplete_shutdown_file[FILENAME_MAX + 1];
  1639. snprintfz(agent_incomplete_shutdown_file, FILENAME_MAX, "%s/.agent_incomplete_shutdown", netdata_configured_varlib_dir);
  1640. int incomplete_shutdown_detected = (unlink(agent_incomplete_shutdown_file) == 0);
  1641. snprintfz(agent_crash_file, FILENAME_MAX, "%s/.agent_crash", netdata_configured_varlib_dir);
  1642. int crash_detected = (unlink(agent_crash_file) == 0);
  1643. int fd = open(agent_crash_file, O_WRONLY | O_CREAT | O_TRUNC, 444);
  1644. if (fd >= 0)
  1645. close(fd);
  1646. // ------------------------------------------------------------------------
  1647. // Claim netdata agent to a cloud endpoint
  1648. delta_startup_time("collect claiming info");
  1649. if (claiming_pending_arguments)
  1650. claim_agent(claiming_pending_arguments);
  1651. load_claiming_state();
  1652. // ------------------------------------------------------------------------
  1653. // enable log flood protection
  1654. error_log_limit_reset();
  1655. // Load host labels
  1656. delta_startup_time("collect host labels");
  1657. reload_host_labels();
  1658. // ------------------------------------------------------------------------
  1659. // spawn the threads
  1660. delta_startup_time("start the static threads");
  1661. web_server_config_options();
  1662. netdata_zero_metrics_enabled = config_get_boolean_ondemand(CONFIG_SECTION_DB, "enable zero metrics", CONFIG_BOOLEAN_NO);
  1663. set_late_global_environment();
  1664. for (i = 0; static_threads[i].name != NULL ; i++) {
  1665. struct netdata_static_thread *st = &static_threads[i];
  1666. if(st->enabled) {
  1667. st->thread = mallocz(sizeof(netdata_thread_t));
  1668. debug(D_SYSTEM, "Starting thread %s.", st->name);
  1669. netdata_thread_create(st->thread, st->name, NETDATA_THREAD_OPTION_DEFAULT, st->start_routine, st);
  1670. }
  1671. else debug(D_SYSTEM, "Not starting thread %s.", st->name);
  1672. }
  1673. // ------------------------------------------------------------------------
  1674. // Initialize netdata agent command serving from cli and signals
  1675. delta_startup_time("initialize commands API");
  1676. commands_init();
  1677. delta_startup_time("ready");
  1678. usec_t ready_ut = now_monotonic_usec();
  1679. info("NETDATA STARTUP: completed in %llu ms. Enjoy real-time performance monitoring!", (ready_ut - started_ut) / USEC_PER_MS);
  1680. netdata_ready = 1;
  1681. send_statistics("START", "-", "-");
  1682. if (crash_detected)
  1683. send_statistics("CRASH", "-", "-");
  1684. if (incomplete_shutdown_detected)
  1685. send_statistics("INCOMPLETE_SHUTDOWN", "-", "-");
  1686. //check if ANALYTICS needs to start
  1687. if (netdata_anonymous_statistics_enabled == 1) {
  1688. for (i = 0; static_threads[i].name != NULL; i++) {
  1689. if (!strncmp(static_threads[i].name, "ANALYTICS", 9)) {
  1690. struct netdata_static_thread *st = &static_threads[i];
  1691. st->thread = mallocz(sizeof(netdata_thread_t));
  1692. st->enabled = 1;
  1693. debug(D_SYSTEM, "Starting thread %s.", st->name);
  1694. netdata_thread_create(st->thread, st->name, NETDATA_THREAD_OPTION_DEFAULT, st->start_routine, st);
  1695. }
  1696. }
  1697. }
  1698. // ------------------------------------------------------------------------
  1699. // Report ACLK build failure
  1700. #ifndef ENABLE_ACLK
  1701. error("This agent doesn't have ACLK.");
  1702. char filename[FILENAME_MAX + 1];
  1703. snprintfz(filename, FILENAME_MAX, "%s/.aclk_report_sent", netdata_configured_varlib_dir);
  1704. if (netdata_anonymous_statistics_enabled > 0 && access(filename, F_OK)) { // -1 -> not initialized
  1705. send_statistics("ACLK_DISABLED", "-", "-");
  1706. int fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC, 444);
  1707. if (fd == -1)
  1708. error("Cannot create file '%s'. Please fix this.", filename);
  1709. else
  1710. close(fd);
  1711. }
  1712. #endif
  1713. // ------------------------------------------------------------------------
  1714. // unblock signals
  1715. signals_unblock();
  1716. // ------------------------------------------------------------------------
  1717. // Handle signals
  1718. signals_handle();
  1719. // should never reach this point
  1720. // but we need it for rpmlint #2752
  1721. return 1;
  1722. }