main.c 85 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055
  1. // SPDX-License-Identifier: GPL-3.0-or-later
  2. #include "common.h"
  3. #include "buildinfo.h"
  4. #include "static_threads.h"
  5. #if defined(ENV32BIT)
  6. #warning COMPILING 32BIT NETDATA
  7. #endif
  8. bool unittest_running = false;
  9. int netdata_zero_metrics_enabled;
  10. int netdata_anonymous_statistics_enabled;
  11. int libuv_worker_threads = MIN_LIBUV_WORKER_THREADS;
  12. struct netdata_static_thread *static_threads;
  13. struct config netdata_config = {
  14. .first_section = NULL,
  15. .last_section = NULL,
  16. .mutex = NETDATA_MUTEX_INITIALIZER,
  17. .index = {
  18. .avl_tree = {
  19. .root = NULL,
  20. .compar = appconfig_section_compare
  21. },
  22. .rwlock = AVL_LOCK_INITIALIZER
  23. }
  24. };
  25. typedef struct service_thread {
  26. pid_t tid;
  27. SERVICE_THREAD_TYPE type;
  28. SERVICE_TYPE services;
  29. char name[NETDATA_THREAD_NAME_MAX + 1];
  30. bool cancelled;
  31. union {
  32. netdata_thread_t netdata_thread;
  33. uv_thread_t uv_thread;
  34. };
  35. force_quit_t force_quit_callback;
  36. request_quit_t request_quit_callback;
  37. void *data;
  38. } SERVICE_THREAD;
  39. struct service_globals {
  40. SERVICE_TYPE running;
  41. SPINLOCK lock;
  42. Pvoid_t pid_judy;
  43. } service_globals = {
  44. .running = ~0,
  45. .pid_judy = NULL,
  46. };
  47. SERVICE_THREAD *service_register(SERVICE_THREAD_TYPE thread_type, request_quit_t request_quit_callback, force_quit_t force_quit_callback, void *data, bool update __maybe_unused) {
  48. SERVICE_THREAD *sth = NULL;
  49. pid_t tid = gettid();
  50. netdata_spinlock_lock(&service_globals.lock);
  51. Pvoid_t *PValue = JudyLIns(&service_globals.pid_judy, tid, PJE0);
  52. if(!*PValue) {
  53. sth = callocz(1, sizeof(SERVICE_THREAD));
  54. sth->tid = tid;
  55. sth->type = thread_type;
  56. sth->request_quit_callback = request_quit_callback;
  57. sth->force_quit_callback = force_quit_callback;
  58. sth->data = data;
  59. os_thread_get_current_name_np(sth->name);
  60. *PValue = sth;
  61. switch(thread_type) {
  62. case SERVICE_THREAD_TYPE_NETDATA:
  63. sth->netdata_thread = netdata_thread_self();
  64. break;
  65. case SERVICE_THREAD_TYPE_LIBUV:
  66. sth->uv_thread = uv_thread_self();
  67. break;
  68. }
  69. }
  70. else {
  71. sth = *PValue;
  72. }
  73. netdata_spinlock_unlock(&service_globals.lock);
  74. return sth;
  75. }
  76. void service_exits(void) {
  77. pid_t tid = gettid();
  78. netdata_spinlock_lock(&service_globals.lock);
  79. Pvoid_t *PValue = JudyLGet(service_globals.pid_judy, tid, PJE0);
  80. if(PValue) {
  81. freez(*PValue);
  82. JudyLDel(&service_globals.pid_judy, tid, PJE0);
  83. }
  84. netdata_spinlock_unlock(&service_globals.lock);
  85. }
  86. bool service_running(SERVICE_TYPE service) {
  87. static __thread SERVICE_THREAD *sth = NULL;
  88. if(unlikely(!sth))
  89. sth = service_register(SERVICE_THREAD_TYPE_NETDATA, NULL, NULL, NULL, false);
  90. if(netdata_exit)
  91. __atomic_store_n(&service_globals.running, 0, __ATOMIC_RELAXED);
  92. if(service == 0)
  93. service = sth->services;
  94. sth->services |= service;
  95. return ((__atomic_load_n(&service_globals.running, __ATOMIC_RELAXED) & service) == service);
  96. }
  97. void service_signal_exit(SERVICE_TYPE service) {
  98. __atomic_and_fetch(&service_globals.running, ~(service), __ATOMIC_RELAXED);
  99. netdata_spinlock_lock(&service_globals.lock);
  100. Pvoid_t *PValue;
  101. Word_t tid = 0;
  102. bool first = true;
  103. while((PValue = JudyLFirstThenNext(service_globals.pid_judy, &tid, &first))) {
  104. SERVICE_THREAD *sth = *PValue;
  105. if((sth->services & service) && sth->request_quit_callback) {
  106. netdata_spinlock_unlock(&service_globals.lock);
  107. sth->request_quit_callback(sth->data);
  108. netdata_spinlock_lock(&service_globals.lock);
  109. continue;
  110. }
  111. }
  112. netdata_spinlock_unlock(&service_globals.lock);
  113. }
  114. static void service_to_buffer(BUFFER *wb, SERVICE_TYPE service) {
  115. if(service & SERVICE_MAINTENANCE)
  116. buffer_strcat(wb, "MAINTENANCE ");
  117. if(service & SERVICE_COLLECTORS)
  118. buffer_strcat(wb, "COLLECTORS ");
  119. if(service & SERVICE_ML_TRAINING)
  120. buffer_strcat(wb, "ML_TRAINING ");
  121. if(service & SERVICE_ML_PREDICTION)
  122. buffer_strcat(wb, "ML_PREDICTION ");
  123. if(service & SERVICE_REPLICATION)
  124. buffer_strcat(wb, "REPLICATION ");
  125. if(service & ABILITY_DATA_QUERIES)
  126. buffer_strcat(wb, "DATA_QUERIES ");
  127. if(service & ABILITY_WEB_REQUESTS)
  128. buffer_strcat(wb, "WEB_REQUESTS ");
  129. if(service & SERVICE_WEB_SERVER)
  130. buffer_strcat(wb, "WEB_SERVER ");
  131. if(service & SERVICE_ACLK)
  132. buffer_strcat(wb, "ACLK ");
  133. if(service & SERVICE_HEALTH)
  134. buffer_strcat(wb, "HEALTH ");
  135. if(service & SERVICE_STREAMING)
  136. buffer_strcat(wb, "STREAMING ");
  137. if(service & ABILITY_STREAMING_CONNECTIONS)
  138. buffer_strcat(wb, "STREAMING_CONNECTIONS ");
  139. if(service & SERVICE_CONTEXT)
  140. buffer_strcat(wb, "CONTEXT ");
  141. if(service & SERVICE_ANALYTICS)
  142. buffer_strcat(wb, "ANALYTICS ");
  143. if(service & SERVICE_EXPORTERS)
  144. buffer_strcat(wb, "EXPORTERS ");
  145. }
  146. static bool service_wait_exit(SERVICE_TYPE service, usec_t timeout_ut) {
  147. BUFFER *service_list = buffer_create(1024);
  148. BUFFER *thread_list = buffer_create(1024);
  149. usec_t started_ut = now_monotonic_usec(), ended_ut;
  150. size_t running;
  151. SERVICE_TYPE running_services = 0;
  152. // cancel the threads
  153. running = 0;
  154. running_services = 0;
  155. {
  156. buffer_flush(thread_list);
  157. netdata_spinlock_lock(&service_globals.lock);
  158. Pvoid_t *PValue;
  159. Word_t tid = 0;
  160. bool first = true;
  161. while((PValue = JudyLFirstThenNext(service_globals.pid_judy, &tid, &first))) {
  162. SERVICE_THREAD *sth = *PValue;
  163. if(sth->services & service && sth->tid != gettid() && !sth->cancelled) {
  164. sth->cancelled = true;
  165. switch(sth->type) {
  166. case SERVICE_THREAD_TYPE_NETDATA:
  167. netdata_thread_cancel(sth->netdata_thread);
  168. break;
  169. case SERVICE_THREAD_TYPE_LIBUV:
  170. break;
  171. }
  172. if(running)
  173. buffer_strcat(thread_list, ", ");
  174. buffer_sprintf(thread_list, "'%s' (%d)", sth->name, sth->tid);
  175. running++;
  176. running_services |= sth->services & service;
  177. if(sth->force_quit_callback) {
  178. netdata_spinlock_unlock(&service_globals.lock);
  179. sth->force_quit_callback(sth->data);
  180. netdata_spinlock_lock(&service_globals.lock);
  181. continue;
  182. }
  183. }
  184. }
  185. netdata_spinlock_unlock(&service_globals.lock);
  186. }
  187. service_signal_exit(service);
  188. // signal them to stop
  189. size_t last_running = 0;
  190. size_t stale_time_ut = 0;
  191. usec_t sleep_ut = 50 * USEC_PER_MS;
  192. size_t log_countdown_ut = sleep_ut;
  193. do {
  194. if(running != last_running)
  195. stale_time_ut = 0;
  196. last_running = running;
  197. running = 0;
  198. running_services = 0;
  199. buffer_flush(thread_list);
  200. netdata_spinlock_lock(&service_globals.lock);
  201. Pvoid_t *PValue;
  202. Word_t tid = 0;
  203. bool first = true;
  204. while((PValue = JudyLFirstThenNext(service_globals.pid_judy, &tid, &first))) {
  205. SERVICE_THREAD *sth = *PValue;
  206. if(sth->services & service && sth->tid != gettid()) {
  207. if(running)
  208. buffer_strcat(thread_list, ", ");
  209. buffer_sprintf(thread_list, "'%s' (%d)", sth->name, sth->tid);
  210. running_services |= sth->services & service;
  211. running++;
  212. }
  213. }
  214. netdata_spinlock_unlock(&service_globals.lock);
  215. if(running) {
  216. log_countdown_ut -= (log_countdown_ut >= sleep_ut) ? sleep_ut : log_countdown_ut;
  217. if(log_countdown_ut == 0 || running != last_running) {
  218. log_countdown_ut = 20 * sleep_ut;
  219. buffer_flush(service_list);
  220. service_to_buffer(service_list, running_services);
  221. info("SERVICE CONTROL: waiting for the following %zu services [ %s] to exit: %s",
  222. running, buffer_tostring(service_list),
  223. running <= 10 ? buffer_tostring(thread_list) : "");
  224. }
  225. sleep_usec(sleep_ut);
  226. stale_time_ut += sleep_ut;
  227. }
  228. ended_ut = now_monotonic_usec();
  229. } while(running && (ended_ut - started_ut < timeout_ut || stale_time_ut < timeout_ut));
  230. if(running) {
  231. buffer_flush(service_list);
  232. service_to_buffer(service_list, running_services);
  233. info("SERVICE CONTROL: "
  234. "the following %zu service(s) [ %s] take too long to exit: %s; "
  235. "giving up on them...",
  236. running, buffer_tostring(service_list),
  237. buffer_tostring(thread_list));
  238. }
  239. buffer_free(thread_list);
  240. buffer_free(service_list);
  241. return (running == 0);
  242. }
  // Log the time spent in the previous shutdown step and announce the next one.
  //
  // Expects these variables in the scope of the caller:
  //   usec_t last_ut        - when the previous step started (updated here)
  //   const char *prev_msg  - description of the previous step, NULL on first use (updated here)
  //   bool timeout          - whether the previous step timed out (reset to false here)
  //
  // The body is wrapped in do { } while(0) so that the macro behaves as a single
  // statement and can be used safely in an unbraced if/else. It must be followed
  // by a semicolon.
  #define delta_shutdown_time(msg) \
      do { \
          usec_t now_ut = now_monotonic_usec(); \
          if(prev_msg) \
              info("NETDATA SHUTDOWN: in %7llu ms, %s%s - next: %s", (now_ut - last_ut) / USEC_PER_MS, (timeout)?"(TIMEOUT) ":"", prev_msg, (msg)); \
          else \
              info("NETDATA SHUTDOWN: next: %s", (msg)); \
          last_ut = now_ut; \
          prev_msg = (msg); \
          timeout = false; \
      } while(0)
  254. void netdata_cleanup_and_exit(int ret) {
  255. usec_t started_ut = now_monotonic_usec();
  256. usec_t last_ut = started_ut;
  257. const char *prev_msg = NULL;
  258. bool timeout = false;
  259. error_log_limit_unlimited();
  260. info("NETDATA SHUTDOWN: initializing shutdown with code %d...", ret);
  261. send_statistics("EXIT", ret?"ERROR":"OK","-");
  262. delta_shutdown_time("create shutdown file");
  263. char agent_crash_file[FILENAME_MAX + 1];
  264. char agent_incomplete_shutdown_file[FILENAME_MAX + 1];
  265. snprintfz(agent_crash_file, FILENAME_MAX, "%s/.agent_crash", netdata_configured_varlib_dir);
  266. snprintfz(agent_incomplete_shutdown_file, FILENAME_MAX, "%s/.agent_incomplete_shutdown", netdata_configured_varlib_dir);
  267. (void) rename(agent_crash_file, agent_incomplete_shutdown_file);
  268. delta_shutdown_time("disable maintenance, new queries, new web requests, new streaming connections and aclk");
  269. service_signal_exit(
  270. SERVICE_MAINTENANCE
  271. | ABILITY_DATA_QUERIES
  272. | ABILITY_WEB_REQUESTS
  273. | ABILITY_STREAMING_CONNECTIONS
  274. | SERVICE_ACLK
  275. );
  276. delta_shutdown_time("stop replication, exporters, ML training, health and web servers threads");
  277. timeout = !service_wait_exit(
  278. SERVICE_REPLICATION
  279. | SERVICE_EXPORTERS
  280. | SERVICE_ML_TRAINING
  281. | SERVICE_HEALTH
  282. | SERVICE_WEB_SERVER
  283. , 3 * USEC_PER_SEC);
  284. delta_shutdown_time("stop collectors and streaming threads");
  285. timeout = !service_wait_exit(
  286. SERVICE_COLLECTORS
  287. | SERVICE_STREAMING
  288. , 3 * USEC_PER_SEC);
  289. delta_shutdown_time("stop ML prediction and context threads");
  290. timeout = !service_wait_exit(
  291. SERVICE_ML_PREDICTION
  292. | SERVICE_CONTEXT
  293. , 3 * USEC_PER_SEC);
  294. delta_shutdown_time("stop maintenance thread");
  295. timeout = !service_wait_exit(
  296. SERVICE_MAINTENANCE
  297. , 3 * USEC_PER_SEC);
  298. delta_shutdown_time("clean rrdhost database");
  299. rrdhost_cleanup_all();
  300. delta_shutdown_time("prepare metasync shutdown");
  301. metadata_sync_shutdown_prepare();
  302. #ifdef ENABLE_ACLK
  303. delta_shutdown_time("signal aclk sync to stop");
  304. aclk_sync_exit_all();
  305. #endif
  306. delta_shutdown_time("stop aclk threads");
  307. timeout = !service_wait_exit(
  308. SERVICE_ACLK
  309. , 3 * USEC_PER_SEC);
  310. delta_shutdown_time("stop all remaining worker threads");
  311. timeout = !service_wait_exit(~0, 10 * USEC_PER_SEC);
  312. delta_shutdown_time("cancel main threads");
  313. cancel_main_threads();
  314. if(!ret) {
  315. // exit cleanly
  316. #ifdef ENABLE_DBENGINE
  317. if(dbengine_enabled) {
  318. delta_shutdown_time("flush dbengine tiers");
  319. for (size_t tier = 0; tier < storage_tiers; tier++)
  320. rrdeng_prepare_exit(multidb_ctx[tier]);
  321. }
  322. #endif
  323. // free the database
  324. delta_shutdown_time("free rrdhost structures");
  325. rrdhost_free_all();
  326. delta_shutdown_time("stop metasync threads");
  327. metadata_sync_shutdown();
  328. #ifdef ENABLE_DBENGINE
  329. if(dbengine_enabled) {
  330. delta_shutdown_time("stop dbengine tiers");
  331. for (size_t tier = 0; tier < storage_tiers; tier++)
  332. rrdeng_exit(multidb_ctx[tier]);
  333. }
  334. #endif
  335. }
  336. delta_shutdown_time("close SQL context db");
  337. sql_close_context_database();
  338. delta_shutdown_time("closed SQL main db");
  339. sql_close_database();
  340. // unlink the pid
  341. if(pidfile[0]) {
  342. delta_shutdown_time("remove pid file");
  343. if(unlink(pidfile) != 0)
  344. error("EXIT: cannot unlink pidfile '%s'.", pidfile);
  345. }
  346. #ifdef ENABLE_HTTPS
  347. delta_shutdown_time("free openssl structures");
  348. security_clean_openssl();
  349. #endif
  350. delta_shutdown_time("remove incomplete shutdown file");
  351. (void) unlink(agent_incomplete_shutdown_file);
  352. delta_shutdown_time("exit");
  353. usec_t ended_ut = now_monotonic_usec();
  354. info("NETDATA SHUTDOWN: completed in %llu ms - netdata is now exiting - bye bye...", (ended_ut - started_ut) / USEC_PER_MS);
  355. exit(ret);
  356. }
  357. void web_server_threading_selection(void) {
  358. web_server_mode = web_server_mode_id(config_get(CONFIG_SECTION_WEB, "mode", web_server_mode_name(web_server_mode)));
  359. int static_threaded = (web_server_mode == WEB_SERVER_MODE_STATIC_THREADED);
  360. int i;
  361. for (i = 0; static_threads[i].name; i++) {
  362. if (static_threads[i].start_routine == socket_listen_main_static_threaded)
  363. static_threads[i].enabled = static_threaded;
  364. }
  365. }
  366. int make_dns_decision(const char *section_name, const char *config_name, const char *default_value, SIMPLE_PATTERN *p)
  367. {
  368. char *value = config_get(section_name,config_name,default_value);
  369. if(!strcmp("yes",value))
  370. return 1;
  371. if(!strcmp("no",value))
  372. return 0;
  373. if(strcmp("heuristic",value))
  374. error("Invalid configuration option '%s' for '%s'/'%s'. Valid options are 'yes', 'no' and 'heuristic'. Proceeding with 'heuristic'",
  375. value, section_name, config_name);
  376. return simple_pattern_is_potential_name(p);
  377. }
  378. void web_server_config_options(void)
  379. {
  380. web_client_timeout =
  381. (int)config_get_number(CONFIG_SECTION_WEB, "disconnect idle clients after seconds", web_client_timeout);
  382. web_client_first_request_timeout =
  383. (int)config_get_number(CONFIG_SECTION_WEB, "timeout for first request", web_client_first_request_timeout);
  384. web_client_streaming_rate_t =
  385. config_get_number(CONFIG_SECTION_WEB, "accept a streaming request every seconds", web_client_streaming_rate_t);
  386. respect_web_browser_do_not_track_policy =
  387. config_get_boolean(CONFIG_SECTION_WEB, "respect do not track policy", respect_web_browser_do_not_track_policy);
  388. web_x_frame_options = config_get(CONFIG_SECTION_WEB, "x-frame-options response header", "");
  389. if(!*web_x_frame_options)
  390. web_x_frame_options = NULL;
  391. web_allow_connections_from =
  392. simple_pattern_create(config_get(CONFIG_SECTION_WEB, "allow connections from", "localhost *"),
  393. NULL, SIMPLE_PATTERN_EXACT);
  394. web_allow_connections_dns =
  395. make_dns_decision(CONFIG_SECTION_WEB, "allow connections by dns", "heuristic", web_allow_connections_from);
  396. web_allow_dashboard_from =
  397. simple_pattern_create(config_get(CONFIG_SECTION_WEB, "allow dashboard from", "localhost *"),
  398. NULL, SIMPLE_PATTERN_EXACT);
  399. web_allow_dashboard_dns =
  400. make_dns_decision(CONFIG_SECTION_WEB, "allow dashboard by dns", "heuristic", web_allow_dashboard_from);
  401. web_allow_badges_from =
  402. simple_pattern_create(config_get(CONFIG_SECTION_WEB, "allow badges from", "*"), NULL, SIMPLE_PATTERN_EXACT);
  403. web_allow_badges_dns =
  404. make_dns_decision(CONFIG_SECTION_WEB, "allow badges by dns", "heuristic", web_allow_badges_from);
  405. web_allow_registry_from =
  406. simple_pattern_create(config_get(CONFIG_SECTION_REGISTRY, "allow from", "*"), NULL, SIMPLE_PATTERN_EXACT);
  407. web_allow_registry_dns = make_dns_decision(CONFIG_SECTION_REGISTRY, "allow by dns", "heuristic",
  408. web_allow_registry_from);
  409. web_allow_streaming_from = simple_pattern_create(config_get(CONFIG_SECTION_WEB, "allow streaming from", "*"),
  410. NULL, SIMPLE_PATTERN_EXACT);
  411. web_allow_streaming_dns = make_dns_decision(CONFIG_SECTION_WEB, "allow streaming by dns", "heuristic",
  412. web_allow_streaming_from);
  413. // Note the default is not heuristic, the wildcards could match DNS but the intent is ip-addresses.
  414. web_allow_netdataconf_from = simple_pattern_create(config_get(CONFIG_SECTION_WEB, "allow netdata.conf from",
  415. "localhost fd* 10.* 192.168.* 172.16.* 172.17.* 172.18.*"
  416. " 172.19.* 172.20.* 172.21.* 172.22.* 172.23.* 172.24.*"
  417. " 172.25.* 172.26.* 172.27.* 172.28.* 172.29.* 172.30.*"
  418. " 172.31.* UNKNOWN"), NULL, SIMPLE_PATTERN_EXACT);
  419. web_allow_netdataconf_dns =
  420. make_dns_decision(CONFIG_SECTION_WEB, "allow netdata.conf by dns", "no", web_allow_netdataconf_from);
  421. web_allow_mgmt_from =
  422. simple_pattern_create(config_get(CONFIG_SECTION_WEB, "allow management from", "localhost"),
  423. NULL, SIMPLE_PATTERN_EXACT);
  424. web_allow_mgmt_dns =
  425. make_dns_decision(CONFIG_SECTION_WEB, "allow management by dns","heuristic",web_allow_mgmt_from);
  426. #ifdef NETDATA_WITH_ZLIB
  427. web_enable_gzip = config_get_boolean(CONFIG_SECTION_WEB, "enable gzip compression", web_enable_gzip);
  428. char *s = config_get(CONFIG_SECTION_WEB, "gzip compression strategy", "default");
  429. if(!strcmp(s, "default"))
  430. web_gzip_strategy = Z_DEFAULT_STRATEGY;
  431. else if(!strcmp(s, "filtered"))
  432. web_gzip_strategy = Z_FILTERED;
  433. else if(!strcmp(s, "huffman only"))
  434. web_gzip_strategy = Z_HUFFMAN_ONLY;
  435. else if(!strcmp(s, "rle"))
  436. web_gzip_strategy = Z_RLE;
  437. else if(!strcmp(s, "fixed"))
  438. web_gzip_strategy = Z_FIXED;
  439. else {
  440. error("Invalid compression strategy '%s'. Valid strategies are 'default', 'filtered', 'huffman only', 'rle' and 'fixed'. Proceeding with 'default'.", s);
  441. web_gzip_strategy = Z_DEFAULT_STRATEGY;
  442. }
  443. web_gzip_level = (int)config_get_number(CONFIG_SECTION_WEB, "gzip compression level", 3);
  444. if(web_gzip_level < 1) {
  445. error("Invalid compression level %d. Valid levels are 1 (fastest) to 9 (best ratio). Proceeding with level 1 (fastest compression).", web_gzip_level);
  446. web_gzip_level = 1;
  447. }
  448. else if(web_gzip_level > 9) {
  449. error("Invalid compression level %d. Valid levels are 1 (fastest) to 9 (best ratio). Proceeding with level 9 (best compression).", web_gzip_level);
  450. web_gzip_level = 9;
  451. }
  452. #endif /* NETDATA_WITH_ZLIB */
  453. }
  454. // killpid kills pid with SIGTERM.
  455. int killpid(pid_t pid) {
  456. int ret;
  457. debug(D_EXIT, "Request to kill pid %d", pid);
  458. errno = 0;
  459. ret = kill(pid, SIGTERM);
  460. if (ret == -1) {
  461. switch(errno) {
  462. case ESRCH:
  463. // We wanted the process to exit so just let the caller handle.
  464. return ret;
  465. case EPERM:
  466. error("Cannot kill pid %d, but I do not have enough permissions.", pid);
  467. break;
  468. default:
  469. error("Cannot kill pid %d, but I received an error.", pid);
  470. break;
  471. }
  472. }
  473. return ret;
  474. }
  475. void cancel_main_threads() {
  476. error_log_limit_unlimited();
  477. int i, found = 0;
  478. usec_t max = 5 * USEC_PER_SEC, step = 100000;
  479. for (i = 0; static_threads[i].name != NULL ; i++) {
  480. if(static_threads[i].enabled == NETDATA_MAIN_THREAD_RUNNING) {
  481. info("EXIT: Stopping main thread: %s", static_threads[i].name);
  482. netdata_thread_cancel(*static_threads[i].thread);
  483. found++;
  484. }
  485. }
  486. netdata_exit = 1;
  487. while(found && max > 0) {
  488. max -= step;
  489. info("Waiting %d threads to finish...", found);
  490. sleep_usec(step);
  491. found = 0;
  492. for (i = 0; static_threads[i].name != NULL ; i++) {
  493. if (static_threads[i].enabled != NETDATA_MAIN_THREAD_EXITED)
  494. found++;
  495. }
  496. }
  497. if(found) {
  498. for (i = 0; static_threads[i].name != NULL ; i++) {
  499. if (static_threads[i].enabled != NETDATA_MAIN_THREAD_EXITED)
  500. error("Main thread %s takes too long to exit. Giving up...", static_threads[i].name);
  501. }
  502. }
  503. else
  504. info("All threads finished.");
  505. for (i = 0; static_threads[i].name != NULL ; i++)
  506. freez(static_threads[i].thread);
  507. freez(static_threads);
  508. }
  509. struct option_def option_definitions[] = {
  510. // opt description arg name default value
  511. { 'c', "Configuration file to load.", "filename", CONFIG_DIR "/" CONFIG_FILENAME},
  512. { 'D', "Do not fork. Run in the foreground.", NULL, "run in the background"},
  513. { 'd', "Fork. Run in the background.", NULL, "run in the background"},
  514. { 'h', "Display this help message.", NULL, NULL},
  515. { 'P', "File to save a pid while running.", "filename", "do not save pid to a file"},
  516. { 'i', "The IP address to listen to.", "IP", "all IP addresses IPv4 and IPv6"},
  517. { 'p', "API/Web port to use.", "port", "19999"},
  518. { 's', "Prefix for /proc and /sys (for containers).", "path", "no prefix"},
  519. { 't', "The internal clock of netdata.", "seconds", "1"},
  520. { 'u', "Run as user.", "username", "netdata"},
  521. { 'v', "Print netdata version and exit.", NULL, NULL},
  522. { 'V', "Print netdata version and exit.", NULL, NULL},
  523. { 'W', "See Advanced options below.", "options", NULL},
  524. };
  525. int help(int exitcode) {
  526. FILE *stream;
  527. if(exitcode == 0)
  528. stream = stdout;
  529. else
  530. stream = stderr;
  531. int num_opts = sizeof(option_definitions) / sizeof(struct option_def);
  532. int i;
  533. int max_len_arg = 0;
  534. // Compute maximum argument length
  535. for( i = 0; i < num_opts; i++ ) {
  536. if(option_definitions[i].arg_name) {
  537. int len_arg = (int)strlen(option_definitions[i].arg_name);
  538. if(len_arg > max_len_arg) max_len_arg = len_arg;
  539. }
  540. }
  541. if(max_len_arg > 30) max_len_arg = 30;
  542. if(max_len_arg < 20) max_len_arg = 20;
  543. fprintf(stream, "%s", "\n"
  544. " ^\n"
  545. " |.-. .-. .-. .-. . netdata \n"
  546. " | '-' '-' '-' '-' real-time performance monitoring, done right! \n"
  547. " +----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+--->\n"
  548. "\n"
  549. " Copyright (C) 2016-2022, Netdata, Inc. <info@netdata.cloud>\n"
  550. " Released under GNU General Public License v3 or later.\n"
  551. " All rights reserved.\n"
  552. "\n"
  553. " Home Page : https://netdata.cloud\n"
  554. " Source Code: https://github.com/netdata/netdata\n"
  555. " Docs : https://learn.netdata.cloud\n"
  556. " Support : https://github.com/netdata/netdata/issues\n"
  557. " License : https://github.com/netdata/netdata/blob/master/LICENSE.md\n"
  558. "\n"
  559. " Twitter : https://twitter.com/linuxnetdata\n"
  560. " LinkedIn : https://linkedin.com/company/netdata-cloud/\n"
  561. " Facebook : https://facebook.com/linuxnetdata/\n"
  562. "\n"
  563. "\n"
  564. );
  565. fprintf(stream, " SYNOPSIS: netdata [options]\n");
  566. fprintf(stream, "\n");
  567. fprintf(stream, " Options:\n\n");
  568. // Output options description.
  569. for( i = 0; i < num_opts; i++ ) {
  570. fprintf(stream, " -%c %-*s %s", option_definitions[i].val, max_len_arg, option_definitions[i].arg_name ? option_definitions[i].arg_name : "", option_definitions[i].description);
  571. if(option_definitions[i].default_value) {
  572. fprintf(stream, "\n %c %-*s Default: %s\n", ' ', max_len_arg, "", option_definitions[i].default_value);
  573. } else {
  574. fprintf(stream, "\n");
  575. }
  576. fprintf(stream, "\n");
  577. }
  578. fprintf(stream, "\n Advanced options:\n\n"
  579. " -W stacksize=N Set the stacksize (in bytes).\n\n"
  580. " -W debug_flags=N Set runtime tracing to debug.log.\n\n"
  581. " -W unittest Run internal unittests and exit.\n\n"
  582. " -W sqlite-check Check metadata database integrity and exit.\n\n"
  583. " -W sqlite-fix Check metadata database integrity, fix if needed and exit.\n\n"
  584. " -W sqlite-compact Reclaim metadata database unused space and exit.\n\n"
  585. #ifdef ENABLE_DBENGINE
  586. " -W createdataset=N Create a DB engine dataset of N seconds and exit.\n\n"
  587. " -W stresstest=A,B,C,D,E,F,G\n"
  588. " Run a DB engine stress test for A seconds,\n"
  589. " with B writers and C readers, with a ramp up\n"
  590. " time of D seconds for writers, a page cache\n"
  591. " size of E MiB, an optional disk space limit\n"
  592. " of F MiB, G libuv workers (default 16) and exit.\n\n"
  593. #endif
  594. " -W set section option value\n"
  595. " set netdata.conf option from the command line.\n\n"
  596. " -W buildinfo Print the version, the configure options,\n"
  597. " a list of optional features, and whether they\n"
  598. " are enabled or not.\n\n"
  599. " -W buildinfojson Print the version, the configure options,\n"
  600. " a list of optional features, and whether they\n"
  601. " are enabled or not, in JSON format.\n\n"
  602. " -W simple-pattern pattern string\n"
  603. " Check if string matches pattern and exit.\n\n"
  604. " -W \"claim -token=TOKEN -rooms=ROOM1,ROOM2\"\n"
  605. " Claim the agent to the workspace rooms pointed to by TOKEN and ROOM*.\n\n"
  606. );
  607. fprintf(stream, "\n Signals netdata handles:\n\n"
  608. " - HUP Close and reopen log files.\n"
  609. " - USR1 Save internal DB to disk.\n"
  610. " - USR2 Reload health configuration.\n"
  611. "\n"
  612. );
  613. fflush(stream);
  614. return exitcode;
  615. }
  616. #ifdef ENABLE_HTTPS
  617. static void security_init(){
  618. char filename[FILENAME_MAX + 1];
  619. snprintfz(filename, FILENAME_MAX, "%s/ssl/key.pem",netdata_configured_user_config_dir);
  620. netdata_ssl_security_key = config_get(CONFIG_SECTION_WEB, "ssl key", filename);
  621. snprintfz(filename, FILENAME_MAX, "%s/ssl/cert.pem",netdata_configured_user_config_dir);
  622. netdata_ssl_security_cert = config_get(CONFIG_SECTION_WEB, "ssl certificate", filename);
  623. tls_version = config_get(CONFIG_SECTION_WEB, "tls version", "1.3");
  624. tls_ciphers = config_get(CONFIG_SECTION_WEB, "tls ciphers", "none");
  625. security_openssl_library();
  626. }
  627. #endif
  628. static void log_init(void) {
  629. char filename[FILENAME_MAX + 1];
  630. snprintfz(filename, FILENAME_MAX, "%s/debug.log", netdata_configured_log_dir);
  631. stdout_filename = config_get(CONFIG_SECTION_LOGS, "debug", filename);
  632. snprintfz(filename, FILENAME_MAX, "%s/error.log", netdata_configured_log_dir);
  633. stderr_filename = config_get(CONFIG_SECTION_LOGS, "error", filename);
  634. snprintfz(filename, FILENAME_MAX, "%s/access.log", netdata_configured_log_dir);
  635. stdaccess_filename = config_get(CONFIG_SECTION_LOGS, "access", filename);
  636. snprintfz(filename, FILENAME_MAX, "%s/health.log", netdata_configured_log_dir);
  637. stdhealth_filename = config_get(CONFIG_SECTION_LOGS, "health", filename);
  638. #ifdef ENABLE_ACLK
  639. aclklog_enabled = config_get_boolean(CONFIG_SECTION_CLOUD, "conversation log", CONFIG_BOOLEAN_NO);
  640. if (aclklog_enabled) {
  641. snprintfz(filename, FILENAME_MAX, "%s/aclk.log", netdata_configured_log_dir);
  642. aclklog_filename = config_get(CONFIG_SECTION_CLOUD, "conversation log file", filename);
  643. }
  644. #endif
  645. char deffacility[8];
  646. snprintfz(deffacility,7,"%s","daemon");
  647. facility_log = config_get(CONFIG_SECTION_LOGS, "facility", deffacility);
  648. error_log_throttle_period = config_get_number(CONFIG_SECTION_LOGS, "errors flood protection period", error_log_throttle_period);
  649. error_log_errors_per_period = (unsigned long)config_get_number(CONFIG_SECTION_LOGS, "errors to trigger flood protection", (long long int)error_log_errors_per_period);
  650. error_log_errors_per_period_backup = error_log_errors_per_period;
  651. setenv("NETDATA_ERRORS_THROTTLE_PERIOD", config_get(CONFIG_SECTION_LOGS, "errors flood protection period" , ""), 1);
  652. setenv("NETDATA_ERRORS_PER_PERIOD", config_get(CONFIG_SECTION_LOGS, "errors to trigger flood protection", ""), 1);
  653. }
  654. char *initialize_lock_directory_path(char *prefix)
  655. {
  656. char filename[FILENAME_MAX + 1];
  657. snprintfz(filename, FILENAME_MAX, "%s/lock", prefix);
  658. return config_get(CONFIG_SECTION_DIRECTORIES, "lock", filename);
  659. }
  660. static void backwards_compatible_config() {
  661. // move [global] options to the [web] section
  662. config_move(CONFIG_SECTION_GLOBAL, "http port listen backlog",
  663. CONFIG_SECTION_WEB, "listen backlog");
  664. config_move(CONFIG_SECTION_GLOBAL, "bind socket to IP",
  665. CONFIG_SECTION_WEB, "bind to");
  666. config_move(CONFIG_SECTION_GLOBAL, "bind to",
  667. CONFIG_SECTION_WEB, "bind to");
  668. config_move(CONFIG_SECTION_GLOBAL, "port",
  669. CONFIG_SECTION_WEB, "default port");
  670. config_move(CONFIG_SECTION_GLOBAL, "default port",
  671. CONFIG_SECTION_WEB, "default port");
  672. config_move(CONFIG_SECTION_GLOBAL, "disconnect idle web clients after seconds",
  673. CONFIG_SECTION_WEB, "disconnect idle clients after seconds");
  674. config_move(CONFIG_SECTION_GLOBAL, "respect web browser do not track policy",
  675. CONFIG_SECTION_WEB, "respect do not track policy");
  676. config_move(CONFIG_SECTION_GLOBAL, "web x-frame-options header",
  677. CONFIG_SECTION_WEB, "x-frame-options response header");
  678. config_move(CONFIG_SECTION_GLOBAL, "enable web responses gzip compression",
  679. CONFIG_SECTION_WEB, "enable gzip compression");
  680. config_move(CONFIG_SECTION_GLOBAL, "web compression strategy",
  681. CONFIG_SECTION_WEB, "gzip compression strategy");
  682. config_move(CONFIG_SECTION_GLOBAL, "web compression level",
  683. CONFIG_SECTION_WEB, "gzip compression level");
  684. config_move(CONFIG_SECTION_GLOBAL, "config directory",
  685. CONFIG_SECTION_DIRECTORIES, "config");
  686. config_move(CONFIG_SECTION_GLOBAL, "stock config directory",
  687. CONFIG_SECTION_DIRECTORIES, "stock config");
  688. config_move(CONFIG_SECTION_GLOBAL, "log directory",
  689. CONFIG_SECTION_DIRECTORIES, "log");
  690. config_move(CONFIG_SECTION_GLOBAL, "web files directory",
  691. CONFIG_SECTION_DIRECTORIES, "web");
  692. config_move(CONFIG_SECTION_GLOBAL, "cache directory",
  693. CONFIG_SECTION_DIRECTORIES, "cache");
  694. config_move(CONFIG_SECTION_GLOBAL, "lib directory",
  695. CONFIG_SECTION_DIRECTORIES, "lib");
  696. config_move(CONFIG_SECTION_GLOBAL, "home directory",
  697. CONFIG_SECTION_DIRECTORIES, "home");
  698. config_move(CONFIG_SECTION_GLOBAL, "lock directory",
  699. CONFIG_SECTION_DIRECTORIES, "lock");
  700. config_move(CONFIG_SECTION_GLOBAL, "plugins directory",
  701. CONFIG_SECTION_DIRECTORIES, "plugins");
  702. config_move(CONFIG_SECTION_HEALTH, "health configuration directory",
  703. CONFIG_SECTION_DIRECTORIES, "health config");
  704. config_move(CONFIG_SECTION_HEALTH, "stock health configuration directory",
  705. CONFIG_SECTION_DIRECTORIES, "stock health config");
  706. config_move(CONFIG_SECTION_REGISTRY, "registry db directory",
  707. CONFIG_SECTION_DIRECTORIES, "registry");
  708. config_move(CONFIG_SECTION_GLOBAL, "debug log",
  709. CONFIG_SECTION_LOGS, "debug");
  710. config_move(CONFIG_SECTION_GLOBAL, "error log",
  711. CONFIG_SECTION_LOGS, "error");
  712. config_move(CONFIG_SECTION_GLOBAL, "access log",
  713. CONFIG_SECTION_LOGS, "access");
  714. config_move(CONFIG_SECTION_GLOBAL, "facility log",
  715. CONFIG_SECTION_LOGS, "facility");
  716. config_move(CONFIG_SECTION_GLOBAL, "errors flood protection period",
  717. CONFIG_SECTION_LOGS, "errors flood protection period");
  718. config_move(CONFIG_SECTION_GLOBAL, "errors to trigger flood protection",
  719. CONFIG_SECTION_LOGS, "errors to trigger flood protection");
  720. config_move(CONFIG_SECTION_GLOBAL, "debug flags",
  721. CONFIG_SECTION_LOGS, "debug flags");
  722. config_move(CONFIG_SECTION_GLOBAL, "TZ environment variable",
  723. CONFIG_SECTION_ENV_VARS, "TZ");
  724. config_move(CONFIG_SECTION_PLUGINS, "PATH environment variable",
  725. CONFIG_SECTION_ENV_VARS, "PATH");
  726. config_move(CONFIG_SECTION_PLUGINS, "PYTHONPATH environment variable",
  727. CONFIG_SECTION_ENV_VARS, "PYTHONPATH");
  728. config_move(CONFIG_SECTION_STATSD, "enabled",
  729. CONFIG_SECTION_PLUGINS, "statsd");
  730. config_move(CONFIG_SECTION_GLOBAL, "memory mode",
  731. CONFIG_SECTION_DB, "mode");
  732. config_move(CONFIG_SECTION_GLOBAL, "history",
  733. CONFIG_SECTION_DB, "retention");
  734. config_move(CONFIG_SECTION_GLOBAL, "update every",
  735. CONFIG_SECTION_DB, "update every");
  736. config_move(CONFIG_SECTION_GLOBAL, "page cache size",
  737. CONFIG_SECTION_DB, "dbengine page cache size MB");
  738. config_move(CONFIG_SECTION_DB, "page cache size",
  739. CONFIG_SECTION_DB, "dbengine page cache size MB");
  740. config_move(CONFIG_SECTION_GLOBAL, "page cache uses malloc",
  741. CONFIG_SECTION_DB, "dbengine page cache with malloc");
  742. config_move(CONFIG_SECTION_DB, "page cache with malloc",
  743. CONFIG_SECTION_DB, "dbengine page cache with malloc");
  744. config_move(CONFIG_SECTION_GLOBAL, "dbengine disk space",
  745. CONFIG_SECTION_DB, "dbengine disk space MB");
  746. config_move(CONFIG_SECTION_GLOBAL, "dbengine multihost disk space",
  747. CONFIG_SECTION_DB, "dbengine multihost disk space MB");
  748. config_move(CONFIG_SECTION_GLOBAL, "memory deduplication (ksm)",
  749. CONFIG_SECTION_DB, "memory deduplication (ksm)");
  750. config_move(CONFIG_SECTION_GLOBAL, "dbengine page fetch timeout",
  751. CONFIG_SECTION_DB, "dbengine page fetch timeout secs");
  752. config_move(CONFIG_SECTION_GLOBAL, "dbengine page fetch retries",
  753. CONFIG_SECTION_DB, "dbengine page fetch retries");
  754. config_move(CONFIG_SECTION_GLOBAL, "dbengine extent pages",
  755. CONFIG_SECTION_DB, "dbengine pages per extent");
  756. config_move(CONFIG_SECTION_GLOBAL, "cleanup obsolete charts after seconds",
  757. CONFIG_SECTION_DB, "cleanup obsolete charts after secs");
  758. config_move(CONFIG_SECTION_GLOBAL, "gap when lost iterations above",
  759. CONFIG_SECTION_DB, "gap when lost iterations above");
  760. config_move(CONFIG_SECTION_GLOBAL, "cleanup orphan hosts after seconds",
  761. CONFIG_SECTION_DB, "cleanup orphan hosts after secs");
  762. config_move(CONFIG_SECTION_GLOBAL, "delete obsolete charts files",
  763. CONFIG_SECTION_DB, "delete obsolete charts files");
  764. config_move(CONFIG_SECTION_GLOBAL, "delete orphan hosts files",
  765. CONFIG_SECTION_DB, "delete orphan hosts files");
  766. config_move(CONFIG_SECTION_GLOBAL, "enable zero metrics",
  767. CONFIG_SECTION_DB, "enable zero metrics");
  768. }
  769. static void get_netdata_configured_variables() {
  770. backwards_compatible_config();
  771. // ------------------------------------------------------------------------
  772. // get the hostname
  773. char buf[HOSTNAME_MAX + 1];
  774. if(gethostname(buf, HOSTNAME_MAX) == -1){
  775. error("Cannot get machine hostname.");
  776. }
  777. netdata_configured_hostname = config_get(CONFIG_SECTION_GLOBAL, "hostname", buf);
  778. debug(D_OPTIONS, "hostname set to '%s'", netdata_configured_hostname);
  779. // ------------------------------------------------------------------------
  780. // get default database update frequency
  781. default_rrd_update_every = (int) config_get_number(CONFIG_SECTION_DB, "update every", UPDATE_EVERY);
  782. if(default_rrd_update_every < 1 || default_rrd_update_every > 600) {
  783. error("Invalid data collection frequency (update every) %d given. Defaulting to %d.", default_rrd_update_every, UPDATE_EVERY);
  784. default_rrd_update_every = UPDATE_EVERY;
  785. config_set_number(CONFIG_SECTION_DB, "update every", default_rrd_update_every);
  786. }
  787. // ------------------------------------------------------------------------
  788. // get default memory mode for the database
  789. {
  790. const char *mode = config_get(CONFIG_SECTION_DB, "mode", rrd_memory_mode_name(default_rrd_memory_mode));
  791. default_rrd_memory_mode = rrd_memory_mode_id(mode);
  792. if(strcmp(mode, rrd_memory_mode_name(default_rrd_memory_mode)) != 0) {
  793. error("Invalid memory mode '%s' given. Using '%s'", mode, rrd_memory_mode_name(default_rrd_memory_mode));
  794. config_set(CONFIG_SECTION_DB, "mode", rrd_memory_mode_name(default_rrd_memory_mode));
  795. }
  796. }
  797. // ------------------------------------------------------------------------
  798. // get default database size
  799. if(default_rrd_memory_mode != RRD_MEMORY_MODE_DBENGINE && default_rrd_memory_mode != RRD_MEMORY_MODE_NONE) {
  800. default_rrd_history_entries = (int)config_get_number(
  801. CONFIG_SECTION_DB, "retention",
  802. align_entries_to_pagesize(default_rrd_memory_mode, RRD_DEFAULT_HISTORY_ENTRIES));
  803. long h = align_entries_to_pagesize(default_rrd_memory_mode, default_rrd_history_entries);
  804. if (h != default_rrd_history_entries) {
  805. config_set_number(CONFIG_SECTION_DB, "retention", h);
  806. default_rrd_history_entries = (int)h;
  807. }
  808. }
  809. // ------------------------------------------------------------------------
  810. // get system paths
  811. netdata_configured_user_config_dir = config_get(CONFIG_SECTION_DIRECTORIES, "config", netdata_configured_user_config_dir);
  812. netdata_configured_stock_config_dir = config_get(CONFIG_SECTION_DIRECTORIES, "stock config", netdata_configured_stock_config_dir);
  813. netdata_configured_log_dir = config_get(CONFIG_SECTION_DIRECTORIES, "log", netdata_configured_log_dir);
  814. netdata_configured_web_dir = config_get(CONFIG_SECTION_DIRECTORIES, "web", netdata_configured_web_dir);
  815. netdata_configured_cache_dir = config_get(CONFIG_SECTION_DIRECTORIES, "cache", netdata_configured_cache_dir);
  816. netdata_configured_varlib_dir = config_get(CONFIG_SECTION_DIRECTORIES, "lib", netdata_configured_varlib_dir);
  817. char *env_home=getenv("HOME");
  818. netdata_configured_home_dir = config_get(CONFIG_SECTION_DIRECTORIES, "home", env_home?env_home:netdata_configured_home_dir);
  819. netdata_configured_lock_dir = initialize_lock_directory_path(netdata_configured_varlib_dir);
  820. {
  821. pluginsd_initialize_plugin_directories();
  822. netdata_configured_primary_plugins_dir = plugin_directories[PLUGINSD_STOCK_PLUGINS_DIRECTORY_PATH];
  823. }
  824. #ifdef ENABLE_DBENGINE
  825. // ------------------------------------------------------------------------
  826. // get default Database Engine page cache size in MiB
  827. default_rrdeng_page_cache_mb = (int) config_get_number(CONFIG_SECTION_DB, "dbengine page cache size MB", default_rrdeng_page_cache_mb);
  828. db_engine_journal_check = config_get_boolean(CONFIG_SECTION_DB, "dbengine enable journal integrity check", CONFIG_BOOLEAN_NO);
  829. if(default_rrdeng_page_cache_mb < RRDENG_MIN_PAGE_CACHE_SIZE_MB) {
  830. error("Invalid page cache size %d given. Defaulting to %d.", default_rrdeng_page_cache_mb, RRDENG_MIN_PAGE_CACHE_SIZE_MB);
  831. default_rrdeng_page_cache_mb = RRDENG_MIN_PAGE_CACHE_SIZE_MB;
  832. config_set_number(CONFIG_SECTION_DB, "dbengine page cache size MB", default_rrdeng_page_cache_mb);
  833. }
  834. // ------------------------------------------------------------------------
  835. // get default Database Engine disk space quota in MiB
  836. default_rrdeng_disk_quota_mb = (int) config_get_number(CONFIG_SECTION_DB, "dbengine disk space MB", default_rrdeng_disk_quota_mb);
  837. if(default_rrdeng_disk_quota_mb < RRDENG_MIN_DISK_SPACE_MB) {
  838. error("Invalid dbengine disk space %d given. Defaulting to %d.", default_rrdeng_disk_quota_mb, RRDENG_MIN_DISK_SPACE_MB);
  839. default_rrdeng_disk_quota_mb = RRDENG_MIN_DISK_SPACE_MB;
  840. config_set_number(CONFIG_SECTION_DB, "dbengine disk space MB", default_rrdeng_disk_quota_mb);
  841. }
  842. default_multidb_disk_quota_mb = (int) config_get_number(CONFIG_SECTION_DB, "dbengine multihost disk space MB", compute_multidb_diskspace());
  843. if(default_multidb_disk_quota_mb < RRDENG_MIN_DISK_SPACE_MB) {
  844. error("Invalid multidb disk space %d given. Defaulting to %d.", default_multidb_disk_quota_mb, default_rrdeng_disk_quota_mb);
  845. default_multidb_disk_quota_mb = default_rrdeng_disk_quota_mb;
  846. config_set_number(CONFIG_SECTION_DB, "dbengine multihost disk space MB", default_multidb_disk_quota_mb);
  847. }
  848. #else
  849. if (default_rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) {
  850. error_report("RRD_MEMORY_MODE_DBENGINE is not supported in this platform. The agent will use db mode 'save' instead.");
  851. default_rrd_memory_mode = RRD_MEMORY_MODE_SAVE;
  852. }
  853. #endif
  854. // ------------------------------------------------------------------------
  855. netdata_configured_host_prefix = config_get(CONFIG_SECTION_GLOBAL, "host access prefix", "");
  856. verify_netdata_host_prefix();
  857. // --------------------------------------------------------------------
  858. // get KSM settings
  859. #ifdef MADV_MERGEABLE
  860. enable_ksm = config_get_boolean(CONFIG_SECTION_DB, "memory deduplication (ksm)", enable_ksm);
  861. #endif
  862. // --------------------------------------------------------------------
  863. // metric correlations
  864. enable_metric_correlations = config_get_boolean(CONFIG_SECTION_GLOBAL, "enable metric correlations", enable_metric_correlations);
  865. default_metric_correlations_method = weights_string_to_method(config_get(
  866. CONFIG_SECTION_GLOBAL, "metric correlations method",
  867. weights_method_to_string(default_metric_correlations_method)));
  868. // --------------------------------------------------------------------
  869. rrdset_free_obsolete_time_s = config_get_number(CONFIG_SECTION_DB, "cleanup obsolete charts after secs", rrdset_free_obsolete_time_s);
  870. // Current chart locking and invalidation scheme doesn't prevent Netdata from segmentation faults if a short
  871. // cleanup delay is set. Extensive stress tests showed that 10 seconds is quite a safe delay. Look at
  872. // https://github.com/netdata/netdata/pull/11222#issuecomment-868367920 for more information.
  873. if (rrdset_free_obsolete_time_s < 10) {
  874. rrdset_free_obsolete_time_s = 10;
  875. info("The \"cleanup obsolete charts after seconds\" option was set to 10 seconds.");
  876. config_set_number(CONFIG_SECTION_DB, "cleanup obsolete charts after secs", rrdset_free_obsolete_time_s);
  877. }
  878. gap_when_lost_iterations_above = (int)config_get_number(CONFIG_SECTION_DB, "gap when lost iterations above", gap_when_lost_iterations_above);
  879. if (gap_when_lost_iterations_above < 1) {
  880. gap_when_lost_iterations_above = 1;
  881. config_set_number(CONFIG_SECTION_DB, "gap when lost iterations above", gap_when_lost_iterations_above);
  882. }
  883. gap_when_lost_iterations_above += 2;
  884. // --------------------------------------------------------------------
  885. // get various system parameters
  886. get_system_HZ();
  887. get_system_cpus_uncached();
  888. get_system_pid_max();
  889. }
  890. int load_netdata_conf(char *filename, char overwrite_used) {
  891. errno = 0;
  892. int ret = 0;
  893. if(filename && *filename) {
  894. ret = config_load(filename, overwrite_used, NULL);
  895. if(!ret)
  896. error("CONFIG: cannot load config file '%s'.", filename);
  897. }
  898. else {
  899. filename = strdupz_path_subpath(netdata_configured_user_config_dir, "netdata.conf");
  900. ret = config_load(filename, overwrite_used, NULL);
  901. if(!ret) {
  902. info("CONFIG: cannot load user config '%s'. Will try the stock version.", filename);
  903. freez(filename);
  904. filename = strdupz_path_subpath(netdata_configured_stock_config_dir, "netdata.conf");
  905. ret = config_load(filename, overwrite_used, NULL);
  906. if(!ret)
  907. info("CONFIG: cannot load stock config '%s'. Running with internal defaults.", filename);
  908. }
  909. freez(filename);
  910. }
  911. return ret;
  912. }
  // Deliberate no-op: the function exists only to carry the Coverity annotation
  // on the line right above its definition. The argument is neither read nor
  // modified.
  // coverity[ +tainted_string_sanitize_content : arg-0 ]
  static inline void coverity_remove_taint(char *s) {
      (void) s;   // silence the unused parameter warning
  }
  918. int get_system_info(struct rrdhost_system_info *system_info) {
  919. char *script;
  920. script = mallocz(sizeof(char) * (strlen(netdata_configured_primary_plugins_dir) + strlen("system-info.sh") + 2));
  921. sprintf(script, "%s/%s", netdata_configured_primary_plugins_dir, "system-info.sh");
  922. if (unlikely(access(script, R_OK) != 0)) {
  923. info("System info script %s not found.",script);
  924. freez(script);
  925. return 1;
  926. }
  927. pid_t command_pid;
  928. info("Executing %s", script);
  929. FILE *fp_child_input;
  930. FILE *fp_child_output = netdata_popen(script, &command_pid, &fp_child_input);
  931. if(fp_child_output) {
  932. char line[200 + 1];
  933. // Removed the double strlens, if the Coverity tainted string warning reappears I'll revert.
  934. // One time init code, but I'm curious about the warning...
  935. while (fgets(line, 200, fp_child_output) != NULL) {
  936. char *value=line;
  937. while (*value && *value != '=') value++;
  938. if (*value=='=') {
  939. *value='\0';
  940. value++;
  941. char *end = value;
  942. while (*end && *end != '\n') end++;
  943. *end = '\0'; // Overwrite newline if present
  944. coverity_remove_taint(line); // I/O is controlled result of system_info.sh - not tainted
  945. coverity_remove_taint(value);
  946. if(unlikely(rrdhost_set_system_info_variable(system_info, line, value))) {
  947. info("Unexpected environment variable %s=%s", line, value);
  948. }
  949. else {
  950. info("%s=%s", line, value);
  951. setenv(line, value, 1);
  952. }
  953. }
  954. }
  955. netdata_pclose(fp_child_input, fp_child_output, command_pid);
  956. }
  957. freez(script);
  958. return 0;
  959. }
  960. void set_silencers_filename() {
  961. char filename[FILENAME_MAX + 1];
  962. snprintfz(filename, FILENAME_MAX, "%s/health.silencers.json", netdata_configured_varlib_dir);
  963. silencers_filename = config_get(CONFIG_SECTION_HEALTH, "silencers file", filename);
  964. }
  965. /* Any config setting that can be accessed without a default value i.e. configget(...,...,NULL) *MUST*
  966. be set in this procedure to be called in all the relevant code paths.
  967. */
  968. void post_conf_load(char **user)
  969. {
  970. // --------------------------------------------------------------------
  971. // get the user we should run
  972. // IMPORTANT: this is required before web_files_uid()
  973. if(getuid() == 0) {
  974. *user = config_get(CONFIG_SECTION_GLOBAL, "run as user", NETDATA_USER);
  975. }
  976. else {
  977. struct passwd *passwd = getpwuid(getuid());
  978. *user = config_get(CONFIG_SECTION_GLOBAL, "run as user", (passwd && passwd->pw_name)?passwd->pw_name:"");
  979. }
  980. // --------------------------------------------------------------------
  981. // Check if the cloud is enabled
  982. #if defined( DISABLE_CLOUD ) || !defined( ENABLE_ACLK )
  983. netdata_cloud_setting = 0;
  984. #else
  985. netdata_cloud_setting = appconfig_get_boolean(&cloud_config, CONFIG_SECTION_GLOBAL, "enabled", 1);
  986. #endif
  987. // This must be set before any point in the code that accesses it. Do not move it from this function.
  988. appconfig_get(&cloud_config, CONFIG_SECTION_GLOBAL, "cloud base url", DEFAULT_CLOUD_BASE_URL);
  989. }
  /*
   * Startup progress logging.
   *
   * Logs the step that is about to start (msg) and, when there was a previous
   * step, how many milliseconds that one took. The macro relies on two
   * variables of the calling scope and updates both of them:
   *   usec_t last_ut        - monotonic timestamp taken at the previous call
   *   const char *prev_msg  - message of the previous call, NULL on the first
   *
   * msg appears more than once in the expansion, so pass an expression without
   * side effects. The expansion is a plain { } block, not a do/while(0).
   */
  #define delta_startup_time(msg)                                                     \
      {                                                                               \
          usec_t now_ut = now_monotonic_usec();                                       \
          if(!prev_msg) {                                                             \
              info("NETDATA STARTUP: next: %s", msg);                                 \
          }                                                                           \
          else {                                                                      \
              info("NETDATA STARTUP: in %7llu ms, %s - next: %s",                     \
                   (now_ut - last_ut) / USEC_PER_MS, prev_msg, msg);                  \
          }                                                                           \
          last_ut = now_ut;                                                           \
          prev_msg = msg;                                                             \
      }
  // Unit test entry points that are not defined in this part of the file.
  // main() returns their result for "-W pgctest", "-W mrgtest" and
  // "-W julytest" (those options are compiled in with ENABLE_DBENGINE).
  int pgc_unittest(void);
  int mrg_unittest(void);
  int julytest(void);
  1003. int main(int argc, char **argv) {
  1004. // initialize the system clocks
  1005. clocks_init();
  1006. usec_t started_ut = now_monotonic_usec();
  1007. usec_t last_ut = started_ut;
  1008. const char *prev_msg = NULL;
  1009. int i;
  1010. int config_loaded = 0;
  1011. int dont_fork = 0;
  1012. bool close_open_fds = true;
  1013. size_t default_stacksize;
  1014. char *user = NULL;
  1015. static_threads = static_threads_get();
  1016. netdata_ready=0;
  1017. // set the name for logging
  1018. program_name = "netdata";
  1019. if (argc > 1 && strcmp(argv[1], SPAWN_SERVER_COMMAND_LINE_ARGUMENT) == 0) {
  1020. // don't run netdata, this is the spawn server
  1021. spawn_server();
  1022. exit(0);
  1023. }
  1024. // parse options
  1025. {
  1026. int num_opts = sizeof(option_definitions) / sizeof(struct option_def);
  1027. char optstring[(num_opts * 2) + 1];
  1028. int string_i = 0;
  1029. for( i = 0; i < num_opts; i++ ) {
  1030. optstring[string_i] = option_definitions[i].val;
  1031. string_i++;
  1032. if(option_definitions[i].arg_name) {
  1033. optstring[string_i] = ':';
  1034. string_i++;
  1035. }
  1036. }
  1037. // terminate optstring
  1038. optstring[string_i] ='\0';
  1039. optstring[(num_opts *2)] ='\0';
  1040. int opt;
  1041. while( (opt = getopt(argc, argv, optstring)) != -1 ) {
  1042. switch(opt) {
  1043. case 'c':
  1044. if(load_netdata_conf(optarg, 1) != 1) {
  1045. error("Cannot load configuration file %s.", optarg);
  1046. return 1;
  1047. }
  1048. else {
  1049. debug(D_OPTIONS, "Configuration loaded from %s.", optarg);
  1050. post_conf_load(&user);
  1051. load_cloud_conf(1);
  1052. config_loaded = 1;
  1053. }
  1054. break;
  1055. case 'D':
  1056. dont_fork = 1;
  1057. break;
  1058. case 'd':
  1059. dont_fork = 0;
  1060. break;
  1061. case 'h':
  1062. return help(0);
  1063. case 'i':
  1064. config_set(CONFIG_SECTION_WEB, "bind to", optarg);
  1065. break;
  1066. case 'P':
  1067. strncpy(pidfile, optarg, FILENAME_MAX);
  1068. pidfile[FILENAME_MAX] = '\0';
  1069. break;
  1070. case 'p':
  1071. config_set(CONFIG_SECTION_GLOBAL, "default port", optarg);
  1072. break;
  1073. case 's':
  1074. config_set(CONFIG_SECTION_GLOBAL, "host access prefix", optarg);
  1075. break;
  1076. case 't':
  1077. config_set(CONFIG_SECTION_GLOBAL, "update every", optarg);
  1078. break;
  1079. case 'u':
  1080. config_set(CONFIG_SECTION_GLOBAL, "run as user", optarg);
  1081. break;
  1082. case 'v':
  1083. case 'V':
  1084. printf("%s %s\n", program_name, program_version);
  1085. return 0;
  1086. case 'W':
  1087. {
  1088. char* stacksize_string = "stacksize=";
  1089. char* debug_flags_string = "debug_flags=";
  1090. char* claim_string = "claim";
  1091. #ifdef ENABLE_DBENGINE
  1092. char* createdataset_string = "createdataset=";
  1093. char* stresstest_string = "stresstest=";
  1094. #endif
  1095. if(strcmp(optarg, "sqlite-check") == 0) {
  1096. sql_init_database(DB_CHECK_INTEGRITY, 0);
  1097. return 0;
  1098. }
  1099. if(strcmp(optarg, "sqlite-fix") == 0) {
  1100. sql_init_database(DB_CHECK_FIX_DB, 0);
  1101. return 0;
  1102. }
  1103. if(strcmp(optarg, "sqlite-compact") == 0) {
  1104. sql_init_database(DB_CHECK_RECLAIM_SPACE, 0);
  1105. return 0;
  1106. }
  1107. if(strcmp(optarg, "unittest") == 0) {
  1108. unittest_running = true;
  1109. if (unit_test_static_threads())
  1110. return 1;
  1111. if (unit_test_buffer())
  1112. return 1;
  1113. if (unit_test_str2ld())
  1114. return 1;
  1115. if (unit_test_bitmap256())
  1116. return 1;
  1117. // No call to load the config file on this code-path
  1118. post_conf_load(&user);
  1119. get_netdata_configured_variables();
  1120. default_rrd_update_every = 1;
  1121. default_rrd_memory_mode = RRD_MEMORY_MODE_RAM;
  1122. default_health_enabled = 0;
  1123. storage_tiers = 1;
  1124. registry_init();
  1125. if(rrd_init("unittest", NULL, true)) {
  1126. fprintf(stderr, "rrd_init failed for unittest\n");
  1127. return 1;
  1128. }
  1129. default_rrdpush_enabled = 0;
  1130. if(run_all_mockup_tests()) return 1;
  1131. if(unit_test_storage()) return 1;
  1132. #ifdef ENABLE_DBENGINE
  1133. if(test_dbengine()) return 1;
  1134. #endif
  1135. if(test_sqlite()) return 1;
  1136. if(string_unittest(10000)) return 1;
  1137. if (dictionary_unittest(10000))
  1138. return 1;
  1139. if(aral_unittest(10000))
  1140. return 1;
  1141. if (rrdlabels_unittest())
  1142. return 1;
  1143. if (ctx_unittest())
  1144. return 1;
  1145. fprintf(stderr, "\n\nALL TESTS PASSED\n\n");
  1146. return 0;
  1147. }
  1148. else if(strcmp(optarg, "escapetest") == 0) {
  1149. return command_argument_sanitization_tests();
  1150. }
  1151. #ifdef ENABLE_DBENGINE
  1152. else if(strcmp(optarg, "mctest") == 0) {
  1153. unittest_running = true;
  1154. return mc_unittest();
  1155. }
  1156. else if(strcmp(optarg, "ctxtest") == 0) {
  1157. unittest_running = true;
  1158. return ctx_unittest();
  1159. }
  1160. else if(strcmp(optarg, "dicttest") == 0) {
  1161. unittest_running = true;
  1162. return dictionary_unittest(10000);
  1163. }
  1164. else if(strcmp(optarg, "araltest") == 0) {
  1165. unittest_running = true;
  1166. return aral_unittest(10000);
  1167. }
  1168. else if(strcmp(optarg, "stringtest") == 0) {
  1169. unittest_running = true;
  1170. return string_unittest(10000);
  1171. }
  1172. else if(strcmp(optarg, "rrdlabelstest") == 0) {
  1173. unittest_running = true;
  1174. return rrdlabels_unittest();
  1175. }
  1176. else if(strcmp(optarg, "metatest") == 0) {
  1177. unittest_running = true;
  1178. return metadata_unittest();
  1179. }
  1180. else if(strcmp(optarg, "pgctest") == 0) {
  1181. unittest_running = true;
  1182. return pgc_unittest();
  1183. }
  1184. else if(strcmp(optarg, "mrgtest") == 0) {
  1185. unittest_running = true;
  1186. return mrg_unittest();
  1187. }
  1188. else if(strcmp(optarg, "julytest") == 0) {
  1189. unittest_running = true;
  1190. return julytest();
  1191. }
  1192. else if(strncmp(optarg, createdataset_string, strlen(createdataset_string)) == 0) {
  1193. optarg += strlen(createdataset_string);
  1194. unsigned history_seconds = strtoul(optarg, NULL, 0);
  1195. generate_dbengine_dataset(history_seconds);
  1196. return 0;
  1197. }
  1198. else if(strncmp(optarg, stresstest_string, strlen(stresstest_string)) == 0) {
  1199. char *endptr;
  1200. unsigned test_duration_sec = 0, dset_charts = 0, query_threads = 0, ramp_up_seconds = 0,
  1201. page_cache_mb = 0, disk_space_mb = 0, workers = 16;
  1202. optarg += strlen(stresstest_string);
  1203. test_duration_sec = (unsigned)strtoul(optarg, &endptr, 0);
  1204. if (',' == *endptr)
  1205. dset_charts = (unsigned)strtoul(endptr + 1, &endptr, 0);
  1206. if (',' == *endptr)
  1207. query_threads = (unsigned)strtoul(endptr + 1, &endptr, 0);
  1208. if (',' == *endptr)
  1209. ramp_up_seconds = (unsigned)strtoul(endptr + 1, &endptr, 0);
  1210. if (',' == *endptr)
  1211. page_cache_mb = (unsigned)strtoul(endptr + 1, &endptr, 0);
  1212. if (',' == *endptr)
  1213. disk_space_mb = (unsigned)strtoul(endptr + 1, &endptr, 0);
  1214. if (',' == *endptr)
  1215. workers = (unsigned)strtoul(endptr + 1, &endptr, 0);
  1216. if (workers > 1024)
  1217. workers = 1024;
  1218. char workers_str[16];
  1219. snprintf(workers_str, 15, "%u", workers);
  1220. setenv("UV_THREADPOOL_SIZE", workers_str, 1);
  1221. dbengine_stress_test(test_duration_sec, dset_charts, query_threads, ramp_up_seconds,
  1222. page_cache_mb, disk_space_mb);
  1223. return 0;
  1224. }
  1225. #endif
  1226. else if(strcmp(optarg, "simple-pattern") == 0) {
  1227. if(optind + 2 > argc) {
  1228. fprintf(stderr, "%s", "\nUSAGE: -W simple-pattern 'pattern' 'string'\n\n"
  1229. " Checks if 'pattern' matches the given 'string'.\n"
  1230. " - 'pattern' can be one or more space separated words.\n"
  1231. " - each 'word' can contain one or more asterisks.\n"
  1232. " - words starting with '!' give negative matches.\n"
  1233. " - words are processed left to right\n"
  1234. "\n"
  1235. "Examples:\n"
  1236. "\n"
  1237. " > match all veth interfaces, except veth0:\n"
  1238. "\n"
  1239. " -W simple-pattern '!veth0 veth*' 'veth12'\n"
  1240. "\n"
  1241. "\n"
  1242. " > match all *.ext files directly in /path/:\n"
  1243. " (this will not match *.ext files in a subdir of /path/)\n"
  1244. "\n"
  1245. " -W simple-pattern '!/path/*/*.ext /path/*.ext' '/path/test.ext'\n"
  1246. "\n"
  1247. );
  1248. return 1;
  1249. }
  1250. const char *haystack = argv[optind];
  1251. const char *needle = argv[optind + 1];
  1252. size_t len = strlen(needle) + 1;
  1253. char wildcarded[len];
  1254. SIMPLE_PATTERN *p = simple_pattern_create(haystack, NULL, SIMPLE_PATTERN_EXACT);
  1255. int ret = simple_pattern_matches_extract(p, needle, wildcarded, len);
  1256. simple_pattern_free(p);
  1257. if(ret) {
  1258. fprintf(stdout, "RESULT: MATCHED - pattern '%s' matches '%s', wildcarded '%s'\n", haystack, needle, wildcarded);
  1259. return 0;
  1260. }
  1261. else {
  1262. fprintf(stdout, "RESULT: NOT MATCHED - pattern '%s' does not match '%s', wildcarded '%s'\n", haystack, needle, wildcarded);
  1263. return 1;
  1264. }
  1265. }
  1266. else if(strncmp(optarg, stacksize_string, strlen(stacksize_string)) == 0) {
  1267. optarg += strlen(stacksize_string);
  1268. config_set(CONFIG_SECTION_GLOBAL, "pthread stack size", optarg);
  1269. }
  1270. else if(strncmp(optarg, debug_flags_string, strlen(debug_flags_string)) == 0) {
  1271. optarg += strlen(debug_flags_string);
  1272. config_set(CONFIG_SECTION_LOGS, "debug flags", optarg);
  1273. debug_flags = strtoull(optarg, NULL, 0);
  1274. }
  1275. else if(strcmp(optarg, "set") == 0) {
  1276. if(optind + 3 > argc) {
  1277. fprintf(stderr, "%s", "\nUSAGE: -W set 'section' 'key' 'value'\n\n"
  1278. " Overwrites settings of netdata.conf.\n"
  1279. "\n"
  1280. " These options interact with: -c netdata.conf\n"
  1281. " If -c netdata.conf is given on the command line,\n"
  1282. " before -W set... the user may overwrite command\n"
  1283. " line parameters at netdata.conf\n"
  1284. " If -c netdata.conf is given after (or missing)\n"
  1285. " -W set... the user cannot overwrite the command line\n"
  1286. " parameters."
  1287. "\n"
  1288. );
  1289. return 1;
  1290. }
  1291. const char *section = argv[optind];
  1292. const char *key = argv[optind + 1];
  1293. const char *value = argv[optind + 2];
  1294. optind += 3;
  1295. // set this one as the default
  1296. // only if it is not already set in the config file
  1297. // so the caller can use -c netdata.conf before or
  1298. // after this parameter to prevent or allow overwriting
  1299. // variables at netdata.conf
  1300. config_set_default(section, key, value);
  1301. // fprintf(stderr, "SET section '%s', key '%s', value '%s'\n", section, key, value);
  1302. }
  1303. else if(strcmp(optarg, "set2") == 0) {
  1304. if(optind + 4 > argc) {
  1305. fprintf(stderr, "%s", "\nUSAGE: -W set 'conf_file' 'section' 'key' 'value'\n\n"
  1306. " Overwrites settings of netdata.conf or cloud.conf\n"
  1307. "\n"
  1308. " These options interact with: -c netdata.conf\n"
  1309. " If -c netdata.conf is given on the command line,\n"
  1310. " before -W set... the user may overwrite command\n"
  1311. " line parameters at netdata.conf\n"
  1312. " If -c netdata.conf is given after (or missing)\n"
  1313. " -W set... the user cannot overwrite the command line\n"
  1314. " parameters."
  1315. " conf_file can be \"cloud\" or \"netdata\".\n"
  1316. "\n"
  1317. );
  1318. return 1;
  1319. }
  1320. const char *conf_file = argv[optind]; /* "cloud" is cloud.conf, otherwise netdata.conf */
  1321. struct config *tmp_config = strcmp(conf_file, "cloud") ? &netdata_config : &cloud_config;
  1322. const char *section = argv[optind + 1];
  1323. const char *key = argv[optind + 2];
  1324. const char *value = argv[optind + 3];
  1325. optind += 4;
  1326. // set this one as the default
  1327. // only if it is not already set in the config file
  1328. // so the caller can use -c netdata.conf before or
  1329. // after this parameter to prevent or allow overwriting
  1330. // variables at netdata.conf
  1331. appconfig_set_default(tmp_config, section, key, value);
  1332. // fprintf(stderr, "SET section '%s', key '%s', value '%s'\n", section, key, value);
  1333. }
  1334. else if(strcmp(optarg, "get") == 0) {
  1335. if(optind + 3 > argc) {
  1336. fprintf(stderr, "%s", "\nUSAGE: -W get 'section' 'key' 'value'\n\n"
  1337. " Prints settings of netdata.conf.\n"
  1338. "\n"
  1339. " These options interact with: -c netdata.conf\n"
  1340. " -c netdata.conf has to be given before -W get.\n"
  1341. "\n"
  1342. );
  1343. return 1;
  1344. }
  1345. if(!config_loaded) {
  1346. fprintf(stderr, "warning: no configuration file has been loaded. Use -c CONFIG_FILE, before -W get. Using default config.\n");
  1347. load_netdata_conf(NULL, 0);
  1348. post_conf_load(&user);
  1349. }
  1350. get_netdata_configured_variables();
  1351. const char *section = argv[optind];
  1352. const char *key = argv[optind + 1];
  1353. const char *def = argv[optind + 2];
  1354. const char *value = config_get(section, key, def);
  1355. printf("%s\n", value);
  1356. return 0;
  1357. }
  1358. else if(strcmp(optarg, "get2") == 0) {
  1359. if(optind + 4 > argc) {
  1360. fprintf(stderr, "%s", "\nUSAGE: -W get2 'conf_file' 'section' 'key' 'value'\n\n"
  1361. " Prints settings of netdata.conf or cloud.conf\n"
  1362. "\n"
  1363. " These options interact with: -c netdata.conf\n"
  1364. " -c netdata.conf has to be given before -W get2.\n"
  1365. " conf_file can be \"cloud\" or \"netdata\".\n"
  1366. "\n"
  1367. );
  1368. return 1;
  1369. }
  1370. if(!config_loaded) {
  1371. fprintf(stderr, "warning: no configuration file has been loaded. Use -c CONFIG_FILE, before -W get. Using default config.\n");
  1372. load_netdata_conf(NULL, 0);
  1373. post_conf_load(&user);
  1374. load_cloud_conf(1);
  1375. }
  1376. get_netdata_configured_variables();
  1377. const char *conf_file = argv[optind]; /* "cloud" is cloud.conf, otherwise netdata.conf */
  1378. struct config *tmp_config = strcmp(conf_file, "cloud") ? &netdata_config : &cloud_config;
  1379. const char *section = argv[optind + 1];
  1380. const char *key = argv[optind + 2];
  1381. const char *def = argv[optind + 3];
  1382. const char *value = appconfig_get(tmp_config, section, key, def);
  1383. printf("%s\n", value);
  1384. return 0;
  1385. }
  1386. else if(strncmp(optarg, claim_string, strlen(claim_string)) == 0) {
  1387. /* will trigger a claiming attempt when the agent is initialized */
  1388. claiming_pending_arguments = optarg + strlen(claim_string);
  1389. }
  1390. else if(strcmp(optarg, "buildinfo") == 0) {
  1391. printf("Version: %s %s\n", program_name, program_version);
  1392. print_build_info();
  1393. return 0;
  1394. }
  1395. else if(strcmp(optarg, "buildinfojson") == 0) {
  1396. print_build_info_json();
  1397. return 0;
  1398. }
  1399. else if(strcmp(optarg, "keepopenfds") == 0) {
  1400. // Internal dev option to skip closing inherited
  1401. // open FDs. Useful, when we want to run the agent
  1402. // under profiling tools that open/maintain their
  1403. // own FDs.
  1404. close_open_fds = false;
  1405. } else {
  1406. fprintf(stderr, "Unknown -W parameter '%s'\n", optarg);
  1407. return help(1);
  1408. }
  1409. }
  1410. break;
  1411. default: /* ? */
  1412. fprintf(stderr, "Unknown parameter '%c'\n", opt);
  1413. return help(1);
  1414. }
  1415. }
  1416. }
  1417. #ifdef _SC_OPEN_MAX
  1418. if (close_open_fds == true) {
  1419. // close all open file descriptors, except the standard ones
  1420. // the caller may have left open files (lxc-attach has this issue)
  1421. for(int fd = (int) (sysconf(_SC_OPEN_MAX) - 1); fd > 2; fd--)
  1422. if(fd_is_valid(fd))
  1423. close(fd);
  1424. }
  1425. #endif
  1426. if(!config_loaded) {
  1427. load_netdata_conf(NULL, 0);
  1428. post_conf_load(&user);
  1429. load_cloud_conf(0);
  1430. }
  1431. char *nd_disable_cloud = getenv("NETDATA_DISABLE_CLOUD");
  1432. if (nd_disable_cloud && !strncmp(nd_disable_cloud, "1", 1)) {
  1433. appconfig_set(&cloud_config, CONFIG_SECTION_GLOBAL, "enabled", "false");
  1434. }
  1435. // ------------------------------------------------------------------------
  1436. // initialize netdata
  1437. {
  1438. char *pmax = config_get(CONFIG_SECTION_GLOBAL, "glibc malloc arena max for plugins", "1");
  1439. if(pmax && *pmax)
  1440. setenv("MALLOC_ARENA_MAX", pmax, 1);
  1441. #if defined(HAVE_C_MALLOPT)
  1442. i = (int)config_get_number(CONFIG_SECTION_GLOBAL, "glibc malloc arena max for netdata", 1);
  1443. if(i > 0)
  1444. mallopt(M_ARENA_MAX, 1);
  1445. #ifdef NETDATA_INTERNAL_CHECKS
  1446. mallopt(M_PERTURB, 0x5A);
  1447. // mallopt(M_MXFAST, 0);
  1448. #endif
  1449. #endif
  1450. // set libuv worker threads
  1451. libuv_worker_threads = get_system_cpus() * 2;
  1452. if(libuv_worker_threads < MIN_LIBUV_WORKER_THREADS)
  1453. libuv_worker_threads = MIN_LIBUV_WORKER_THREADS;
  1454. if(libuv_worker_threads > MAX_LIBUV_WORKER_THREADS)
  1455. libuv_worker_threads = MAX_LIBUV_WORKER_THREADS;
  1456. libuv_worker_threads = config_get_number(CONFIG_SECTION_GLOBAL, "libuv worker threads", libuv_worker_threads);
  1457. if(libuv_worker_threads < MIN_LIBUV_WORKER_THREADS) {
  1458. libuv_worker_threads = MIN_LIBUV_WORKER_THREADS;
  1459. config_set_number(CONFIG_SECTION_GLOBAL, "libuv worker threads", libuv_worker_threads);
  1460. }
  1461. {
  1462. char buf[20 + 1];
  1463. snprintfz(buf, 20, "%d", libuv_worker_threads);
  1464. setenv("UV_THREADPOOL_SIZE", buf, 1);
  1465. }
  1466. // prepare configuration environment variables for the plugins
  1467. get_netdata_configured_variables();
  1468. set_global_environment();
  1469. // work while we are cd into config_dir
  1470. // to allow the plugins refer to their config
  1471. // files using relative filenames
  1472. if(chdir(netdata_configured_user_config_dir) == -1)
  1473. fatal("Cannot cd to '%s'", netdata_configured_user_config_dir);
  1474. // Get execution path before switching user to avoid permission issues
  1475. get_netdata_execution_path();
  1476. }
  1477. {
  1478. // --------------------------------------------------------------------
  1479. // get the debugging flags from the configuration file
  1480. char *flags = config_get(CONFIG_SECTION_LOGS, "debug flags", "0x0000000000000000");
  1481. setenv("NETDATA_DEBUG_FLAGS", flags, 1);
  1482. debug_flags = strtoull(flags, NULL, 0);
  1483. debug(D_OPTIONS, "Debug flags set to '0x%" PRIX64 "'.", debug_flags);
  1484. if(debug_flags != 0) {
  1485. struct rlimit rl = { RLIM_INFINITY, RLIM_INFINITY };
  1486. if(setrlimit(RLIMIT_CORE, &rl) != 0)
  1487. error("Cannot request unlimited core dumps for debugging... Proceeding anyway...");
  1488. #ifdef HAVE_SYS_PRCTL_H
  1489. prctl(PR_SET_DUMPABLE, 1, 0, 0, 0);
  1490. #endif
  1491. }
  1492. // --------------------------------------------------------------------
  1493. // get log filenames and settings
  1494. log_init();
  1495. error_log_limit_unlimited();
  1496. // initialize the log files
  1497. open_all_log_files();
  1498. get_system_timezone();
  1499. // --------------------------------------------------------------------
  1500. // get the certificate and start security
  1501. #ifdef ENABLE_HTTPS
  1502. security_init();
  1503. #endif
  1504. // --------------------------------------------------------------------
  1505. // This is the safest place to start the SILENCERS structure
  1506. set_silencers_filename();
  1507. health_initialize_global_silencers();
  1508. // --------------------------------------------------------------------
  1509. // Initialize ML configuration
  1510. delta_startup_time("initialize ML");
  1511. ml_init();
  1512. // --------------------------------------------------------------------
  1513. // setup process signals
  1514. // block signals while initializing threads.
  1515. // this causes the threads to block signals.
  1516. delta_startup_time("initialize signals");
  1517. signals_block();
  1518. signals_init(); // setup the signals we want to use
  1519. // --------------------------------------------------------------------
  1520. // check which threads are enabled and initialize them
  1521. delta_startup_time("initialize static threads");
  1522. // setup threads configs
  1523. default_stacksize = netdata_threads_init();
  1524. for (i = 0; static_threads[i].name != NULL ; i++) {
  1525. struct netdata_static_thread *st = &static_threads[i];
  1526. if(st->config_name)
  1527. st->enabled = config_get_boolean(st->config_section, st->config_name, st->enabled);
  1528. if(st->enabled && st->init_routine)
  1529. st->init_routine();
  1530. if(st->env_name)
  1531. setenv(st->env_name, st->enabled?"YES":"NO", 1);
  1532. if(st->global_variable)
  1533. *st->global_variable = (st->enabled) ? true : false;
  1534. }
  1535. // --------------------------------------------------------------------
  1536. // create the listening sockets
  1537. delta_startup_time("initialize web server");
  1538. web_client_api_v1_init();
  1539. web_server_threading_selection();
  1540. if(web_server_mode != WEB_SERVER_MODE_NONE)
  1541. api_listen_sockets_setup();
  1542. }
  1543. delta_startup_time("set resource limits");
  1544. #ifdef NETDATA_INTERNAL_CHECKS
  1545. if(debug_flags != 0) {
  1546. struct rlimit rl = { RLIM_INFINITY, RLIM_INFINITY };
  1547. if(setrlimit(RLIMIT_CORE, &rl) != 0)
  1548. error("Cannot request unlimited core dumps for debugging... Proceeding anyway...");
  1549. #ifdef HAVE_SYS_PRCTL_H
  1550. prctl(PR_SET_DUMPABLE, 1, 0, 0, 0);
  1551. #endif
  1552. }
  1553. #endif /* NETDATA_INTERNAL_CHECKS */
  1554. // get the max file limit
  1555. if(getrlimit(RLIMIT_NOFILE, &rlimit_nofile) != 0)
  1556. error("getrlimit(RLIMIT_NOFILE) failed");
  1557. else
  1558. info("resources control: allowed file descriptors: soft = %zu, max = %zu", (size_t)rlimit_nofile.rlim_cur, (size_t)rlimit_nofile.rlim_max);
  1559. delta_startup_time("become daemon");
  1560. // fork, switch user, create pid file, set process priority
  1561. if(become_daemon(dont_fork, user) == -1)
  1562. fatal("Cannot daemonize myself.");
  1563. info("netdata started on pid %d.", getpid());
  1564. delta_startup_time("initialize threads after fork");
  1565. netdata_threads_init_after_fork((size_t)config_get_number(CONFIG_SECTION_GLOBAL, "pthread stack size", (long)default_stacksize));
  1566. // initialize internal registry
  1567. delta_startup_time("initialize registry");
  1568. registry_init();
  1569. // fork the spawn server
  1570. delta_startup_time("fork the spawn server");
  1571. spawn_init();
  1572. /*
  1573. * Libuv uv_spawn() uses SIGCHLD internally:
  1574. * https://github.com/libuv/libuv/blob/cc51217a317e96510fbb284721d5e6bc2af31e33/src/unix/process.c#L485
  1575. * and inadvertently replaces the netdata signal handler which was setup during initialization.
  1576. * Thusly, we must explicitly restore the signal handler for SIGCHLD.
  1577. * Warning: extreme care is needed when mixing and matching POSIX and libuv.
  1578. */
  1579. signals_restore_SIGCHLD();
  1580. // ------------------------------------------------------------------------
  1581. // initialize rrd, registry, health, rrdpush, etc.
  1582. delta_startup_time("collecting system info");
  1583. netdata_anonymous_statistics_enabled=-1;
  1584. struct rrdhost_system_info *system_info = callocz(1, sizeof(struct rrdhost_system_info));
  1585. get_system_info(system_info);
  1586. system_info->hops = 0;
  1587. get_install_type(&system_info->install_type, &system_info->prebuilt_arch, &system_info->prebuilt_dist);
  1588. delta_startup_time("initialize RRD structures");
  1589. if(rrd_init(netdata_configured_hostname, system_info, false))
  1590. fatal("Cannot initialize localhost instance with name '%s'.", netdata_configured_hostname);
  1591. delta_startup_time("check for incomplete shutdown");
  1592. char agent_crash_file[FILENAME_MAX + 1];
  1593. char agent_incomplete_shutdown_file[FILENAME_MAX + 1];
  1594. snprintfz(agent_incomplete_shutdown_file, FILENAME_MAX, "%s/.agent_incomplete_shutdown", netdata_configured_varlib_dir);
  1595. int incomplete_shutdown_detected = (unlink(agent_incomplete_shutdown_file) == 0);
  1596. snprintfz(agent_crash_file, FILENAME_MAX, "%s/.agent_crash", netdata_configured_varlib_dir);
  1597. int crash_detected = (unlink(agent_crash_file) == 0);
  1598. int fd = open(agent_crash_file, O_WRONLY | O_CREAT | O_TRUNC, 444);
  1599. if (fd >= 0)
  1600. close(fd);
  1601. // ------------------------------------------------------------------------
  1602. // Claim netdata agent to a cloud endpoint
  1603. delta_startup_time("collect claiming info");
  1604. if (claiming_pending_arguments)
  1605. claim_agent(claiming_pending_arguments);
  1606. load_claiming_state();
  1607. // ------------------------------------------------------------------------
  1608. // enable log flood protection
  1609. error_log_limit_reset();
  1610. // Load host labels
  1611. delta_startup_time("collect host labels");
  1612. reload_host_labels();
  1613. // ------------------------------------------------------------------------
  1614. // spawn the threads
  1615. delta_startup_time("start the static threads");
  1616. web_server_config_options();
  1617. netdata_zero_metrics_enabled = config_get_boolean_ondemand(CONFIG_SECTION_DB, "enable zero metrics", CONFIG_BOOLEAN_NO);
  1618. set_late_global_environment();
  1619. for (i = 0; static_threads[i].name != NULL ; i++) {
  1620. struct netdata_static_thread *st = &static_threads[i];
  1621. if(st->enabled) {
  1622. st->thread = mallocz(sizeof(netdata_thread_t));
  1623. debug(D_SYSTEM, "Starting thread %s.", st->name);
  1624. netdata_thread_create(st->thread, st->name, NETDATA_THREAD_OPTION_DEFAULT, st->start_routine, st);
  1625. }
  1626. else debug(D_SYSTEM, "Not starting thread %s.", st->name);
  1627. }
  1628. // ------------------------------------------------------------------------
  1629. // Initialize netdata agent command serving from cli and signals
  1630. delta_startup_time("initialize commands API");
  1631. commands_init();
  1632. delta_startup_time("ready");
  1633. usec_t ready_ut = now_monotonic_usec();
  1634. info("NETDATA STARTUP: completed in %llu ms. Enjoy real-time performance monitoring!", (ready_ut - started_ut) / USEC_PER_MS);
  1635. netdata_ready = 1;
  1636. send_statistics("START", "-", "-");
  1637. if (crash_detected)
  1638. send_statistics("CRASH", "-", "-");
  1639. if (incomplete_shutdown_detected)
  1640. send_statistics("INCOMPLETE_SHUTDOWN", "-", "-");
  1641. //check if ANALYTICS needs to start
  1642. if (netdata_anonymous_statistics_enabled == 1) {
  1643. for (i = 0; static_threads[i].name != NULL; i++) {
  1644. if (!strncmp(static_threads[i].name, "ANALYTICS", 9)) {
  1645. struct netdata_static_thread *st = &static_threads[i];
  1646. st->thread = mallocz(sizeof(netdata_thread_t));
  1647. st->enabled = 1;
  1648. debug(D_SYSTEM, "Starting thread %s.", st->name);
  1649. netdata_thread_create(st->thread, st->name, NETDATA_THREAD_OPTION_DEFAULT, st->start_routine, st);
  1650. }
  1651. }
  1652. }
  1653. // ------------------------------------------------------------------------
  1654. // Report ACLK build failure
  1655. #ifndef ENABLE_ACLK
  1656. error("This agent doesn't have ACLK.");
  1657. char filename[FILENAME_MAX + 1];
  1658. snprintfz(filename, FILENAME_MAX, "%s/.aclk_report_sent", netdata_configured_varlib_dir);
  1659. if (netdata_anonymous_statistics_enabled > 0 && access(filename, F_OK)) { // -1 -> not initialized
  1660. send_statistics("ACLK_DISABLED", "-", "-");
  1661. int fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC, 444);
  1662. if (fd == -1)
  1663. error("Cannot create file '%s'. Please fix this.", filename);
  1664. else
  1665. close(fd);
  1666. }
  1667. #endif
  1668. // ------------------------------------------------------------------------
  1669. // unblock signals
  1670. signals_unblock();
  1671. // ------------------------------------------------------------------------
  1672. // Handle signals
  1673. signals_handle();
  1674. // should never reach this point
  1675. // but we need it for rpmlint #2752
  1676. return 1;
  1677. }