main.c 94 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268
  1. // SPDX-License-Identifier: GPL-3.0-or-later
  2. #include "common.h"
  3. #include "buildinfo.h"
  4. #include "static_threads.h"
  5. #include "database/engine/page_test.h"
  6. #if defined(ENV32BIT)
  7. #warning COMPILING 32BIT NETDATA
  8. #endif
  9. bool unittest_running = false;
  10. int netdata_zero_metrics_enabled;
  11. int netdata_anonymous_statistics_enabled;
  12. int libuv_worker_threads = MIN_LIBUV_WORKER_THREADS;
  13. bool ieee754_doubles = false;
  14. time_t netdata_start_time = 0;
  15. struct netdata_static_thread *static_threads;
  16. struct config netdata_config = {
  17. .first_section = NULL,
  18. .last_section = NULL,
  19. .mutex = NETDATA_MUTEX_INITIALIZER,
  20. .index = {
  21. .avl_tree = {
  22. .root = NULL,
  23. .compar = appconfig_section_compare
  24. },
  25. .rwlock = AVL_LOCK_INITIALIZER
  26. }
  27. };
  28. typedef struct service_thread {
  29. pid_t tid;
  30. SERVICE_THREAD_TYPE type;
  31. SERVICE_TYPE services;
  32. char name[NETDATA_THREAD_NAME_MAX + 1];
  33. bool stop_immediately;
  34. bool cancelled;
  35. union {
  36. netdata_thread_t netdata_thread;
  37. uv_thread_t uv_thread;
  38. };
  39. force_quit_t force_quit_callback;
  40. request_quit_t request_quit_callback;
  41. void *data;
  42. } SERVICE_THREAD;
  43. struct service_globals {
  44. SPINLOCK lock;
  45. Pvoid_t pid_judy;
  46. } service_globals = {
  47. .pid_judy = NULL,
  48. };
  49. SERVICE_THREAD *service_register(SERVICE_THREAD_TYPE thread_type, request_quit_t request_quit_callback, force_quit_t force_quit_callback, void *data, bool update __maybe_unused) {
  50. SERVICE_THREAD *sth = NULL;
  51. pid_t tid = gettid();
  52. spinlock_lock(&service_globals.lock);
  53. Pvoid_t *PValue = JudyLIns(&service_globals.pid_judy, tid, PJE0);
  54. if(!*PValue) {
  55. sth = callocz(1, sizeof(SERVICE_THREAD));
  56. sth->tid = tid;
  57. sth->type = thread_type;
  58. sth->request_quit_callback = request_quit_callback;
  59. sth->force_quit_callback = force_quit_callback;
  60. sth->data = data;
  61. os_thread_get_current_name_np(sth->name);
  62. *PValue = sth;
  63. switch(thread_type) {
  64. default:
  65. case SERVICE_THREAD_TYPE_NETDATA:
  66. sth->netdata_thread = netdata_thread_self();
  67. break;
  68. case SERVICE_THREAD_TYPE_EVENT_LOOP:
  69. case SERVICE_THREAD_TYPE_LIBUV:
  70. sth->uv_thread = uv_thread_self();
  71. break;
  72. }
  73. }
  74. else {
  75. sth = *PValue;
  76. }
  77. spinlock_unlock(&service_globals.lock);
  78. return sth;
  79. }
  80. void service_exits(void) {
  81. pid_t tid = gettid();
  82. spinlock_lock(&service_globals.lock);
  83. Pvoid_t *PValue = JudyLGet(service_globals.pid_judy, tid, PJE0);
  84. if(PValue) {
  85. freez(*PValue);
  86. JudyLDel(&service_globals.pid_judy, tid, PJE0);
  87. }
  88. spinlock_unlock(&service_globals.lock);
  89. }
  90. bool service_running(SERVICE_TYPE service) {
  91. static __thread SERVICE_THREAD *sth = NULL;
  92. if(unlikely(!sth))
  93. sth = service_register(SERVICE_THREAD_TYPE_NETDATA, NULL, NULL, NULL, false);
  94. sth->services |= service;
  95. return !(sth->stop_immediately || netdata_exit);
  96. }
  97. void service_signal_exit(SERVICE_TYPE service) {
  98. spinlock_lock(&service_globals.lock);
  99. Pvoid_t *PValue;
  100. Word_t tid = 0;
  101. bool first = true;
  102. while((PValue = JudyLFirstThenNext(service_globals.pid_judy, &tid, &first))) {
  103. SERVICE_THREAD *sth = *PValue;
  104. if((sth->services & service)) {
  105. sth->stop_immediately = true;
  106. if(sth->request_quit_callback) {
  107. spinlock_unlock(&service_globals.lock);
  108. sth->request_quit_callback(sth->data);
  109. spinlock_lock(&service_globals.lock);
  110. }
  111. }
  112. }
  113. spinlock_unlock(&service_globals.lock);
  114. }
  115. static void service_to_buffer(BUFFER *wb, SERVICE_TYPE service) {
  116. if(service & SERVICE_MAINTENANCE)
  117. buffer_strcat(wb, "MAINTENANCE ");
  118. if(service & SERVICE_COLLECTORS)
  119. buffer_strcat(wb, "COLLECTORS ");
  120. if(service & SERVICE_REPLICATION)
  121. buffer_strcat(wb, "REPLICATION ");
  122. if(service & ABILITY_DATA_QUERIES)
  123. buffer_strcat(wb, "DATA_QUERIES ");
  124. if(service & ABILITY_WEB_REQUESTS)
  125. buffer_strcat(wb, "WEB_REQUESTS ");
  126. if(service & SERVICE_WEB_SERVER)
  127. buffer_strcat(wb, "WEB_SERVER ");
  128. if(service & SERVICE_ACLK)
  129. buffer_strcat(wb, "ACLK ");
  130. if(service & SERVICE_HEALTH)
  131. buffer_strcat(wb, "HEALTH ");
  132. if(service & SERVICE_STREAMING)
  133. buffer_strcat(wb, "STREAMING ");
  134. if(service & ABILITY_STREAMING_CONNECTIONS)
  135. buffer_strcat(wb, "STREAMING_CONNECTIONS ");
  136. if(service & SERVICE_CONTEXT)
  137. buffer_strcat(wb, "CONTEXT ");
  138. if(service & SERVICE_ANALYTICS)
  139. buffer_strcat(wb, "ANALYTICS ");
  140. if(service & SERVICE_EXPORTERS)
  141. buffer_strcat(wb, "EXPORTERS ");
  142. if(service & SERVICE_HTTPD)
  143. buffer_strcat(wb, "HTTPD ");
  144. }
  145. static bool service_wait_exit(SERVICE_TYPE service, usec_t timeout_ut) {
  146. BUFFER *service_list = buffer_create(1024, NULL);
  147. BUFFER *thread_list = buffer_create(1024, NULL);
  148. usec_t started_ut = now_monotonic_usec(), ended_ut;
  149. size_t running;
  150. SERVICE_TYPE running_services = 0;
  151. // cancel the threads
  152. running = 0;
  153. running_services = 0;
  154. {
  155. buffer_flush(thread_list);
  156. spinlock_lock(&service_globals.lock);
  157. Pvoid_t *PValue;
  158. Word_t tid = 0;
  159. bool first = true;
  160. while((PValue = JudyLFirstThenNext(service_globals.pid_judy, &tid, &first))) {
  161. SERVICE_THREAD *sth = *PValue;
  162. if(sth->services & service && sth->tid != gettid() && !sth->cancelled) {
  163. sth->cancelled = true;
  164. switch(sth->type) {
  165. default:
  166. case SERVICE_THREAD_TYPE_NETDATA:
  167. netdata_thread_cancel(sth->netdata_thread);
  168. break;
  169. case SERVICE_THREAD_TYPE_EVENT_LOOP:
  170. case SERVICE_THREAD_TYPE_LIBUV:
  171. break;
  172. }
  173. if(running)
  174. buffer_strcat(thread_list, ", ");
  175. buffer_sprintf(thread_list, "'%s' (%d)", sth->name, sth->tid);
  176. running++;
  177. running_services |= sth->services & service;
  178. if(sth->force_quit_callback) {
  179. spinlock_unlock(&service_globals.lock);
  180. sth->force_quit_callback(sth->data);
  181. spinlock_lock(&service_globals.lock);
  182. continue;
  183. }
  184. }
  185. }
  186. spinlock_unlock(&service_globals.lock);
  187. }
  188. service_signal_exit(service);
  189. // signal them to stop
  190. size_t last_running = 0;
  191. size_t stale_time_ut = 0;
  192. usec_t sleep_ut = 50 * USEC_PER_MS;
  193. size_t log_countdown_ut = sleep_ut;
  194. do {
  195. if(running != last_running)
  196. stale_time_ut = 0;
  197. last_running = running;
  198. running = 0;
  199. running_services = 0;
  200. buffer_flush(thread_list);
  201. spinlock_lock(&service_globals.lock);
  202. Pvoid_t *PValue;
  203. Word_t tid = 0;
  204. bool first = true;
  205. while((PValue = JudyLFirstThenNext(service_globals.pid_judy, &tid, &first))) {
  206. SERVICE_THREAD *sth = *PValue;
  207. if(sth->services & service && sth->tid != gettid()) {
  208. if(running)
  209. buffer_strcat(thread_list, ", ");
  210. buffer_sprintf(thread_list, "'%s' (%d)", sth->name, sth->tid);
  211. running_services |= sth->services & service;
  212. running++;
  213. }
  214. }
  215. spinlock_unlock(&service_globals.lock);
  216. if(running) {
  217. log_countdown_ut -= (log_countdown_ut >= sleep_ut) ? sleep_ut : log_countdown_ut;
  218. if(log_countdown_ut == 0 || running != last_running) {
  219. log_countdown_ut = 20 * sleep_ut;
  220. buffer_flush(service_list);
  221. service_to_buffer(service_list, running_services);
  222. netdata_log_info("SERVICE CONTROL: waiting for the following %zu services [ %s] to exit: %s",
  223. running, buffer_tostring(service_list),
  224. running <= 10 ? buffer_tostring(thread_list) : "");
  225. }
  226. sleep_usec(sleep_ut);
  227. stale_time_ut += sleep_ut;
  228. }
  229. ended_ut = now_monotonic_usec();
  230. } while(running && (ended_ut - started_ut < timeout_ut || stale_time_ut < timeout_ut));
  231. if(running) {
  232. buffer_flush(service_list);
  233. service_to_buffer(service_list, running_services);
  234. netdata_log_info("SERVICE CONTROL: "
  235. "the following %zu service(s) [ %s] take too long to exit: %s; "
  236. "giving up on them...",
  237. running, buffer_tostring(service_list),
  238. buffer_tostring(thread_list));
  239. }
  240. buffer_free(thread_list);
  241. buffer_free(service_list);
  242. return (running == 0);
  243. }
  // Log the transition to the next shutdown step.
  //
  // Expects these locals in the calling scope:
  //   usec_t last_ut        - when the previous step was announced
  //   const char *prev_msg  - the previous step's message (NULL before the first)
  //   bool timeout          - whether the previous step timed out
  //
  // It reports how long the previous step took (tagged "(TIMEOUT) " when
  // 'timeout' is set), announces 'msg' as the next step, and then resets
  // last_ut, prev_msg and timeout for the following call.
  #define delta_shutdown_time(msg)                                                        \
      do {                                                                                \
          usec_t now_ut = now_monotonic_usec();                                           \
          if(prev_msg) {                                                                  \
              netdata_log_info("NETDATA SHUTDOWN: in %7llu ms, %s%s - next: %s",          \
                  (now_ut - last_ut) / USEC_PER_MS, (timeout)?"(TIMEOUT) ":"", prev_msg, msg); \
          }                                                                               \
          else {                                                                          \
              netdata_log_info("NETDATA SHUTDOWN: next: %s", msg);                        \
          }                                                                               \
          last_ut = now_ut;                                                               \
          prev_msg = msg;                                                                 \
          timeout = false;                                                                \
      } while(0)
  255. void web_client_cache_destroy(void);
  256. void netdata_cleanup_and_exit(int ret, const char *action, const char *action_result, const char *action_data) {
  257. usec_t started_ut = now_monotonic_usec();
  258. usec_t last_ut = started_ut;
  259. const char *prev_msg = NULL;
  260. bool timeout = false;
  261. nd_log_limits_unlimited();
  262. netdata_log_info("NETDATA SHUTDOWN: initializing shutdown with code %d...", ret);
  263. // send the stat from our caller
  264. analytics_statistic_t statistic = { action, action_result, action_data };
  265. analytics_statistic_send(&statistic);
  266. // notify we are exiting
  267. statistic = (analytics_statistic_t) {"EXIT", ret?"ERROR":"OK","-"};
  268. analytics_statistic_send(&statistic);
  269. delta_shutdown_time("create shutdown file");
  270. char agent_crash_file[FILENAME_MAX + 1];
  271. char agent_incomplete_shutdown_file[FILENAME_MAX + 1];
  272. snprintfz(agent_crash_file, FILENAME_MAX, "%s/.agent_crash", netdata_configured_varlib_dir);
  273. snprintfz(agent_incomplete_shutdown_file, FILENAME_MAX, "%s/.agent_incomplete_shutdown", netdata_configured_varlib_dir);
  274. (void) rename(agent_crash_file, agent_incomplete_shutdown_file);
  275. #ifdef ENABLE_DBENGINE
  276. if(dbengine_enabled) {
  277. delta_shutdown_time("dbengine exit mode");
  278. for (size_t tier = 0; tier < storage_tiers; tier++)
  279. rrdeng_exit_mode(multidb_ctx[tier]);
  280. }
  281. #endif
  282. delta_shutdown_time("close webrtc connections");
  283. webrtc_close_all_connections();
  284. delta_shutdown_time("disable maintenance, new queries, new web requests, new streaming connections and aclk");
  285. service_signal_exit(
  286. SERVICE_MAINTENANCE
  287. | ABILITY_DATA_QUERIES
  288. | ABILITY_WEB_REQUESTS
  289. | ABILITY_STREAMING_CONNECTIONS
  290. | SERVICE_ACLK
  291. | SERVICE_ACLKSYNC
  292. );
  293. delta_shutdown_time("stop replication, exporters, health and web servers threads");
  294. timeout = !service_wait_exit(
  295. SERVICE_EXPORTERS
  296. | SERVICE_HEALTH
  297. | SERVICE_WEB_SERVER
  298. | SERVICE_HTTPD
  299. , 3 * USEC_PER_SEC);
  300. delta_shutdown_time("stop collectors and streaming threads");
  301. timeout = !service_wait_exit(
  302. SERVICE_COLLECTORS
  303. | SERVICE_STREAMING
  304. , 3 * USEC_PER_SEC);
  305. delta_shutdown_time("stop replication threads");
  306. timeout = !service_wait_exit(
  307. SERVICE_REPLICATION // replication has to be stopped after STREAMING, because it cleans up ARAL
  308. , 3 * USEC_PER_SEC);
  309. delta_shutdown_time("prepare metasync shutdown");
  310. metadata_sync_shutdown_prepare();
  311. delta_shutdown_time("disable ML detection and training threads");
  312. ml_stop_threads();
  313. ml_fini();
  314. delta_shutdown_time("stop context thread");
  315. timeout = !service_wait_exit(
  316. SERVICE_CONTEXT
  317. , 3 * USEC_PER_SEC);
  318. delta_shutdown_time("stop maintenance thread");
  319. timeout = !service_wait_exit(
  320. SERVICE_MAINTENANCE
  321. , 3 * USEC_PER_SEC);
  322. delta_shutdown_time("clear web client cache");
  323. web_client_cache_destroy();
  324. delta_shutdown_time("stop aclk threads");
  325. timeout = !service_wait_exit(
  326. SERVICE_ACLK
  327. , 3 * USEC_PER_SEC);
  328. delta_shutdown_time("stop all remaining worker threads");
  329. timeout = !service_wait_exit(~0, 10 * USEC_PER_SEC);
  330. delta_shutdown_time("cancel main threads");
  331. cancel_main_threads();
  332. if(!ret) {
  333. // exit cleanly
  334. #ifdef ENABLE_DBENGINE
  335. if(dbengine_enabled) {
  336. delta_shutdown_time("flush dbengine tiers");
  337. for (size_t tier = 0; tier < storage_tiers; tier++)
  338. rrdeng_prepare_exit(multidb_ctx[tier]);
  339. for (size_t tier = 0; tier < storage_tiers; tier++) {
  340. if (!multidb_ctx[tier])
  341. continue;
  342. completion_wait_for(&multidb_ctx[tier]->quiesce.completion);
  343. completion_destroy(&multidb_ctx[tier]->quiesce.completion);
  344. }
  345. }
  346. #endif
  347. // free the database
  348. delta_shutdown_time("stop collection for all hosts");
  349. // rrdhost_free_all();
  350. rrd_finalize_collection_for_all_hosts();
  351. delta_shutdown_time("stop metasync threads");
  352. metadata_sync_shutdown();
  353. #ifdef ENABLE_DBENGINE
  354. if(dbengine_enabled) {
  355. delta_shutdown_time("wait for dbengine collectors to finish");
  356. size_t running = 1;
  357. size_t count = 10;
  358. while(running && count) {
  359. running = 0;
  360. for (size_t tier = 0; tier < storage_tiers; tier++)
  361. running += rrdeng_collectors_running(multidb_ctx[tier]);
  362. if(running) {
  363. nd_log_limit_static_thread_var(erl, 1, 100 * USEC_PER_MS);
  364. nd_log_limit(&erl, NDLS_DAEMON, NDLP_NOTICE,
  365. "waiting for %zu collectors to finish", running);
  366. // sleep_usec(100 * USEC_PER_MS);
  367. cleanup_destroyed_dictionaries();
  368. }
  369. count--;
  370. }
  371. delta_shutdown_time("wait for dbengine main cache to finish flushing");
  372. while (pgc_hot_and_dirty_entries(main_cache)) {
  373. pgc_flush_all_hot_and_dirty_pages(main_cache, PGC_SECTION_ALL);
  374. sleep_usec(100 * USEC_PER_MS);
  375. }
  376. delta_shutdown_time("stop dbengine tiers");
  377. for (size_t tier = 0; tier < storage_tiers; tier++)
  378. rrdeng_exit(multidb_ctx[tier]);
  379. rrdeng_enq_cmd(NULL, RRDENG_OPCODE_SHUTDOWN_EVLOOP, NULL, NULL, STORAGE_PRIORITY_BEST_EFFORT, NULL, NULL);
  380. }
  381. #endif
  382. }
  383. delta_shutdown_time("close SQL context db");
  384. sql_close_context_database();
  385. delta_shutdown_time("closed SQL main db");
  386. sql_close_database();
  387. // unlink the pid
  388. if(pidfile[0]) {
  389. delta_shutdown_time("remove pid file");
  390. if(unlink(pidfile) != 0)
  391. netdata_log_error("EXIT: cannot unlink pidfile '%s'.", pidfile);
  392. }
  393. #ifdef ENABLE_HTTPS
  394. delta_shutdown_time("free openssl structures");
  395. netdata_ssl_cleanup();
  396. #endif
  397. delta_shutdown_time("remove incomplete shutdown file");
  398. (void) unlink(agent_incomplete_shutdown_file);
  399. delta_shutdown_time("exit");
  400. usec_t ended_ut = now_monotonic_usec();
  401. netdata_log_info("NETDATA SHUTDOWN: completed in %llu ms - netdata is now exiting - bye bye...", (ended_ut - started_ut) / USEC_PER_MS);
  402. exit(ret);
  403. }
  404. void web_server_threading_selection(void) {
  405. web_server_mode = web_server_mode_id(config_get(CONFIG_SECTION_WEB, "mode", web_server_mode_name(web_server_mode)));
  406. int static_threaded = (web_server_mode == WEB_SERVER_MODE_STATIC_THREADED);
  407. int i;
  408. for (i = 0; static_threads[i].name; i++) {
  409. if (static_threads[i].start_routine == socket_listen_main_static_threaded)
  410. static_threads[i].enabled = static_threaded;
  411. }
  412. }
  413. int make_dns_decision(const char *section_name, const char *config_name, const char *default_value, SIMPLE_PATTERN *p)
  414. {
  415. char *value = config_get(section_name,config_name,default_value);
  416. if(!strcmp("yes",value))
  417. return 1;
  418. if(!strcmp("no",value))
  419. return 0;
  420. if(strcmp("heuristic",value))
  421. netdata_log_error("Invalid configuration option '%s' for '%s'/'%s'. Valid options are 'yes', 'no' and 'heuristic'. Proceeding with 'heuristic'",
  422. value, section_name, config_name);
  423. return simple_pattern_is_potential_name(p);
  424. }
  425. void web_server_config_options(void)
  426. {
  427. web_client_timeout =
  428. (int)config_get_number(CONFIG_SECTION_WEB, "disconnect idle clients after seconds", web_client_timeout);
  429. web_client_first_request_timeout =
  430. (int)config_get_number(CONFIG_SECTION_WEB, "timeout for first request", web_client_first_request_timeout);
  431. web_client_streaming_rate_t =
  432. config_get_number(CONFIG_SECTION_WEB, "accept a streaming request every seconds", web_client_streaming_rate_t);
  433. respect_web_browser_do_not_track_policy =
  434. config_get_boolean(CONFIG_SECTION_WEB, "respect do not track policy", respect_web_browser_do_not_track_policy);
  435. web_x_frame_options = config_get(CONFIG_SECTION_WEB, "x-frame-options response header", "");
  436. if(!*web_x_frame_options)
  437. web_x_frame_options = NULL;
  438. web_allow_connections_from =
  439. simple_pattern_create(config_get(CONFIG_SECTION_WEB, "allow connections from", "localhost *"),
  440. NULL, SIMPLE_PATTERN_EXACT, true);
  441. web_allow_connections_dns =
  442. make_dns_decision(CONFIG_SECTION_WEB, "allow connections by dns", "heuristic", web_allow_connections_from);
  443. web_allow_dashboard_from =
  444. simple_pattern_create(config_get(CONFIG_SECTION_WEB, "allow dashboard from", "localhost *"),
  445. NULL, SIMPLE_PATTERN_EXACT, true);
  446. web_allow_dashboard_dns =
  447. make_dns_decision(CONFIG_SECTION_WEB, "allow dashboard by dns", "heuristic", web_allow_dashboard_from);
  448. web_allow_badges_from =
  449. simple_pattern_create(config_get(CONFIG_SECTION_WEB, "allow badges from", "*"), NULL, SIMPLE_PATTERN_EXACT,
  450. true);
  451. web_allow_badges_dns =
  452. make_dns_decision(CONFIG_SECTION_WEB, "allow badges by dns", "heuristic", web_allow_badges_from);
  453. web_allow_registry_from =
  454. simple_pattern_create(config_get(CONFIG_SECTION_REGISTRY, "allow from", "*"), NULL, SIMPLE_PATTERN_EXACT,
  455. true);
  456. web_allow_registry_dns = make_dns_decision(CONFIG_SECTION_REGISTRY, "allow by dns", "heuristic",
  457. web_allow_registry_from);
  458. web_allow_streaming_from = simple_pattern_create(config_get(CONFIG_SECTION_WEB, "allow streaming from", "*"),
  459. NULL, SIMPLE_PATTERN_EXACT, true);
  460. web_allow_streaming_dns = make_dns_decision(CONFIG_SECTION_WEB, "allow streaming by dns", "heuristic",
  461. web_allow_streaming_from);
  462. // Note the default is not heuristic, the wildcards could match DNS but the intent is ip-addresses.
  463. web_allow_netdataconf_from = simple_pattern_create(config_get(CONFIG_SECTION_WEB, "allow netdata.conf from",
  464. "localhost fd* 10.* 192.168.* 172.16.* 172.17.* 172.18.*"
  465. " 172.19.* 172.20.* 172.21.* 172.22.* 172.23.* 172.24.*"
  466. " 172.25.* 172.26.* 172.27.* 172.28.* 172.29.* 172.30.*"
  467. " 172.31.* UNKNOWN"), NULL, SIMPLE_PATTERN_EXACT,
  468. true);
  469. web_allow_netdataconf_dns =
  470. make_dns_decision(CONFIG_SECTION_WEB, "allow netdata.conf by dns", "no", web_allow_netdataconf_from);
  471. web_allow_mgmt_from =
  472. simple_pattern_create(config_get(CONFIG_SECTION_WEB, "allow management from", "localhost"),
  473. NULL, SIMPLE_PATTERN_EXACT, true);
  474. web_allow_mgmt_dns =
  475. make_dns_decision(CONFIG_SECTION_WEB, "allow management by dns","heuristic",web_allow_mgmt_from);
  476. web_enable_gzip = config_get_boolean(CONFIG_SECTION_WEB, "enable gzip compression", web_enable_gzip);
  477. char *s = config_get(CONFIG_SECTION_WEB, "gzip compression strategy", "default");
  478. if(!strcmp(s, "default"))
  479. web_gzip_strategy = Z_DEFAULT_STRATEGY;
  480. else if(!strcmp(s, "filtered"))
  481. web_gzip_strategy = Z_FILTERED;
  482. else if(!strcmp(s, "huffman only"))
  483. web_gzip_strategy = Z_HUFFMAN_ONLY;
  484. else if(!strcmp(s, "rle"))
  485. web_gzip_strategy = Z_RLE;
  486. else if(!strcmp(s, "fixed"))
  487. web_gzip_strategy = Z_FIXED;
  488. else {
  489. netdata_log_error("Invalid compression strategy '%s'. Valid strategies are 'default', 'filtered', 'huffman only', 'rle' and 'fixed'. Proceeding with 'default'.", s);
  490. web_gzip_strategy = Z_DEFAULT_STRATEGY;
  491. }
  492. web_gzip_level = (int)config_get_number(CONFIG_SECTION_WEB, "gzip compression level", 3);
  493. if(web_gzip_level < 1) {
  494. netdata_log_error("Invalid compression level %d. Valid levels are 1 (fastest) to 9 (best ratio). Proceeding with level 1 (fastest compression).", web_gzip_level);
  495. web_gzip_level = 1;
  496. }
  497. else if(web_gzip_level > 9) {
  498. netdata_log_error("Invalid compression level %d. Valid levels are 1 (fastest) to 9 (best ratio). Proceeding with level 9 (best compression).", web_gzip_level);
  499. web_gzip_level = 9;
  500. }
  501. }
  502. // killpid kills pid with SIGTERM.
  503. int killpid(pid_t pid) {
  504. int ret;
  505. netdata_log_debug(D_EXIT, "Request to kill pid %d", pid);
  506. int signal = SIGTERM;
  507. //#ifdef NETDATA_INTERNAL_CHECKS
  508. // if(service_running(SERVICE_COLLECTORS))
  509. // signal = SIGABRT;
  510. //#endif
  511. errno = 0;
  512. ret = kill(pid, signal);
  513. if (ret == -1) {
  514. switch(errno) {
  515. case ESRCH:
  516. // We wanted the process to exit so just let the caller handle.
  517. return ret;
  518. case EPERM:
  519. netdata_log_error("Cannot kill pid %d, but I do not have enough permissions.", pid);
  520. break;
  521. default:
  522. netdata_log_error("Cannot kill pid %d, but I received an error.", pid);
  523. break;
  524. }
  525. }
  526. return ret;
  527. }
  // Read RLIMIT_NOFILE into *rl; logs an error and returns false on failure.
  static bool nofile_limit_read(struct rlimit *rl) {
      if(getrlimit(RLIMIT_NOFILE, rl) == 0)
          return true;

      netdata_log_error("getrlimit(RLIMIT_NOFILE) failed");
      return false;
  }

  // Raise the soft limit of open file descriptors to the hard limit.
  //
  // On return *rl holds the limits read back after the attempt, unless a
  // getrlimit() call failed. Failures are only logged; a resulting soft limit
  // below 1024 is reported as an error as well.
  static void set_nofile_limit(struct rlimit *rl) {
      // get the num files allowed
      if(!nofile_limit_read(rl))
          return;

      netdata_log_info("resources control: allowed file descriptors: soft = %zu, max = %zu",
                       (size_t) rl->rlim_cur, (size_t) rl->rlim_max);

      // make the soft/hard limits equal
      rl->rlim_cur = rl->rlim_max;
      if(setrlimit(RLIMIT_NOFILE, rl) != 0)
          netdata_log_error("setrlimit(RLIMIT_NOFILE, { %zu, %zu }) failed", (size_t)rl->rlim_cur, (size_t)rl->rlim_max);

      // sanity check to make sure we have enough file descriptors available to open
      if(!nofile_limit_read(rl))
          return;

      if(rl->rlim_cur < 1024)
          netdata_log_error("Number of open file descriptors allowed for this process is too low (RLIMIT_NOFILE=%zu)", (size_t)rl->rlim_cur);
  }
  549. void cancel_main_threads() {
  550. nd_log_limits_unlimited();
  551. int i, found = 0;
  552. usec_t max = 5 * USEC_PER_SEC, step = 100000;
  553. for (i = 0; static_threads[i].name != NULL ; i++) {
  554. if (static_threads[i].enabled == NETDATA_MAIN_THREAD_RUNNING) {
  555. if (static_threads[i].thread) {
  556. netdata_log_info("EXIT: Stopping main thread: %s", static_threads[i].name);
  557. netdata_thread_cancel(*static_threads[i].thread);
  558. } else {
  559. netdata_log_info("EXIT: No thread running (marking as EXITED): %s", static_threads[i].name);
  560. static_threads[i].enabled = NETDATA_MAIN_THREAD_EXITED;
  561. }
  562. found++;
  563. }
  564. }
  565. netdata_exit = 1;
  566. while(found && max > 0) {
  567. max -= step;
  568. netdata_log_info("Waiting %d threads to finish...", found);
  569. sleep_usec(step);
  570. found = 0;
  571. for (i = 0; static_threads[i].name != NULL ; i++) {
  572. if (static_threads[i].enabled != NETDATA_MAIN_THREAD_EXITED)
  573. found++;
  574. }
  575. }
  576. if(found) {
  577. for (i = 0; static_threads[i].name != NULL ; i++) {
  578. if (static_threads[i].enabled != NETDATA_MAIN_THREAD_EXITED)
  579. netdata_log_error("Main thread %s takes too long to exit. Giving up...", static_threads[i].name);
  580. }
  581. }
  582. else
  583. netdata_log_info("All threads finished.");
  584. for (i = 0; static_threads[i].name != NULL ; i++)
  585. freez(static_threads[i].thread);
  586. freez(static_threads);
  587. }
  588. struct option_def option_definitions[] = {
  589. // opt description arg name default value
  590. { 'c', "Configuration file to load.", "filename", CONFIG_DIR "/" CONFIG_FILENAME},
  591. { 'D', "Do not fork. Run in the foreground.", NULL, "run in the background"},
  592. { 'd', "Fork. Run in the background.", NULL, "run in the background"},
  593. { 'h', "Display this help message.", NULL, NULL},
  594. { 'P', "File to save a pid while running.", "filename", "do not save pid to a file"},
  595. { 'i', "The IP address to listen to.", "IP", "all IP addresses IPv4 and IPv6"},
  596. { 'p', "API/Web port to use.", "port", "19999"},
  597. { 's', "Prefix for /proc and /sys (for containers).", "path", "no prefix"},
  598. { 't', "The internal clock of netdata.", "seconds", "1"},
  599. { 'u', "Run as user.", "username", "netdata"},
  600. { 'v', "Print netdata version and exit.", NULL, NULL},
  601. { 'V', "Print netdata version and exit.", NULL, NULL},
  602. { 'W', "See Advanced options below.", "options", NULL},
  603. };
  604. int help(int exitcode) {
  605. FILE *stream;
  606. if(exitcode == 0)
  607. stream = stdout;
  608. else
  609. stream = stderr;
  610. int num_opts = sizeof(option_definitions) / sizeof(struct option_def);
  611. int i;
  612. int max_len_arg = 0;
  613. // Compute maximum argument length
  614. for( i = 0; i < num_opts; i++ ) {
  615. if(option_definitions[i].arg_name) {
  616. int len_arg = (int)strlen(option_definitions[i].arg_name);
  617. if(len_arg > max_len_arg) max_len_arg = len_arg;
  618. }
  619. }
  620. if(max_len_arg > 30) max_len_arg = 30;
  621. if(max_len_arg < 20) max_len_arg = 20;
  622. fprintf(stream, "%s", "\n"
  623. " ^\n"
  624. " |.-. .-. .-. .-. . netdata \n"
  625. " | '-' '-' '-' '-' real-time performance monitoring, done right! \n"
  626. " +----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+--->\n"
  627. "\n"
  628. " Copyright (C) 2016-2023, Netdata, Inc. <info@netdata.cloud>\n"
  629. " Released under GNU General Public License v3 or later.\n"
  630. " All rights reserved.\n"
  631. "\n"
  632. " Home Page : https://netdata.cloud\n"
  633. " Source Code: https://github.com/netdata/netdata\n"
  634. " Docs : https://learn.netdata.cloud\n"
  635. " Support : https://github.com/netdata/netdata/issues\n"
  636. " License : https://github.com/netdata/netdata/blob/master/LICENSE.md\n"
  637. "\n"
  638. " Twitter : https://twitter.com/netdatahq\n"
  639. " LinkedIn : https://linkedin.com/company/netdata-cloud/\n"
  640. " Facebook : https://facebook.com/linuxnetdata/\n"
  641. "\n"
  642. "\n"
  643. );
  644. fprintf(stream, " SYNOPSIS: netdata [options]\n");
  645. fprintf(stream, "\n");
  646. fprintf(stream, " Options:\n\n");
  647. // Output options description.
  648. for( i = 0; i < num_opts; i++ ) {
  649. fprintf(stream, " -%c %-*s %s", option_definitions[i].val, max_len_arg, option_definitions[i].arg_name ? option_definitions[i].arg_name : "", option_definitions[i].description);
  650. if(option_definitions[i].default_value) {
  651. fprintf(stream, "\n %c %-*s Default: %s\n", ' ', max_len_arg, "", option_definitions[i].default_value);
  652. } else {
  653. fprintf(stream, "\n");
  654. }
  655. fprintf(stream, "\n");
  656. }
  657. fprintf(stream, "\n Advanced options:\n\n"
  658. " -W stacksize=N Set the stacksize (in bytes).\n\n"
  659. " -W debug_flags=N Set runtime tracing to debug.log.\n\n"
  660. " -W unittest Run internal unittests and exit.\n\n"
  661. " -W sqlite-meta-recover Run recovery on the metadata database and exit.\n\n"
  662. " -W sqlite-compact Reclaim metadata database unused space and exit.\n\n"
  663. #ifdef ENABLE_DBENGINE
  664. " -W createdataset=N Create a DB engine dataset of N seconds and exit.\n\n"
  665. " -W stresstest=A,B,C,D,E,F,G\n"
  666. " Run a DB engine stress test for A seconds,\n"
  667. " with B writers and C readers, with a ramp up\n"
  668. " time of D seconds for writers, a page cache\n"
  669. " size of E MiB, an optional disk space limit\n"
  670. " of F MiB, G libuv workers (default 16) and exit.\n\n"
  671. #endif
  672. " -W set section option value\n"
  673. " set netdata.conf option from the command line.\n\n"
  674. " -W buildinfo Print the version, the configure options,\n"
  675. " a list of optional features, and whether they\n"
  676. " are enabled or not.\n\n"
  677. " -W buildinfojson Print the version, the configure options,\n"
  678. " a list of optional features, and whether they\n"
  679. " are enabled or not, in JSON format.\n\n"
  680. " -W simple-pattern pattern string\n"
  681. " Check if string matches pattern and exit.\n\n"
  682. " -W \"claim -token=TOKEN -rooms=ROOM1,ROOM2\"\n"
  683. " Claim the agent to the workspace rooms pointed to by TOKEN and ROOM*.\n\n"
  684. );
  685. fprintf(stream, "\n Signals netdata handles:\n\n"
  686. " - HUP Close and reopen log files.\n"
  687. " - USR1 Save internal DB to disk.\n"
  688. " - USR2 Reload health configuration.\n"
  689. "\n"
  690. );
  691. fflush(stream);
  692. return exitcode;
  693. }
  #ifdef ENABLE_HTTPS
  /*
   * Read the TLS settings of the [web] section (key, certificate, protocol
   * version, ciphers) and initialize OpenSSL. The key and certificate default
   * to <user config dir>/ssl/key.pem and <user config dir>/ssl/cert.pem.
   */
  static void security_init(){
      char default_path[FILENAME_MAX + 1];

      // private key
      snprintfz(default_path, FILENAME_MAX, "%s/ssl/key.pem",netdata_configured_user_config_dir);
      netdata_ssl_security_key = config_get(CONFIG_SECTION_WEB, "ssl key", default_path);

      // certificate (the scratch buffer is reused for its default)
      snprintfz(default_path, FILENAME_MAX, "%s/ssl/cert.pem",netdata_configured_user_config_dir);
      netdata_ssl_security_cert = config_get(CONFIG_SECTION_WEB, "ssl certificate", default_path);

      // protocol version and cipher list
      tls_version = config_get(CONFIG_SECTION_WEB, "tls version", "1.3");
      tls_ciphers = config_get(CONFIG_SECTION_WEB, "tls ciphers", "none");

      netdata_ssl_initialize_openssl();
  }
  #endif
  706. static void log_init(void) {
  707. nd_log_set_facility(config_get(CONFIG_SECTION_LOGS, "facility", "daemon"));
  708. time_t period = ND_LOG_DEFAULT_THROTTLE_PERIOD;
  709. size_t logs = ND_LOG_DEFAULT_THROTTLE_LOGS;
  710. period = config_get_number(CONFIG_SECTION_LOGS, "logs flood protection period", period);
  711. logs = (unsigned long)config_get_number(CONFIG_SECTION_LOGS, "logs to trigger flood protection", (long long int)logs);
  712. nd_log_set_flood_protection(logs, period);
  713. nd_log_set_priority_level(config_get(CONFIG_SECTION_LOGS, "level", NDLP_INFO_STR));
  714. // char filename[FILENAME_MAX + 1];
  715. // snprintfz(filename, FILENAME_MAX, "%s/debug.log", netdata_configured_log_dir);
  716. nd_log_set_user_settings(NDLS_DEBUG, config_get(CONFIG_SECTION_LOGS, "debug", "stderr"));
  717. // bool with_journal = is_stderr_connected_to_journal() /* || nd_log_journal_socket_available() */;
  718. nd_log_set_user_settings(NDLS_DAEMON, config_get(CONFIG_SECTION_LOGS, "daemon", "stderr"));
  719. nd_log_set_user_settings(NDLS_COLLECTORS, config_get(CONFIG_SECTION_LOGS, "collector", "stderr"));
  720. nd_log_set_user_settings(NDLS_ACCESS, config_get(CONFIG_SECTION_LOGS, "access", "stderr"));
  721. nd_log_set_user_settings(NDLS_HEALTH, config_get(CONFIG_SECTION_LOGS, "health", "stderr"));
  722. #ifdef ENABLE_ACLK
  723. aclklog_enabled = config_get_boolean(CONFIG_SECTION_CLOUD, "conversation log", CONFIG_BOOLEAN_NO);
  724. if (aclklog_enabled) {
  725. nd_log_set_user_settings(NDLS_ACLK, config_get(CONFIG_SECTION_CLOUD, "conversation log file", "stderr"));
  726. }
  727. #endif
  728. }
  729. char *initialize_lock_directory_path(char *prefix)
  730. {
  731. char filename[FILENAME_MAX + 1];
  732. snprintfz(filename, FILENAME_MAX, "%s/lock", prefix);
  733. return config_get(CONFIG_SECTION_DIRECTORIES, "lock", filename);
  734. }
  735. static void backwards_compatible_config() {
  736. // move [global] options to the [web] section
  737. config_move(CONFIG_SECTION_GLOBAL, "http port listen backlog",
  738. CONFIG_SECTION_WEB, "listen backlog");
  739. config_move(CONFIG_SECTION_GLOBAL, "bind socket to IP",
  740. CONFIG_SECTION_WEB, "bind to");
  741. config_move(CONFIG_SECTION_GLOBAL, "bind to",
  742. CONFIG_SECTION_WEB, "bind to");
  743. config_move(CONFIG_SECTION_GLOBAL, "port",
  744. CONFIG_SECTION_WEB, "default port");
  745. config_move(CONFIG_SECTION_GLOBAL, "default port",
  746. CONFIG_SECTION_WEB, "default port");
  747. config_move(CONFIG_SECTION_GLOBAL, "disconnect idle web clients after seconds",
  748. CONFIG_SECTION_WEB, "disconnect idle clients after seconds");
  749. config_move(CONFIG_SECTION_GLOBAL, "respect web browser do not track policy",
  750. CONFIG_SECTION_WEB, "respect do not track policy");
  751. config_move(CONFIG_SECTION_GLOBAL, "web x-frame-options header",
  752. CONFIG_SECTION_WEB, "x-frame-options response header");
  753. config_move(CONFIG_SECTION_GLOBAL, "enable web responses gzip compression",
  754. CONFIG_SECTION_WEB, "enable gzip compression");
  755. config_move(CONFIG_SECTION_GLOBAL, "web compression strategy",
  756. CONFIG_SECTION_WEB, "gzip compression strategy");
  757. config_move(CONFIG_SECTION_GLOBAL, "web compression level",
  758. CONFIG_SECTION_WEB, "gzip compression level");
  759. config_move(CONFIG_SECTION_GLOBAL, "config directory",
  760. CONFIG_SECTION_DIRECTORIES, "config");
  761. config_move(CONFIG_SECTION_GLOBAL, "stock config directory",
  762. CONFIG_SECTION_DIRECTORIES, "stock config");
  763. config_move(CONFIG_SECTION_GLOBAL, "log directory",
  764. CONFIG_SECTION_DIRECTORIES, "log");
  765. config_move(CONFIG_SECTION_GLOBAL, "web files directory",
  766. CONFIG_SECTION_DIRECTORIES, "web");
  767. config_move(CONFIG_SECTION_GLOBAL, "cache directory",
  768. CONFIG_SECTION_DIRECTORIES, "cache");
  769. config_move(CONFIG_SECTION_GLOBAL, "lib directory",
  770. CONFIG_SECTION_DIRECTORIES, "lib");
  771. config_move(CONFIG_SECTION_GLOBAL, "home directory",
  772. CONFIG_SECTION_DIRECTORIES, "home");
  773. config_move(CONFIG_SECTION_GLOBAL, "lock directory",
  774. CONFIG_SECTION_DIRECTORIES, "lock");
  775. config_move(CONFIG_SECTION_GLOBAL, "plugins directory",
  776. CONFIG_SECTION_DIRECTORIES, "plugins");
  777. config_move(CONFIG_SECTION_HEALTH, "health configuration directory",
  778. CONFIG_SECTION_DIRECTORIES, "health config");
  779. config_move(CONFIG_SECTION_HEALTH, "stock health configuration directory",
  780. CONFIG_SECTION_DIRECTORIES, "stock health config");
  781. config_move(CONFIG_SECTION_REGISTRY, "registry db directory",
  782. CONFIG_SECTION_DIRECTORIES, "registry");
  783. config_move(CONFIG_SECTION_GLOBAL, "debug log",
  784. CONFIG_SECTION_LOGS, "debug");
  785. config_move(CONFIG_SECTION_GLOBAL, "error log",
  786. CONFIG_SECTION_LOGS, "error");
  787. config_move(CONFIG_SECTION_GLOBAL, "access log",
  788. CONFIG_SECTION_LOGS, "access");
  789. config_move(CONFIG_SECTION_GLOBAL, "facility log",
  790. CONFIG_SECTION_LOGS, "facility");
  791. config_move(CONFIG_SECTION_GLOBAL, "errors flood protection period",
  792. CONFIG_SECTION_LOGS, "errors flood protection period");
  793. config_move(CONFIG_SECTION_GLOBAL, "errors to trigger flood protection",
  794. CONFIG_SECTION_LOGS, "errors to trigger flood protection");
  795. config_move(CONFIG_SECTION_GLOBAL, "debug flags",
  796. CONFIG_SECTION_LOGS, "debug flags");
  797. config_move(CONFIG_SECTION_GLOBAL, "TZ environment variable",
  798. CONFIG_SECTION_ENV_VARS, "TZ");
  799. config_move(CONFIG_SECTION_PLUGINS, "PATH environment variable",
  800. CONFIG_SECTION_ENV_VARS, "PATH");
  801. config_move(CONFIG_SECTION_PLUGINS, "PYTHONPATH environment variable",
  802. CONFIG_SECTION_ENV_VARS, "PYTHONPATH");
  803. config_move(CONFIG_SECTION_STATSD, "enabled",
  804. CONFIG_SECTION_PLUGINS, "statsd");
  805. config_move(CONFIG_SECTION_GLOBAL, "memory mode",
  806. CONFIG_SECTION_DB, "mode");
  807. config_move(CONFIG_SECTION_GLOBAL, "history",
  808. CONFIG_SECTION_DB, "retention");
  809. config_move(CONFIG_SECTION_GLOBAL, "update every",
  810. CONFIG_SECTION_DB, "update every");
  811. config_move(CONFIG_SECTION_GLOBAL, "page cache size",
  812. CONFIG_SECTION_DB, "dbengine page cache size MB");
  813. config_move(CONFIG_SECTION_DB, "page cache size",
  814. CONFIG_SECTION_DB, "dbengine page cache size MB");
  815. config_move(CONFIG_SECTION_GLOBAL, "page cache uses malloc",
  816. CONFIG_SECTION_DB, "dbengine page cache with malloc");
  817. config_move(CONFIG_SECTION_DB, "page cache with malloc",
  818. CONFIG_SECTION_DB, "dbengine page cache with malloc");
  819. config_move(CONFIG_SECTION_GLOBAL, "dbengine disk space",
  820. CONFIG_SECTION_DB, "dbengine disk space MB");
  821. config_move(CONFIG_SECTION_GLOBAL, "dbengine multihost disk space",
  822. CONFIG_SECTION_DB, "dbengine multihost disk space MB");
  823. config_move(CONFIG_SECTION_GLOBAL, "memory deduplication (ksm)",
  824. CONFIG_SECTION_DB, "memory deduplication (ksm)");
  825. config_move(CONFIG_SECTION_GLOBAL, "dbengine page fetch timeout",
  826. CONFIG_SECTION_DB, "dbengine page fetch timeout secs");
  827. config_move(CONFIG_SECTION_GLOBAL, "dbengine page fetch retries",
  828. CONFIG_SECTION_DB, "dbengine page fetch retries");
  829. config_move(CONFIG_SECTION_GLOBAL, "dbengine extent pages",
  830. CONFIG_SECTION_DB, "dbengine pages per extent");
  831. config_move(CONFIG_SECTION_GLOBAL, "cleanup obsolete charts after seconds",
  832. CONFIG_SECTION_DB, "cleanup obsolete charts after secs");
  833. config_move(CONFIG_SECTION_GLOBAL, "gap when lost iterations above",
  834. CONFIG_SECTION_DB, "gap when lost iterations above");
  835. config_move(CONFIG_SECTION_GLOBAL, "cleanup orphan hosts after seconds",
  836. CONFIG_SECTION_DB, "cleanup orphan hosts after secs");
  837. config_move(CONFIG_SECTION_GLOBAL, "delete obsolete charts files",
  838. CONFIG_SECTION_DB, "delete obsolete charts files");
  839. config_move(CONFIG_SECTION_GLOBAL, "delete orphan hosts files",
  840. CONFIG_SECTION_DB, "delete orphan hosts files");
  841. config_move(CONFIG_SECTION_GLOBAL, "enable zero metrics",
  842. CONFIG_SECTION_DB, "enable zero metrics");
  843. config_move(CONFIG_SECTION_LOGS, "error",
  844. CONFIG_SECTION_LOGS, "daemon");
  845. config_move(CONFIG_SECTION_LOGS, "severity level",
  846. CONFIG_SECTION_LOGS, "level");
  847. config_move(CONFIG_SECTION_LOGS, "errors to trigger flood protection",
  848. CONFIG_SECTION_LOGS, "logs to trigger flood protection");
  849. config_move(CONFIG_SECTION_LOGS, "errors flood protection period",
  850. CONFIG_SECTION_LOGS, "logs flood protection period");
  851. config_move(CONFIG_SECTION_HEALTH, "is ephemeral",
  852. CONFIG_SECTION_GLOBAL, "is ephemeral node");
  853. config_move(CONFIG_SECTION_HEALTH, "has unstable connection",
  854. CONFIG_SECTION_GLOBAL, "has unstable connection");
  855. }
  856. static int get_hostname(char *buf, size_t buf_size) {
  857. if (netdata_configured_host_prefix && *netdata_configured_host_prefix) {
  858. char filename[FILENAME_MAX + 1];
  859. snprintfz(filename, FILENAME_MAX, "%s/etc/hostname", netdata_configured_host_prefix);
  860. if (!read_file(filename, buf, buf_size)) {
  861. trim(buf);
  862. return 0;
  863. }
  864. }
  865. return gethostname(buf, buf_size);
  866. }
  867. static void get_netdata_configured_variables() {
  868. backwards_compatible_config();
  869. // ------------------------------------------------------------------------
  870. // get the hostname
  871. netdata_configured_host_prefix = config_get(CONFIG_SECTION_GLOBAL, "host access prefix", "");
  872. verify_netdata_host_prefix();
  873. char buf[HOSTNAME_MAX + 1];
  874. if (get_hostname(buf, HOSTNAME_MAX))
  875. netdata_log_error("Cannot get machine hostname.");
  876. netdata_configured_hostname = config_get(CONFIG_SECTION_GLOBAL, "hostname", buf);
  877. netdata_log_debug(D_OPTIONS, "hostname set to '%s'", netdata_configured_hostname);
  878. // ------------------------------------------------------------------------
  879. // get default database update frequency
  880. default_rrd_update_every = (int) config_get_number(CONFIG_SECTION_DB, "update every", UPDATE_EVERY);
  881. if(default_rrd_update_every < 1 || default_rrd_update_every > 600) {
  882. netdata_log_error("Invalid data collection frequency (update every) %d given. Defaulting to %d.", default_rrd_update_every, UPDATE_EVERY);
  883. default_rrd_update_every = UPDATE_EVERY;
  884. config_set_number(CONFIG_SECTION_DB, "update every", default_rrd_update_every);
  885. }
  886. // ------------------------------------------------------------------------
  887. // get default memory mode for the database
  888. {
  889. const char *mode = config_get(CONFIG_SECTION_DB, "mode", rrd_memory_mode_name(default_rrd_memory_mode));
  890. default_rrd_memory_mode = rrd_memory_mode_id(mode);
  891. if(strcmp(mode, rrd_memory_mode_name(default_rrd_memory_mode)) != 0) {
  892. netdata_log_error("Invalid memory mode '%s' given. Using '%s'", mode, rrd_memory_mode_name(default_rrd_memory_mode));
  893. config_set(CONFIG_SECTION_DB, "mode", rrd_memory_mode_name(default_rrd_memory_mode));
  894. }
  895. }
  896. // ------------------------------------------------------------------------
  897. // get default database size
  898. if(default_rrd_memory_mode != RRD_MEMORY_MODE_DBENGINE && default_rrd_memory_mode != RRD_MEMORY_MODE_NONE) {
  899. default_rrd_history_entries = (int)config_get_number(
  900. CONFIG_SECTION_DB, "retention",
  901. align_entries_to_pagesize(default_rrd_memory_mode, RRD_DEFAULT_HISTORY_ENTRIES));
  902. long h = align_entries_to_pagesize(default_rrd_memory_mode, default_rrd_history_entries);
  903. if (h != default_rrd_history_entries) {
  904. config_set_number(CONFIG_SECTION_DB, "retention", h);
  905. default_rrd_history_entries = (int)h;
  906. }
  907. }
  908. // ------------------------------------------------------------------------
  909. // get system paths
  910. netdata_configured_user_config_dir = config_get(CONFIG_SECTION_DIRECTORIES, "config", netdata_configured_user_config_dir);
  911. netdata_configured_stock_config_dir = config_get(CONFIG_SECTION_DIRECTORIES, "stock config", netdata_configured_stock_config_dir);
  912. netdata_configured_log_dir = config_get(CONFIG_SECTION_DIRECTORIES, "log", netdata_configured_log_dir);
  913. netdata_configured_web_dir = config_get(CONFIG_SECTION_DIRECTORIES, "web", netdata_configured_web_dir);
  914. netdata_configured_cache_dir = config_get(CONFIG_SECTION_DIRECTORIES, "cache", netdata_configured_cache_dir);
  915. netdata_configured_varlib_dir = config_get(CONFIG_SECTION_DIRECTORIES, "lib", netdata_configured_varlib_dir);
  916. netdata_configured_lock_dir = initialize_lock_directory_path(netdata_configured_varlib_dir);
  917. {
  918. pluginsd_initialize_plugin_directories();
  919. netdata_configured_primary_plugins_dir = plugin_directories[PLUGINSD_STOCK_PLUGINS_DIRECTORY_PATH];
  920. }
  921. #ifdef ENABLE_DBENGINE
  922. // ------------------------------------------------------------------------
  923. // get default Database Engine page type
  924. const char *page_type = config_get(CONFIG_SECTION_DB, "dbengine page type", "raw");
  925. if (strcmp(page_type, "gorilla") == 0) {
  926. tier_page_type[0] = PAGE_GORILLA_METRICS;
  927. } else if (strcmp(page_type, "raw") != 0) {
  928. netdata_log_error("Invalid dbengine page type ''%s' given. Defaulting to 'raw'.", page_type);
  929. }
  930. // ------------------------------------------------------------------------
  931. // get default Database Engine page cache size in MiB
  932. default_rrdeng_page_cache_mb = (int) config_get_number(CONFIG_SECTION_DB, "dbengine page cache size MB", default_rrdeng_page_cache_mb);
  933. default_rrdeng_extent_cache_mb = (int) config_get_number(CONFIG_SECTION_DB, "dbengine extent cache size MB", default_rrdeng_extent_cache_mb);
  934. db_engine_journal_check = config_get_boolean(CONFIG_SECTION_DB, "dbengine enable journal integrity check", CONFIG_BOOLEAN_NO);
  935. if(default_rrdeng_extent_cache_mb < 0)
  936. default_rrdeng_extent_cache_mb = 0;
  937. if(default_rrdeng_page_cache_mb < RRDENG_MIN_PAGE_CACHE_SIZE_MB) {
  938. netdata_log_error("Invalid page cache size %d given. Defaulting to %d.", default_rrdeng_page_cache_mb, RRDENG_MIN_PAGE_CACHE_SIZE_MB);
  939. default_rrdeng_page_cache_mb = RRDENG_MIN_PAGE_CACHE_SIZE_MB;
  940. config_set_number(CONFIG_SECTION_DB, "dbengine page cache size MB", default_rrdeng_page_cache_mb);
  941. }
  942. // ------------------------------------------------------------------------
  943. // get default Database Engine disk space quota in MiB
  944. default_rrdeng_disk_quota_mb = (int) config_get_number(CONFIG_SECTION_DB, "dbengine disk space MB", default_rrdeng_disk_quota_mb);
  945. if(default_rrdeng_disk_quota_mb < RRDENG_MIN_DISK_SPACE_MB) {
  946. netdata_log_error("Invalid dbengine disk space %d given. Defaulting to %d.", default_rrdeng_disk_quota_mb, RRDENG_MIN_DISK_SPACE_MB);
  947. default_rrdeng_disk_quota_mb = RRDENG_MIN_DISK_SPACE_MB;
  948. config_set_number(CONFIG_SECTION_DB, "dbengine disk space MB", default_rrdeng_disk_quota_mb);
  949. }
  950. default_multidb_disk_quota_mb = (int) config_get_number(CONFIG_SECTION_DB, "dbengine multihost disk space MB", compute_multidb_diskspace());
  951. if(default_multidb_disk_quota_mb < RRDENG_MIN_DISK_SPACE_MB) {
  952. netdata_log_error("Invalid multidb disk space %d given. Defaulting to %d.", default_multidb_disk_quota_mb, default_rrdeng_disk_quota_mb);
  953. default_multidb_disk_quota_mb = default_rrdeng_disk_quota_mb;
  954. config_set_number(CONFIG_SECTION_DB, "dbengine multihost disk space MB", default_multidb_disk_quota_mb);
  955. }
  956. #else
  957. if (default_rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) {
  958. error_report("RRD_MEMORY_MODE_DBENGINE is not supported in this platform. The agent will use db mode 'save' instead.");
  959. default_rrd_memory_mode = RRD_MEMORY_MODE_RAM;
  960. }
  961. #endif
  962. // --------------------------------------------------------------------
  963. // get KSM settings
  964. #ifdef MADV_MERGEABLE
  965. enable_ksm = config_get_boolean(CONFIG_SECTION_DB, "memory deduplication (ksm)", enable_ksm);
  966. #endif
  967. // --------------------------------------------------------------------
  968. // metric correlations
  969. enable_metric_correlations = config_get_boolean(CONFIG_SECTION_GLOBAL, "enable metric correlations", enable_metric_correlations);
  970. default_metric_correlations_method = weights_string_to_method(config_get(
  971. CONFIG_SECTION_GLOBAL, "metric correlations method",
  972. weights_method_to_string(default_metric_correlations_method)));
  973. // --------------------------------------------------------------------
  974. rrdset_free_obsolete_time_s = config_get_number(CONFIG_SECTION_DB, "cleanup obsolete charts after secs", rrdset_free_obsolete_time_s);
  975. rrdhost_free_ephemeral_time_s = config_get_number(CONFIG_SECTION_DB, "cleanup ephemeral hosts after secs", rrdhost_free_ephemeral_time_s);
  976. // Current chart locking and invalidation scheme doesn't prevent Netdata from segmentation faults if a short
  977. // cleanup delay is set. Extensive stress tests showed that 10 seconds is quite a safe delay. Look at
  978. // https://github.com/netdata/netdata/pull/11222#issuecomment-868367920 for more information.
  979. if (rrdset_free_obsolete_time_s < 10) {
  980. rrdset_free_obsolete_time_s = 10;
  981. netdata_log_info("The \"cleanup obsolete charts after seconds\" option was set to 10 seconds.");
  982. config_set_number(CONFIG_SECTION_DB, "cleanup obsolete charts after secs", rrdset_free_obsolete_time_s);
  983. }
  984. gap_when_lost_iterations_above = (int)config_get_number(CONFIG_SECTION_DB, "gap when lost iterations above", gap_when_lost_iterations_above);
  985. if (gap_when_lost_iterations_above < 1) {
  986. gap_when_lost_iterations_above = 1;
  987. config_set_number(CONFIG_SECTION_DB, "gap when lost iterations above", gap_when_lost_iterations_above);
  988. }
  989. gap_when_lost_iterations_above += 2;
  990. // --------------------------------------------------------------------
  991. // get various system parameters
  992. get_system_HZ();
  993. get_system_cpus_uncached();
  994. get_system_pid_max();
  995. }
  996. static void post_conf_load(char **user)
  997. {
  998. // --------------------------------------------------------------------
  999. // get the user we should run
  1000. // IMPORTANT: this is required before web_files_uid()
  1001. if(getuid() == 0) {
  1002. *user = config_get(CONFIG_SECTION_GLOBAL, "run as user", NETDATA_USER);
  1003. }
  1004. else {
  1005. struct passwd *passwd = getpwuid(getuid());
  1006. *user = config_get(CONFIG_SECTION_GLOBAL, "run as user", (passwd && passwd->pw_name)?passwd->pw_name:"");
  1007. }
  1008. }
  1009. static bool load_netdata_conf(char *filename, char overwrite_used, char **user) {
  1010. errno = 0;
  1011. int ret = 0;
  1012. if(filename && *filename) {
  1013. ret = config_load(filename, overwrite_used, NULL);
  1014. if(!ret)
  1015. netdata_log_error("CONFIG: cannot load config file '%s'.", filename);
  1016. }
  1017. else {
  1018. filename = strdupz_path_subpath(netdata_configured_user_config_dir, "netdata.conf");
  1019. ret = config_load(filename, overwrite_used, NULL);
  1020. if(!ret) {
  1021. netdata_log_info("CONFIG: cannot load user config '%s'. Will try the stock version.", filename);
  1022. freez(filename);
  1023. filename = strdupz_path_subpath(netdata_configured_stock_config_dir, "netdata.conf");
  1024. ret = config_load(filename, overwrite_used, NULL);
  1025. if(!ret)
  1026. netdata_log_info("CONFIG: cannot load stock config '%s'. Running with internal defaults.", filename);
  1027. }
  1028. freez(filename);
  1029. }
  1030. post_conf_load(user);
  1031. return ret;
  1032. }
  // Marker for static analysis only: the annotation below declares argument 0
  // as sanitized. The function itself does nothing with s at runtime.
  // coverity[ +tainted_string_sanitize_content : arg-0 ]
  static inline void coverity_remove_taint(char *s) {
      (void) s; // intentionally unused
  }
  1038. int get_system_info(struct rrdhost_system_info *system_info) {
  1039. char *script;
  1040. script = mallocz(sizeof(char) * (strlen(netdata_configured_primary_plugins_dir) + strlen("system-info.sh") + 2));
  1041. sprintf(script, "%s/%s", netdata_configured_primary_plugins_dir, "system-info.sh");
  1042. if (unlikely(access(script, R_OK) != 0)) {
  1043. netdata_log_error("System info script %s not found.",script);
  1044. freez(script);
  1045. return 1;
  1046. }
  1047. pid_t command_pid;
  1048. FILE *fp_child_input;
  1049. FILE *fp_child_output = netdata_popen(script, &command_pid, &fp_child_input);
  1050. if(fp_child_output) {
  1051. char line[200 + 1];
  1052. // Removed the double strlens, if the Coverity tainted string warning reappears I'll revert.
  1053. // One time init code, but I'm curious about the warning...
  1054. while (fgets(line, 200, fp_child_output) != NULL) {
  1055. char *value=line;
  1056. while (*value && *value != '=') value++;
  1057. if (*value=='=') {
  1058. *value='\0';
  1059. value++;
  1060. char *end = value;
  1061. while (*end && *end != '\n') end++;
  1062. *end = '\0'; // Overwrite newline if present
  1063. coverity_remove_taint(line); // I/O is controlled result of system_info.sh - not tainted
  1064. coverity_remove_taint(value);
  1065. if(unlikely(rrdhost_set_system_info_variable(system_info, line, value))) {
  1066. netdata_log_error("Unexpected environment variable %s=%s", line, value);
  1067. } else {
  1068. setenv(line, value, 1);
  1069. }
  1070. }
  1071. }
  1072. netdata_pclose(fp_child_input, fp_child_output, command_pid);
  1073. }
  1074. freez(script);
  1075. return 0;
  1076. }
  1077. void set_silencers_filename() {
  1078. char filename[FILENAME_MAX + 1];
  1079. snprintfz(filename, FILENAME_MAX, "%s/health.silencers.json", netdata_configured_varlib_dir);
  1080. silencers_filename = config_get(CONFIG_SECTION_HEALTH, "silencers file", filename);
  1081. }
  1082. /* Any config setting that can be accessed without a default value i.e. configget(...,...,NULL) *MUST*
  1083. be set in this procedure to be called in all the relevant code paths.
  1084. */
  /*
   * Log a startup milestone.
   *
   * Logs how long the previous step (prev_msg) took and announces the next one
   * (msg); on the first use, when prev_msg is NULL, only the next step is
   * announced. Then remembers the current time and msg for the following call.
   *
   * Expects `usec_t last_ut` and `const char *prev_msg` to be in scope at the
   * point of use. msg is evaluated exactly once.
   *
   * Wrapped in do { } while (0) so the macro behaves as a single statement and
   * can be used safely in unbraced if/else bodies.
   */
  #define delta_startup_time(msg)                                                                     \
      do {                                                                                            \
          const char *delta_next_msg_ = (msg);                                                        \
          usec_t now_ut = now_monotonic_usec();                                                       \
          if(prev_msg)                                                                                \
              netdata_log_info("NETDATA STARTUP: in %7llu ms, %s - next: %s",                         \
                               (now_ut - last_ut) / USEC_PER_MS, prev_msg, delta_next_msg_);          \
          else                                                                                        \
              netdata_log_info("NETDATA STARTUP: next: %s", delta_next_msg_);                         \
          last_ut = now_ut;                                                                           \
          prev_msg = delta_next_msg_;                                                                 \
      } while (0)
  // Prototypes of functions whose definitions are not in this part of the file.

  // unit test entry points
  int buffer_unittest(void);
  int pgc_unittest(void);
  int mrg_unittest(void);
  int julytest(void);
  int pluginsd_parser_unittest(void);
  int unittest_rrdpush_compressions(void);
  int uuid_unittest(void);
  int progress_unittest(void);
  int dyncfg_unittest(void);

  // subsystem initializers
  void replication_initialize(void);
  void bearer_tokens_init(void);
  1106. int unittest_prepare_rrd(char **user) {
  1107. post_conf_load(user);
  1108. get_netdata_configured_variables();
  1109. default_rrd_update_every = 1;
  1110. default_rrd_memory_mode = RRD_MEMORY_MODE_RAM;
  1111. default_health_enabled = 0;
  1112. storage_tiers = 1;
  1113. registry_init();
  1114. if(rrd_init("unittest", NULL, true)) {
  1115. fprintf(stderr, "rrd_init failed for unittest\n");
  1116. return 1;
  1117. }
  1118. default_rrdpush_enabled = 0;
  1119. return 0;
  1120. }
  1121. int main(int argc, char **argv) {
  1122. // initialize the system clocks
  1123. clocks_init();
  1124. netdata_start_time = now_realtime_sec();
  1125. usec_t started_ut = now_monotonic_usec();
  1126. usec_t last_ut = started_ut;
  1127. const char *prev_msg = NULL;
  1128. int i;
  1129. int config_loaded = 0;
  1130. int dont_fork = 0;
  1131. bool close_open_fds = true;
  1132. size_t default_stacksize;
  1133. char *user = NULL;
  1134. static_threads = static_threads_get();
  1135. netdata_ready = false;
  1136. // set the name for logging
  1137. program_name = "netdata";
  1138. if (argc > 1 && strcmp(argv[1], SPAWN_SERVER_COMMAND_LINE_ARGUMENT) == 0) {
  1139. // don't run netdata, this is the spawn server
  1140. spawn_server();
  1141. exit(0);
  1142. }
  1143. // parse options
  1144. {
  1145. int num_opts = sizeof(option_definitions) / sizeof(struct option_def);
  1146. char optstring[(num_opts * 2) + 1];
  1147. int string_i = 0;
  1148. for( i = 0; i < num_opts; i++ ) {
  1149. optstring[string_i] = option_definitions[i].val;
  1150. string_i++;
  1151. if(option_definitions[i].arg_name) {
  1152. optstring[string_i] = ':';
  1153. string_i++;
  1154. }
  1155. }
  1156. // terminate optstring
  1157. optstring[string_i] ='\0';
  1158. optstring[(num_opts *2)] ='\0';
  1159. int opt;
  1160. while( (opt = getopt(argc, argv, optstring)) != -1 ) {
  1161. switch(opt) {
  1162. case 'c':
  1163. if(!load_netdata_conf(optarg, 1, &user)) {
  1164. netdata_log_error("Cannot load configuration file %s.", optarg);
  1165. return 1;
  1166. }
  1167. else {
  1168. netdata_log_debug(D_OPTIONS, "Configuration loaded from %s.", optarg);
  1169. load_cloud_conf(1);
  1170. config_loaded = 1;
  1171. }
  1172. break;
  1173. case 'D':
  1174. dont_fork = 1;
  1175. break;
  1176. case 'd':
  1177. dont_fork = 0;
  1178. break;
  1179. case 'h':
  1180. return help(0);
  1181. case 'i':
  1182. config_set(CONFIG_SECTION_WEB, "bind to", optarg);
  1183. break;
  1184. case 'P':
  1185. strncpy(pidfile, optarg, FILENAME_MAX);
  1186. pidfile[FILENAME_MAX] = '\0';
  1187. break;
  1188. case 'p':
  1189. config_set(CONFIG_SECTION_GLOBAL, "default port", optarg);
  1190. break;
  1191. case 's':
  1192. config_set(CONFIG_SECTION_GLOBAL, "host access prefix", optarg);
  1193. break;
  1194. case 't':
  1195. config_set(CONFIG_SECTION_GLOBAL, "update every", optarg);
  1196. break;
  1197. case 'u':
  1198. config_set(CONFIG_SECTION_GLOBAL, "run as user", optarg);
  1199. break;
  1200. case 'v':
  1201. case 'V':
  1202. printf("%s %s\n", program_name, program_version);
  1203. return 0;
  1204. case 'W':
  1205. {
  1206. char* stacksize_string = "stacksize=";
  1207. char* debug_flags_string = "debug_flags=";
  1208. char* claim_string = "claim";
  1209. #ifdef ENABLE_DBENGINE
  1210. char* createdataset_string = "createdataset=";
  1211. char* stresstest_string = "stresstest=";
  1212. if(strcmp(optarg, "pgd-tests") == 0) {
  1213. return pgd_test(argc, argv);
  1214. }
  1215. #endif
  1216. if(strcmp(optarg, "sqlite-meta-recover") == 0) {
  1217. sql_init_database(DB_CHECK_RECOVER, 0);
  1218. return 0;
  1219. }
  1220. if(strcmp(optarg, "sqlite-compact") == 0) {
  1221. sql_init_database(DB_CHECK_RECLAIM_SPACE, 0);
  1222. return 0;
  1223. }
  1224. if(strcmp(optarg, "unittest") == 0) {
  1225. unittest_running = true;
  1226. if (pluginsd_parser_unittest()) return 1;
  1227. if (unit_test_static_threads()) return 1;
  1228. if (unit_test_buffer()) return 1;
  1229. if (unit_test_str2ld()) return 1;
  1230. if (buffer_unittest()) return 1;
  1231. if (unit_test_bitmaps()) return 1;
  1232. // No call to load the config file on this code-path
  1233. if (unittest_prepare_rrd(&user)) return 1;
  1234. if (run_all_mockup_tests()) return 1;
  1235. if (unit_test_storage()) return 1;
  1236. #ifdef ENABLE_DBENGINE
  1237. if (test_dbengine()) return 1;
  1238. #endif
  1239. if (test_sqlite()) return 1;
  1240. if (string_unittest(10000)) return 1;
  1241. if (dictionary_unittest(10000)) return 1;
  1242. if (aral_unittest(10000)) return 1;
  1243. if (rrdlabels_unittest()) return 1;
  1244. if (ctx_unittest()) return 1;
  1245. if (uuid_unittest()) return 1;
  1246. if (dyncfg_unittest()) return 1;
  1247. fprintf(stderr, "\n\nALL TESTS PASSED\n\n");
  1248. return 0;
  1249. }
  1250. else if(strcmp(optarg, "escapetest") == 0) {
  1251. return command_argument_sanitization_tests();
  1252. }
  1253. else if(strcmp(optarg, "dicttest") == 0) {
  1254. unittest_running = true;
  1255. return dictionary_unittest(10000);
  1256. }
  1257. else if(strcmp(optarg, "araltest") == 0) {
  1258. unittest_running = true;
  1259. return aral_unittest(10000);
  1260. }
  1261. else if(strcmp(optarg, "stringtest") == 0) {
  1262. unittest_running = true;
  1263. return string_unittest(10000);
  1264. }
  1265. else if(strcmp(optarg, "rrdlabelstest") == 0) {
  1266. unittest_running = true;
  1267. return rrdlabels_unittest();
  1268. }
  1269. else if(strcmp(optarg, "buffertest") == 0) {
  1270. unittest_running = true;
  1271. return buffer_unittest();
  1272. }
  1273. else if(strcmp(optarg, "uuidtest") == 0) {
  1274. unittest_running = true;
  1275. return uuid_unittest();
  1276. }
  1277. #ifdef ENABLE_DBENGINE
  1278. else if(strcmp(optarg, "mctest") == 0) {
  1279. unittest_running = true;
  1280. return mc_unittest();
  1281. }
  1282. else if(strcmp(optarg, "ctxtest") == 0) {
  1283. unittest_running = true;
  1284. return ctx_unittest();
  1285. }
  1286. else if(strcmp(optarg, "metatest") == 0) {
  1287. unittest_running = true;
  1288. return metadata_unittest();
  1289. }
  1290. else if(strcmp(optarg, "pgctest") == 0) {
  1291. unittest_running = true;
  1292. return pgc_unittest();
  1293. }
  1294. else if(strcmp(optarg, "mrgtest") == 0) {
  1295. unittest_running = true;
  1296. return mrg_unittest();
  1297. }
  1298. else if(strcmp(optarg, "julytest") == 0) {
  1299. unittest_running = true;
  1300. return julytest();
  1301. }
  1302. else if(strcmp(optarg, "parsertest") == 0) {
  1303. unittest_running = true;
  1304. return pluginsd_parser_unittest();
  1305. }
  1306. else if(strcmp(optarg, "rrdpush_compressions_test") == 0) {
  1307. unittest_running = true;
  1308. return unittest_rrdpush_compressions();
  1309. }
  1310. else if(strcmp(optarg, "progresstest") == 0) {
  1311. unittest_running = true;
  1312. return progress_unittest();
  1313. }
  1314. else if(strcmp(optarg, "dyncfgtest") == 0) {
  1315. unittest_running = true;
  1316. if(unittest_prepare_rrd(&user))
  1317. return 1;
  1318. return dyncfg_unittest();
  1319. }
  1320. else if(strncmp(optarg, createdataset_string, strlen(createdataset_string)) == 0) {
  1321. optarg += strlen(createdataset_string);
  1322. unsigned history_seconds = strtoul(optarg, NULL, 0);
  1323. post_conf_load(&user);
  1324. get_netdata_configured_variables();
  1325. default_rrd_update_every = 1;
  1326. registry_init();
  1327. if(rrd_init("dbengine-dataset", NULL, true)) {
  1328. fprintf(stderr, "rrd_init failed for unittest\n");
  1329. return 1;
  1330. }
  1331. generate_dbengine_dataset(history_seconds);
  1332. return 0;
  1333. }
  1334. else if(strncmp(optarg, stresstest_string, strlen(stresstest_string)) == 0) {
  1335. char *endptr;
  1336. unsigned test_duration_sec = 0, dset_charts = 0, query_threads = 0, ramp_up_seconds = 0,
  1337. page_cache_mb = 0, disk_space_mb = 0, workers = 16;
  1338. optarg += strlen(stresstest_string);
  1339. test_duration_sec = (unsigned)strtoul(optarg, &endptr, 0);
  1340. if (',' == *endptr)
  1341. dset_charts = (unsigned)strtoul(endptr + 1, &endptr, 0);
  1342. if (',' == *endptr)
  1343. query_threads = (unsigned)strtoul(endptr + 1, &endptr, 0);
  1344. if (',' == *endptr)
  1345. ramp_up_seconds = (unsigned)strtoul(endptr + 1, &endptr, 0);
  1346. if (',' == *endptr)
  1347. page_cache_mb = (unsigned)strtoul(endptr + 1, &endptr, 0);
  1348. if (',' == *endptr)
  1349. disk_space_mb = (unsigned)strtoul(endptr + 1, &endptr, 0);
  1350. if (',' == *endptr)
  1351. workers = (unsigned)strtoul(endptr + 1, &endptr, 0);
  1352. if (workers > 1024)
  1353. workers = 1024;
  1354. char workers_str[16];
  1355. snprintf(workers_str, 15, "%u", workers);
  1356. setenv("UV_THREADPOOL_SIZE", workers_str, 1);
  1357. dbengine_stress_test(test_duration_sec, dset_charts, query_threads, ramp_up_seconds,
  1358. page_cache_mb, disk_space_mb);
  1359. return 0;
  1360. }
  1361. #endif
  1362. else if(strcmp(optarg, "simple-pattern") == 0) {
  1363. if(optind + 2 > argc) {
  1364. fprintf(stderr, "%s", "\nUSAGE: -W simple-pattern 'pattern' 'string'\n\n"
  1365. " Checks if 'pattern' matches the given 'string'.\n"
  1366. " - 'pattern' can be one or more space separated words.\n"
  1367. " - each 'word' can contain one or more asterisks.\n"
  1368. " - words starting with '!' give negative matches.\n"
  1369. " - words are processed left to right\n"
  1370. "\n"
  1371. "Examples:\n"
  1372. "\n"
  1373. " > match all veth interfaces, except veth0:\n"
  1374. "\n"
  1375. " -W simple-pattern '!veth0 veth*' 'veth12'\n"
  1376. "\n"
  1377. "\n"
  1378. " > match all *.ext files directly in /path/:\n"
  1379. " (this will not match *.ext files in a subdir of /path/)\n"
  1380. "\n"
  1381. " -W simple-pattern '!/path/*/*.ext /path/*.ext' '/path/test.ext'\n"
  1382. "\n"
  1383. );
  1384. return 1;
  1385. }
  1386. const char *haystack = argv[optind];
  1387. const char *needle = argv[optind + 1];
  1388. size_t len = strlen(needle) + 1;
  1389. char wildcarded[len];
  1390. SIMPLE_PATTERN *p = simple_pattern_create(haystack, NULL, SIMPLE_PATTERN_EXACT, true);
  1391. SIMPLE_PATTERN_RESULT ret = simple_pattern_matches_extract(p, needle, wildcarded, len);
  1392. simple_pattern_free(p);
  1393. if(ret == SP_MATCHED_POSITIVE) {
  1394. fprintf(stdout, "RESULT: POSITIVE MATCHED - pattern '%s' matches '%s', wildcarded '%s'\n", haystack, needle, wildcarded);
  1395. return 0;
  1396. }
  1397. else if(ret == SP_MATCHED_NEGATIVE) {
  1398. fprintf(stdout, "RESULT: NEGATIVE MATCHED - pattern '%s' matches '%s', wildcarded '%s'\n", haystack, needle, wildcarded);
  1399. return 0;
  1400. }
  1401. else {
  1402. fprintf(stdout, "RESULT: NOT MATCHED - pattern '%s' does not match '%s', wildcarded '%s'\n", haystack, needle, wildcarded);
  1403. return 1;
  1404. }
  1405. }
  1406. else if(strncmp(optarg, stacksize_string, strlen(stacksize_string)) == 0) {
  1407. optarg += strlen(stacksize_string);
  1408. config_set(CONFIG_SECTION_GLOBAL, "pthread stack size", optarg);
  1409. }
  1410. else if(strncmp(optarg, debug_flags_string, strlen(debug_flags_string)) == 0) {
  1411. optarg += strlen(debug_flags_string);
  1412. config_set(CONFIG_SECTION_LOGS, "debug flags", optarg);
  1413. debug_flags = strtoull(optarg, NULL, 0);
  1414. }
  1415. else if(strcmp(optarg, "set") == 0) {
  1416. if(optind + 3 > argc) {
  1417. fprintf(stderr, "%s", "\nUSAGE: -W set 'section' 'key' 'value'\n\n"
  1418. " Overwrites settings of netdata.conf.\n"
  1419. "\n"
  1420. " These options interact with: -c netdata.conf\n"
  1421. " If -c netdata.conf is given on the command line,\n"
  1422. " before -W set... the user may overwrite command\n"
  1423. " line parameters at netdata.conf\n"
  1424. " If -c netdata.conf is given after (or missing)\n"
  1425. " -W set... the user cannot overwrite the command line\n"
  1426. " parameters."
  1427. "\n"
  1428. );
  1429. return 1;
  1430. }
  1431. const char *section = argv[optind];
  1432. const char *key = argv[optind + 1];
  1433. const char *value = argv[optind + 2];
  1434. optind += 3;
  1435. // set this one as the default
  1436. // only if it is not already set in the config file
  1437. // so the caller can use -c netdata.conf before or
  1438. // after this parameter to prevent or allow overwriting
  1439. // variables at netdata.conf
  1440. config_set_default(section, key, value);
  1441. // fprintf(stderr, "SET section '%s', key '%s', value '%s'\n", section, key, value);
  1442. }
  1443. else if(strcmp(optarg, "set2") == 0) {
  1444. if(optind + 4 > argc) {
  1445. fprintf(stderr, "%s", "\nUSAGE: -W set 'conf_file' 'section' 'key' 'value'\n\n"
  1446. " Overwrites settings of netdata.conf or cloud.conf\n"
  1447. "\n"
  1448. " These options interact with: -c netdata.conf\n"
  1449. " If -c netdata.conf is given on the command line,\n"
  1450. " before -W set... the user may overwrite command\n"
  1451. " line parameters at netdata.conf\n"
  1452. " If -c netdata.conf is given after (or missing)\n"
  1453. " -W set... the user cannot overwrite the command line\n"
  1454. " parameters."
  1455. " conf_file can be \"cloud\" or \"netdata\".\n"
  1456. "\n"
  1457. );
  1458. return 1;
  1459. }
  1460. const char *conf_file = argv[optind]; /* "cloud" is cloud.conf, otherwise netdata.conf */
  1461. struct config *tmp_config = strcmp(conf_file, "cloud") ? &netdata_config : &cloud_config;
  1462. const char *section = argv[optind + 1];
  1463. const char *key = argv[optind + 2];
  1464. const char *value = argv[optind + 3];
  1465. optind += 4;
  1466. // set this one as the default
  1467. // only if it is not already set in the config file
  1468. // so the caller can use -c netdata.conf before or
  1469. // after this parameter to prevent or allow overwriting
  1470. // variables at netdata.conf
  1471. appconfig_set_default(tmp_config, section, key, value);
  1472. // fprintf(stderr, "SET section '%s', key '%s', value '%s'\n", section, key, value);
  1473. }
  1474. else if(strcmp(optarg, "get") == 0) {
  1475. if(optind + 3 > argc) {
  1476. fprintf(stderr, "%s", "\nUSAGE: -W get 'section' 'key' 'value'\n\n"
  1477. " Prints settings of netdata.conf.\n"
  1478. "\n"
  1479. " These options interact with: -c netdata.conf\n"
  1480. " -c netdata.conf has to be given before -W get.\n"
  1481. "\n"
  1482. );
  1483. return 1;
  1484. }
  1485. if(!config_loaded) {
  1486. fprintf(stderr, "warning: no configuration file has been loaded. Use -c CONFIG_FILE, before -W get. Using default config.\n");
  1487. load_netdata_conf(NULL, 0, &user);
  1488. }
  1489. get_netdata_configured_variables();
  1490. const char *section = argv[optind];
  1491. const char *key = argv[optind + 1];
  1492. const char *def = argv[optind + 2];
  1493. const char *value = config_get(section, key, def);
  1494. printf("%s\n", value);
  1495. return 0;
  1496. }
  1497. else if(strcmp(optarg, "get2") == 0) {
  1498. if(optind + 4 > argc) {
  1499. fprintf(stderr, "%s", "\nUSAGE: -W get2 'conf_file' 'section' 'key' 'value'\n\n"
  1500. " Prints settings of netdata.conf or cloud.conf\n"
  1501. "\n"
  1502. " These options interact with: -c netdata.conf\n"
  1503. " -c netdata.conf has to be given before -W get2.\n"
  1504. " conf_file can be \"cloud\" or \"netdata\".\n"
  1505. "\n"
  1506. );
  1507. return 1;
  1508. }
  1509. if(!config_loaded) {
  1510. fprintf(stderr, "warning: no configuration file has been loaded. Use -c CONFIG_FILE, before -W get. Using default config.\n");
  1511. load_netdata_conf(NULL, 0, &user);
  1512. load_cloud_conf(1);
  1513. }
  1514. get_netdata_configured_variables();
  1515. const char *conf_file = argv[optind]; /* "cloud" is cloud.conf, otherwise netdata.conf */
  1516. struct config *tmp_config = strcmp(conf_file, "cloud") ? &netdata_config : &cloud_config;
  1517. const char *section = argv[optind + 1];
  1518. const char *key = argv[optind + 2];
  1519. const char *def = argv[optind + 3];
  1520. const char *value = appconfig_get(tmp_config, section, key, def);
  1521. printf("%s\n", value);
  1522. return 0;
  1523. }
  1524. else if(strncmp(optarg, claim_string, strlen(claim_string)) == 0) {
  1525. /* will trigger a claiming attempt when the agent is initialized */
  1526. claiming_pending_arguments = optarg + strlen(claim_string);
  1527. }
  1528. else if(strcmp(optarg, "buildinfo") == 0) {
  1529. print_build_info();
  1530. return 0;
  1531. }
  1532. else if(strcmp(optarg, "buildinfojson") == 0) {
  1533. print_build_info_json();
  1534. return 0;
  1535. }
  1536. else if(strcmp(optarg, "keepopenfds") == 0) {
  1537. // Internal dev option to skip closing inherited
  1538. // open FDs. Useful, when we want to run the agent
  1539. // under profiling tools that open/maintain their
  1540. // own FDs.
  1541. close_open_fds = false;
  1542. } else {
  1543. fprintf(stderr, "Unknown -W parameter '%s'\n", optarg);
  1544. return help(1);
  1545. }
  1546. }
  1547. break;
  1548. default: /* ? */
  1549. fprintf(stderr, "Unknown parameter '%c'\n", opt);
  1550. return help(1);
  1551. }
  1552. }
  1553. }
  1554. if (close_open_fds == true) {
  1555. // close all open file descriptors, except the standard ones
  1556. // the caller may have left open files (lxc-attach has this issue)
  1557. for_each_open_fd(OPEN_FD_ACTION_CLOSE, OPEN_FD_EXCLUDE_STDIN | OPEN_FD_EXCLUDE_STDOUT | OPEN_FD_EXCLUDE_STDERR);
  1558. }
  1559. if(!config_loaded) {
  1560. load_netdata_conf(NULL, 0, &user);
  1561. load_cloud_conf(0);
  1562. }
  1563. // ------------------------------------------------------------------------
  1564. // initialize netdata
  1565. {
  1566. char *pmax = config_get(CONFIG_SECTION_GLOBAL, "glibc malloc arena max for plugins", "1");
  1567. if(pmax && *pmax)
  1568. setenv("MALLOC_ARENA_MAX", pmax, 1);
  1569. #if defined(HAVE_C_MALLOPT)
  1570. i = (int)config_get_number(CONFIG_SECTION_GLOBAL, "glibc malloc arena max for netdata", 1);
  1571. if(i > 0)
  1572. mallopt(M_ARENA_MAX, 1);
  1573. #ifdef NETDATA_INTERNAL_CHECKS
  1574. mallopt(M_PERTURB, 0x5A);
  1575. // mallopt(M_MXFAST, 0);
  1576. #endif
  1577. #endif
  1578. // set libuv worker threads
  1579. libuv_worker_threads = (int)get_netdata_cpus() * 6;
  1580. if(libuv_worker_threads < MIN_LIBUV_WORKER_THREADS)
  1581. libuv_worker_threads = MIN_LIBUV_WORKER_THREADS;
  1582. if(libuv_worker_threads > MAX_LIBUV_WORKER_THREADS)
  1583. libuv_worker_threads = MAX_LIBUV_WORKER_THREADS;
  1584. libuv_worker_threads = config_get_number(CONFIG_SECTION_GLOBAL, "libuv worker threads", libuv_worker_threads);
  1585. if(libuv_worker_threads < MIN_LIBUV_WORKER_THREADS) {
  1586. libuv_worker_threads = MIN_LIBUV_WORKER_THREADS;
  1587. config_set_number(CONFIG_SECTION_GLOBAL, "libuv worker threads", libuv_worker_threads);
  1588. }
  1589. {
  1590. char buf[20 + 1];
  1591. snprintfz(buf, sizeof(buf) - 1, "%d", libuv_worker_threads);
  1592. setenv("UV_THREADPOOL_SIZE", buf, 1);
  1593. }
  1594. // prepare configuration environment variables for the plugins
  1595. get_netdata_configured_variables();
  1596. set_global_environment();
  1597. // work while we are cd into config_dir
  1598. // to allow the plugins refer to their config
  1599. // files using relative filenames
  1600. if(chdir(netdata_configured_user_config_dir) == -1)
  1601. fatal("Cannot cd to '%s'", netdata_configured_user_config_dir);
  1602. // Get execution path before switching user to avoid permission issues
  1603. get_netdata_execution_path();
  1604. }
  1605. {
  1606. // --------------------------------------------------------------------
  1607. // get the debugging flags from the configuration file
  1608. char *flags = config_get(CONFIG_SECTION_LOGS, "debug flags", "0x0000000000000000");
  1609. setenv("NETDATA_DEBUG_FLAGS", flags, 1);
  1610. debug_flags = strtoull(flags, NULL, 0);
  1611. netdata_log_debug(D_OPTIONS, "Debug flags set to '0x%" PRIX64 "'.", debug_flags);
  1612. if(debug_flags != 0) {
  1613. struct rlimit rl = { RLIM_INFINITY, RLIM_INFINITY };
  1614. if(setrlimit(RLIMIT_CORE, &rl) != 0)
  1615. netdata_log_error("Cannot request unlimited core dumps for debugging... Proceeding anyway...");
  1616. #ifdef HAVE_SYS_PRCTL_H
  1617. prctl(PR_SET_DUMPABLE, 1, 0, 0, 0);
  1618. #endif
  1619. }
  1620. // --------------------------------------------------------------------
  1621. // get log filenames and settings
  1622. log_init();
  1623. nd_log_limits_unlimited();
  1624. // initialize the log files
  1625. nd_log_initialize();
  1626. netdata_log_info("Netdata agent version \""VERSION"\" is starting");
  1627. ieee754_doubles = is_system_ieee754_double();
  1628. if(!ieee754_doubles)
  1629. globally_disabled_capabilities |= STREAM_CAP_IEEE754;
  1630. aral_judy_init();
  1631. get_system_timezone();
  1632. bearer_tokens_init();
  1633. replication_initialize();
  1634. rrd_functions_inflight_init();
  1635. // --------------------------------------------------------------------
  1636. // get the certificate and start security
  1637. #ifdef ENABLE_HTTPS
  1638. security_init();
  1639. #endif
  1640. // --------------------------------------------------------------------
  1641. // This is the safest place to start the SILENCERS structure
  1642. set_silencers_filename();
  1643. health_initialize_global_silencers();
  1644. // --------------------------------------------------------------------
  1645. // Initialize ML configuration
  1646. delta_startup_time("initialize ML");
  1647. ml_init();
  1648. // --------------------------------------------------------------------
  1649. // setup process signals
  1650. // block signals while initializing threads.
  1651. // this causes the threads to block signals.
  1652. delta_startup_time("initialize signals");
  1653. signals_block();
  1654. signals_init(); // setup the signals we want to use
  1655. // --------------------------------------------------------------------
  1656. // check which threads are enabled and initialize them
  1657. delta_startup_time("initialize static threads");
  1658. // setup threads configs
  1659. default_stacksize = netdata_threads_init();
  1660. #ifdef NETDATA_INTERNAL_CHECKS
  1661. config_set_boolean(CONFIG_SECTION_PLUGINS, "netdata monitoring", true);
  1662. config_set_boolean(CONFIG_SECTION_PLUGINS, "netdata monitoring extended", true);
  1663. #endif
  1664. if(config_get_boolean(CONFIG_SECTION_PLUGINS, "netdata monitoring extended", false))
  1665. // this has to run before starting any other threads that use workers
  1666. workers_utilization_enable();
  1667. for (i = 0; static_threads[i].name != NULL ; i++) {
  1668. struct netdata_static_thread *st = &static_threads[i];
  1669. if(st->config_name)
  1670. st->enabled = config_get_boolean(st->config_section, st->config_name, st->enabled);
  1671. if(st->enabled && st->init_routine)
  1672. st->init_routine();
  1673. if(st->env_name)
  1674. setenv(st->env_name, st->enabled?"YES":"NO", 1);
  1675. if(st->global_variable)
  1676. *st->global_variable = (st->enabled) ? true : false;
  1677. }
  1678. // --------------------------------------------------------------------
  1679. // create the listening sockets
  1680. delta_startup_time("initialize web server");
  1681. web_client_api_v1_init();
  1682. web_server_threading_selection();
  1683. if(web_server_mode != WEB_SERVER_MODE_NONE)
  1684. api_listen_sockets_setup();
  1685. #ifdef ENABLE_H2O
  1686. delta_startup_time("initialize h2o server");
  1687. for (int i = 0; static_threads[i].name; i++) {
  1688. if (static_threads[i].start_routine == h2o_main)
  1689. static_threads[i].enabled = httpd_is_enabled();
  1690. }
  1691. #endif
  1692. }
  1693. delta_startup_time("set resource limits");
  1694. #ifdef NETDATA_INTERNAL_CHECKS
  1695. if(debug_flags != 0) {
  1696. struct rlimit rl = { RLIM_INFINITY, RLIM_INFINITY };
  1697. if(setrlimit(RLIMIT_CORE, &rl) != 0)
  1698. netdata_log_error("Cannot request unlimited core dumps for debugging... Proceeding anyway...");
  1699. #ifdef HAVE_SYS_PRCTL_H
  1700. prctl(PR_SET_DUMPABLE, 1, 0, 0, 0);
  1701. #endif
  1702. }
  1703. #endif /* NETDATA_INTERNAL_CHECKS */
  1704. set_nofile_limit(&rlimit_nofile);
  1705. delta_startup_time("become daemon");
  1706. // fork, switch user, create pid file, set process priority
  1707. if(become_daemon(dont_fork, user) == -1)
  1708. fatal("Cannot daemonize myself.");
  1709. // The "HOME" env var points to the root's home dir because Netdata starts as root. Can't use "HOME".
  1710. struct passwd *pw = getpwuid(getuid());
  1711. if (config_exists(CONFIG_SECTION_DIRECTORIES, "home") || !pw || !pw->pw_dir) {
  1712. netdata_configured_home_dir = config_get(CONFIG_SECTION_DIRECTORIES, "home", netdata_configured_home_dir);
  1713. } else {
  1714. netdata_configured_home_dir = config_get(CONFIG_SECTION_DIRECTORIES, "home", pw->pw_dir);
  1715. }
  1716. setenv("HOME", netdata_configured_home_dir, 1);
  1717. dyncfg_init(true);
  1718. netdata_log_info("netdata started on pid %d.", getpid());
  1719. delta_startup_time("initialize threads after fork");
  1720. netdata_threads_init_after_fork((size_t)config_get_number(CONFIG_SECTION_GLOBAL, "pthread stack size", (long)default_stacksize));
  1721. // initialize internal registry
  1722. delta_startup_time("initialize registry");
  1723. registry_init();
  1724. // fork the spawn server
  1725. delta_startup_time("fork the spawn server");
  1726. spawn_init();
  1727. /*
  1728. * Libuv uv_spawn() uses SIGCHLD internally:
  1729. * https://github.com/libuv/libuv/blob/cc51217a317e96510fbb284721d5e6bc2af31e33/src/unix/process.c#L485
  1730. * and inadvertently replaces the netdata signal handler which was setup during initialization.
  1731. * Thusly, we must explicitly restore the signal handler for SIGCHLD.
  1732. * Warning: extreme care is needed when mixing and matching POSIX and libuv.
  1733. */
  1734. signals_restore_SIGCHLD();
  1735. // ------------------------------------------------------------------------
  1736. // initialize rrd, registry, health, rrdpush, etc.
  1737. delta_startup_time("collecting system info");
  1738. netdata_anonymous_statistics_enabled=-1;
  1739. struct rrdhost_system_info *system_info = callocz(1, sizeof(struct rrdhost_system_info));
  1740. __atomic_sub_fetch(&netdata_buffers_statistics.rrdhost_allocations_size, sizeof(struct rrdhost_system_info), __ATOMIC_RELAXED);
  1741. get_system_info(system_info);
  1742. (void) registry_get_this_machine_guid();
  1743. system_info->hops = 0;
  1744. get_install_type(&system_info->install_type, &system_info->prebuilt_arch, &system_info->prebuilt_dist);
  1745. delta_startup_time("initialize RRD structures");
  1746. if(rrd_init(netdata_configured_hostname, system_info, false)) {
  1747. set_late_global_environment(system_info);
  1748. fatal("Cannot initialize localhost instance with name '%s'.", netdata_configured_hostname);
  1749. }
  1750. delta_startup_time("check for incomplete shutdown");
  1751. char agent_crash_file[FILENAME_MAX + 1];
  1752. char agent_incomplete_shutdown_file[FILENAME_MAX + 1];
  1753. snprintfz(agent_incomplete_shutdown_file, FILENAME_MAX, "%s/.agent_incomplete_shutdown", netdata_configured_varlib_dir);
  1754. int incomplete_shutdown_detected = (unlink(agent_incomplete_shutdown_file) == 0);
  1755. snprintfz(agent_crash_file, FILENAME_MAX, "%s/.agent_crash", netdata_configured_varlib_dir);
  1756. int crash_detected = (unlink(agent_crash_file) == 0);
  1757. int fd = open(agent_crash_file, O_WRONLY | O_CREAT | O_TRUNC, 444);
  1758. if (fd >= 0)
  1759. close(fd);
  1760. // ------------------------------------------------------------------------
  1761. // Claim netdata agent to a cloud endpoint
  1762. delta_startup_time("collect claiming info");
  1763. if (claiming_pending_arguments)
  1764. claim_agent(claiming_pending_arguments, false, NULL);
  1765. load_claiming_state();
  1766. // ------------------------------------------------------------------------
  1767. // enable log flood protection
  1768. nd_log_limits_reset();
  1769. // Load host labels
  1770. delta_startup_time("collect host labels");
  1771. reload_host_labels();
  1772. // ------------------------------------------------------------------------
  1773. // spawn the threads
  1774. delta_startup_time("start the static threads");
  1775. web_server_config_options();
  1776. netdata_zero_metrics_enabled = config_get_boolean_ondemand(CONFIG_SECTION_DB, "enable zero metrics", CONFIG_BOOLEAN_NO);
  1777. set_late_global_environment(system_info);
  1778. for (i = 0; static_threads[i].name != NULL ; i++) {
  1779. struct netdata_static_thread *st = &static_threads[i];
  1780. if(st->enabled) {
  1781. st->thread = mallocz(sizeof(netdata_thread_t));
  1782. netdata_log_debug(D_SYSTEM, "Starting thread %s.", st->name);
  1783. netdata_thread_create(st->thread, st->name, NETDATA_THREAD_OPTION_DEFAULT, st->start_routine, st);
  1784. }
  1785. else
  1786. netdata_log_debug(D_SYSTEM, "Not starting thread %s.", st->name);
  1787. }
  1788. ml_start_threads();
  1789. // ------------------------------------------------------------------------
  1790. // Initialize netdata agent command serving from cli and signals
  1791. delta_startup_time("initialize commands API");
  1792. commands_init();
  1793. delta_startup_time("ready");
  1794. usec_t ready_ut = now_monotonic_usec();
  1795. netdata_log_info("NETDATA STARTUP: completed in %llu ms. Enjoy real-time performance monitoring!", (ready_ut - started_ut) / USEC_PER_MS);
  1796. netdata_ready = true;
  1797. analytics_statistic_t start_statistic = { "START", "-", "-" };
  1798. analytics_statistic_send(&start_statistic);
  1799. if (crash_detected) {
  1800. analytics_statistic_t crash_statistic = { "CRASH", "-", "-" };
  1801. analytics_statistic_send(&crash_statistic);
  1802. }
  1803. if (incomplete_shutdown_detected) {
  1804. analytics_statistic_t incomplete_shutdown_statistic = { "INCOMPLETE_SHUTDOWN", "-", "-" };
  1805. analytics_statistic_send(&incomplete_shutdown_statistic);
  1806. }
  1807. //check if ANALYTICS needs to start
  1808. if (netdata_anonymous_statistics_enabled == 1) {
  1809. for (i = 0; static_threads[i].name != NULL; i++) {
  1810. if (!strncmp(static_threads[i].name, "ANALYTICS", 9)) {
  1811. struct netdata_static_thread *st = &static_threads[i];
  1812. st->thread = mallocz(sizeof(netdata_thread_t));
  1813. st->enabled = 1;
  1814. netdata_log_debug(D_SYSTEM, "Starting thread %s.", st->name);
  1815. netdata_thread_create(st->thread, st->name, NETDATA_THREAD_OPTION_DEFAULT, st->start_routine, st);
  1816. }
  1817. }
  1818. }
  1819. // ------------------------------------------------------------------------
  1820. // Report ACLK build failure
  1821. #ifndef ENABLE_ACLK
  1822. netdata_log_error("This agent doesn't have ACLK.");
  1823. char filename[FILENAME_MAX + 1];
  1824. snprintfz(filename, FILENAME_MAX, "%s/.aclk_report_sent", netdata_configured_varlib_dir);
  1825. if (netdata_anonymous_statistics_enabled > 0 && access(filename, F_OK)) { // -1 -> not initialized
  1826. analytics_statistic_t statistic = { "ACLK_DISABLED", "-", "-" };
  1827. analytics_statistic_send(&statistic);
  1828. int fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC, 444);
  1829. if (fd == -1)
  1830. netdata_log_error("Cannot create file '%s'. Please fix this.", filename);
  1831. else
  1832. close(fd);
  1833. }
  1834. #endif
  1835. // ------------------------------------------------------------------------
  1836. // initialize WebRTC
  1837. webrtc_initialize();
  1838. // ------------------------------------------------------------------------
  1839. // unblock signals
  1840. signals_unblock();
  1841. // ------------------------------------------------------------------------
  1842. // Handle signals
  1843. signals_handle();
  1844. // should never reach this point
  1845. // but we need it for rpmlint #2752
  1846. return 1;
  1847. }