main.c 94 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
7227822792280228122822283228422852286
  1. // SPDX-License-Identifier: GPL-3.0-or-later
  2. #include "common.h"
  3. #include "buildinfo.h"
  4. #include "static_threads.h"
  5. #include "database/engine/page_test.h"
  6. #if defined(ENV32BIT)
  7. #warning COMPILING 32BIT NETDATA
  8. #endif
  9. bool unittest_running = false;
  10. int netdata_zero_metrics_enabled;
  11. int netdata_anonymous_statistics_enabled;
  12. int libuv_worker_threads = MIN_LIBUV_WORKER_THREADS;
  13. bool ieee754_doubles = false;
  14. time_t netdata_start_time = 0;
  15. struct netdata_static_thread *static_threads;
  16. struct config netdata_config = {
  17. .first_section = NULL,
  18. .last_section = NULL,
  19. .mutex = NETDATA_MUTEX_INITIALIZER,
  20. .index = {
  21. .avl_tree = {
  22. .root = NULL,
  23. .compar = appconfig_section_compare
  24. },
  25. .rwlock = AVL_LOCK_INITIALIZER
  26. }
  27. };
  28. typedef struct service_thread {
  29. pid_t tid;
  30. SERVICE_THREAD_TYPE type;
  31. SERVICE_TYPE services;
  32. char name[NETDATA_THREAD_NAME_MAX + 1];
  33. bool stop_immediately;
  34. bool cancelled;
  35. union {
  36. netdata_thread_t netdata_thread;
  37. uv_thread_t uv_thread;
  38. };
  39. force_quit_t force_quit_callback;
  40. request_quit_t request_quit_callback;
  41. void *data;
  42. } SERVICE_THREAD;
  43. struct service_globals {
  44. SPINLOCK lock;
  45. Pvoid_t pid_judy;
  46. } service_globals = {
  47. .pid_judy = NULL,
  48. };
  49. SERVICE_THREAD *service_register(SERVICE_THREAD_TYPE thread_type, request_quit_t request_quit_callback, force_quit_t force_quit_callback, void *data, bool update __maybe_unused) {
  50. SERVICE_THREAD *sth = NULL;
  51. pid_t tid = gettid();
  52. spinlock_lock(&service_globals.lock);
  53. Pvoid_t *PValue = JudyLIns(&service_globals.pid_judy, tid, PJE0);
  54. if(!*PValue) {
  55. sth = callocz(1, sizeof(SERVICE_THREAD));
  56. sth->tid = tid;
  57. sth->type = thread_type;
  58. sth->request_quit_callback = request_quit_callback;
  59. sth->force_quit_callback = force_quit_callback;
  60. sth->data = data;
  61. os_thread_get_current_name_np(sth->name);
  62. *PValue = sth;
  63. switch(thread_type) {
  64. default:
  65. case SERVICE_THREAD_TYPE_NETDATA:
  66. sth->netdata_thread = netdata_thread_self();
  67. break;
  68. case SERVICE_THREAD_TYPE_EVENT_LOOP:
  69. case SERVICE_THREAD_TYPE_LIBUV:
  70. sth->uv_thread = uv_thread_self();
  71. break;
  72. }
  73. }
  74. else {
  75. sth = *PValue;
  76. }
  77. spinlock_unlock(&service_globals.lock);
  78. return sth;
  79. }
  80. void service_exits(void) {
  81. pid_t tid = gettid();
  82. spinlock_lock(&service_globals.lock);
  83. Pvoid_t *PValue = JudyLGet(service_globals.pid_judy, tid, PJE0);
  84. if(PValue) {
  85. freez(*PValue);
  86. JudyLDel(&service_globals.pid_judy, tid, PJE0);
  87. }
  88. spinlock_unlock(&service_globals.lock);
  89. }
  90. bool service_running(SERVICE_TYPE service) {
  91. static __thread SERVICE_THREAD *sth = NULL;
  92. if(unlikely(!sth))
  93. sth = service_register(SERVICE_THREAD_TYPE_NETDATA, NULL, NULL, NULL, false);
  94. sth->services |= service;
  95. return !(sth->stop_immediately || netdata_exit);
  96. }
  97. void service_signal_exit(SERVICE_TYPE service) {
  98. spinlock_lock(&service_globals.lock);
  99. Pvoid_t *PValue;
  100. Word_t tid = 0;
  101. bool first = true;
  102. while((PValue = JudyLFirstThenNext(service_globals.pid_judy, &tid, &first))) {
  103. SERVICE_THREAD *sth = *PValue;
  104. if((sth->services & service)) {
  105. sth->stop_immediately = true;
  106. if(sth->request_quit_callback) {
  107. spinlock_unlock(&service_globals.lock);
  108. sth->request_quit_callback(sth->data);
  109. spinlock_lock(&service_globals.lock);
  110. }
  111. }
  112. }
  113. spinlock_unlock(&service_globals.lock);
  114. }
  115. static void service_to_buffer(BUFFER *wb, SERVICE_TYPE service) {
  116. if(service & SERVICE_MAINTENANCE)
  117. buffer_strcat(wb, "MAINTENANCE ");
  118. if(service & SERVICE_COLLECTORS)
  119. buffer_strcat(wb, "COLLECTORS ");
  120. if(service & SERVICE_REPLICATION)
  121. buffer_strcat(wb, "REPLICATION ");
  122. if(service & ABILITY_DATA_QUERIES)
  123. buffer_strcat(wb, "DATA_QUERIES ");
  124. if(service & ABILITY_WEB_REQUESTS)
  125. buffer_strcat(wb, "WEB_REQUESTS ");
  126. if(service & SERVICE_WEB_SERVER)
  127. buffer_strcat(wb, "WEB_SERVER ");
  128. if(service & SERVICE_ACLK)
  129. buffer_strcat(wb, "ACLK ");
  130. if(service & SERVICE_HEALTH)
  131. buffer_strcat(wb, "HEALTH ");
  132. if(service & SERVICE_STREAMING)
  133. buffer_strcat(wb, "STREAMING ");
  134. if(service & ABILITY_STREAMING_CONNECTIONS)
  135. buffer_strcat(wb, "STREAMING_CONNECTIONS ");
  136. if(service & SERVICE_CONTEXT)
  137. buffer_strcat(wb, "CONTEXT ");
  138. if(service & SERVICE_ANALYTICS)
  139. buffer_strcat(wb, "ANALYTICS ");
  140. if(service & SERVICE_EXPORTERS)
  141. buffer_strcat(wb, "EXPORTERS ");
  142. if(service & SERVICE_HTTPD)
  143. buffer_strcat(wb, "HTTPD ");
  144. }
  145. static bool service_wait_exit(SERVICE_TYPE service, usec_t timeout_ut) {
  146. BUFFER *service_list = buffer_create(1024, NULL);
  147. BUFFER *thread_list = buffer_create(1024, NULL);
  148. usec_t started_ut = now_monotonic_usec(), ended_ut;
  149. size_t running;
  150. SERVICE_TYPE running_services = 0;
  151. // cancel the threads
  152. running = 0;
  153. running_services = 0;
  154. {
  155. buffer_flush(thread_list);
  156. spinlock_lock(&service_globals.lock);
  157. Pvoid_t *PValue;
  158. Word_t tid = 0;
  159. bool first = true;
  160. while((PValue = JudyLFirstThenNext(service_globals.pid_judy, &tid, &first))) {
  161. SERVICE_THREAD *sth = *PValue;
  162. if(sth->services & service && sth->tid != gettid() && !sth->cancelled) {
  163. sth->cancelled = true;
  164. switch(sth->type) {
  165. default:
  166. case SERVICE_THREAD_TYPE_NETDATA:
  167. netdata_thread_cancel(sth->netdata_thread);
  168. break;
  169. case SERVICE_THREAD_TYPE_EVENT_LOOP:
  170. case SERVICE_THREAD_TYPE_LIBUV:
  171. break;
  172. }
  173. if(running)
  174. buffer_strcat(thread_list, ", ");
  175. buffer_sprintf(thread_list, "'%s' (%d)", sth->name, sth->tid);
  176. running++;
  177. running_services |= sth->services & service;
  178. if(sth->force_quit_callback) {
  179. spinlock_unlock(&service_globals.lock);
  180. sth->force_quit_callback(sth->data);
  181. spinlock_lock(&service_globals.lock);
  182. continue;
  183. }
  184. }
  185. }
  186. spinlock_unlock(&service_globals.lock);
  187. }
  188. service_signal_exit(service);
  189. // signal them to stop
  190. size_t last_running = 0;
  191. size_t stale_time_ut = 0;
  192. usec_t sleep_ut = 50 * USEC_PER_MS;
  193. size_t log_countdown_ut = sleep_ut;
  194. do {
  195. if(running != last_running)
  196. stale_time_ut = 0;
  197. last_running = running;
  198. running = 0;
  199. running_services = 0;
  200. buffer_flush(thread_list);
  201. spinlock_lock(&service_globals.lock);
  202. Pvoid_t *PValue;
  203. Word_t tid = 0;
  204. bool first = true;
  205. while((PValue = JudyLFirstThenNext(service_globals.pid_judy, &tid, &first))) {
  206. SERVICE_THREAD *sth = *PValue;
  207. if(sth->services & service && sth->tid != gettid()) {
  208. if(running)
  209. buffer_strcat(thread_list, ", ");
  210. buffer_sprintf(thread_list, "'%s' (%d)", sth->name, sth->tid);
  211. running_services |= sth->services & service;
  212. running++;
  213. }
  214. }
  215. spinlock_unlock(&service_globals.lock);
  216. if(running) {
  217. log_countdown_ut -= (log_countdown_ut >= sleep_ut) ? sleep_ut : log_countdown_ut;
  218. if(log_countdown_ut == 0 || running != last_running) {
  219. log_countdown_ut = 20 * sleep_ut;
  220. buffer_flush(service_list);
  221. service_to_buffer(service_list, running_services);
  222. netdata_log_info("SERVICE CONTROL: waiting for the following %zu services [ %s] to exit: %s",
  223. running, buffer_tostring(service_list),
  224. running <= 10 ? buffer_tostring(thread_list) : "");
  225. }
  226. sleep_usec(sleep_ut);
  227. stale_time_ut += sleep_ut;
  228. }
  229. ended_ut = now_monotonic_usec();
  230. } while(running && (ended_ut - started_ut < timeout_ut || stale_time_ut < timeout_ut));
  231. if(running) {
  232. buffer_flush(service_list);
  233. service_to_buffer(service_list, running_services);
  234. netdata_log_info("SERVICE CONTROL: "
  235. "the following %zu service(s) [ %s] take too long to exit: %s; "
  236. "giving up on them...",
  237. running, buffer_tostring(service_list),
  238. buffer_tostring(thread_list));
  239. }
  240. buffer_free(thread_list);
  241. buffer_free(service_list);
  242. return (running == 0);
  243. }
  // Logs how long the previous shutdown step took (flagging it when `timeout`
  // is set) and announces the next step, `msg`.
  //
  // The macro reads and updates three variables that must exist in the
  // caller's scope: usec_t last_ut, const char *prev_msg and bool timeout.
  // `msg` is evaluated more than once, so pass a plain string.
  // Wrapped in do { } while(0) so that it behaves as a single statement,
  // including in an unbraced if/else.
  #define delta_shutdown_time(msg)                                                                        \
      do {                                                                                                \
          usec_t now_ut = now_monotonic_usec();                                                           \
          if(prev_msg)                                                                                    \
              netdata_log_info("NETDATA SHUTDOWN: in %7llu ms, %s%s - next: %s",                          \
                               (now_ut - last_ut) / USEC_PER_MS, (timeout)?"(TIMEOUT) ":"", prev_msg, (msg)); \
          else                                                                                            \
              netdata_log_info("NETDATA SHUTDOWN: next: %s", (msg));                                      \
          last_ut = now_ut;                                                                               \
          prev_msg = (msg);                                                                               \
          timeout = false;                                                                                \
      } while(0)
  255. void web_client_cache_destroy(void);
  256. void netdata_cleanup_and_exit(int ret) {
  257. usec_t started_ut = now_monotonic_usec();
  258. usec_t last_ut = started_ut;
  259. const char *prev_msg = NULL;
  260. bool timeout = false;
  261. nd_log_limits_unlimited();
  262. netdata_log_info("NETDATA SHUTDOWN: initializing shutdown with code %d...", ret);
  263. send_statistics("EXIT", ret?"ERROR":"OK","-");
  264. delta_shutdown_time("create shutdown file");
  265. char agent_crash_file[FILENAME_MAX + 1];
  266. char agent_incomplete_shutdown_file[FILENAME_MAX + 1];
  267. snprintfz(agent_crash_file, FILENAME_MAX, "%s/.agent_crash", netdata_configured_varlib_dir);
  268. snprintfz(agent_incomplete_shutdown_file, FILENAME_MAX, "%s/.agent_incomplete_shutdown", netdata_configured_varlib_dir);
  269. (void) rename(agent_crash_file, agent_incomplete_shutdown_file);
  270. #ifdef ENABLE_DBENGINE
  271. if(dbengine_enabled) {
  272. delta_shutdown_time("dbengine exit mode");
  273. for (size_t tier = 0; tier < storage_tiers; tier++)
  274. rrdeng_exit_mode(multidb_ctx[tier]);
  275. }
  276. #endif
  277. delta_shutdown_time("close webrtc connections");
  278. webrtc_close_all_connections();
  279. delta_shutdown_time("disable maintenance, new queries, new web requests, new streaming connections and aclk");
  280. service_signal_exit(
  281. SERVICE_MAINTENANCE
  282. | ABILITY_DATA_QUERIES
  283. | ABILITY_WEB_REQUESTS
  284. | ABILITY_STREAMING_CONNECTIONS
  285. | SERVICE_ACLK
  286. | SERVICE_ACLKSYNC
  287. );
  288. delta_shutdown_time("stop replication, exporters, health and web servers threads");
  289. timeout = !service_wait_exit(
  290. SERVICE_EXPORTERS
  291. | SERVICE_HEALTH
  292. | SERVICE_WEB_SERVER
  293. | SERVICE_HTTPD
  294. , 3 * USEC_PER_SEC);
  295. delta_shutdown_time("stop collectors and streaming threads");
  296. timeout = !service_wait_exit(
  297. SERVICE_COLLECTORS
  298. | SERVICE_STREAMING
  299. , 3 * USEC_PER_SEC);
  300. delta_shutdown_time("stop replication threads");
  301. timeout = !service_wait_exit(
  302. SERVICE_REPLICATION // replication has to be stopped after STREAMING, because it cleans up ARAL
  303. , 3 * USEC_PER_SEC);
  304. delta_shutdown_time("prepare metasync shutdown");
  305. metadata_sync_shutdown_prepare();
  306. delta_shutdown_time("disable ML detection and training threads");
  307. ml_stop_threads();
  308. ml_fini();
  309. delta_shutdown_time("stop context thread");
  310. timeout = !service_wait_exit(
  311. SERVICE_CONTEXT
  312. , 3 * USEC_PER_SEC);
  313. delta_shutdown_time("stop maintenance thread");
  314. timeout = !service_wait_exit(
  315. SERVICE_MAINTENANCE
  316. , 3 * USEC_PER_SEC);
  317. delta_shutdown_time("clear web client cache");
  318. web_client_cache_destroy();
  319. delta_shutdown_time("clean rrdhost database");
  320. rrdhost_cleanup_all();
  321. delta_shutdown_time("stop aclk threads");
  322. timeout = !service_wait_exit(
  323. SERVICE_ACLK
  324. , 3 * USEC_PER_SEC);
  325. delta_shutdown_time("stop all remaining worker threads");
  326. timeout = !service_wait_exit(~0, 10 * USEC_PER_SEC);
  327. delta_shutdown_time("cancel main threads");
  328. cancel_main_threads();
  329. if(!ret) {
  330. // exit cleanly
  331. #ifdef ENABLE_DBENGINE
  332. if(dbengine_enabled) {
  333. delta_shutdown_time("flush dbengine tiers");
  334. for (size_t tier = 0; tier < storage_tiers; tier++)
  335. rrdeng_prepare_exit(multidb_ctx[tier]);
  336. for (size_t tier = 0; tier < storage_tiers; tier++) {
  337. if (!multidb_ctx[tier])
  338. continue;
  339. completion_wait_for(&multidb_ctx[tier]->quiesce.completion);
  340. completion_destroy(&multidb_ctx[tier]->quiesce.completion);
  341. }
  342. }
  343. #endif
  344. // free the database
  345. delta_shutdown_time("stop collection for all hosts");
  346. // rrdhost_free_all();
  347. rrd_finalize_collection_for_all_hosts();
  348. delta_shutdown_time("stop metasync threads");
  349. metadata_sync_shutdown();
  350. #ifdef ENABLE_DBENGINE
  351. if(dbengine_enabled) {
  352. delta_shutdown_time("wait for dbengine collectors to finish");
  353. size_t running = 1;
  354. size_t count = 10;
  355. while(running && count) {
  356. running = 0;
  357. for (size_t tier = 0; tier < storage_tiers; tier++)
  358. running += rrdeng_collectors_running(multidb_ctx[tier]);
  359. if(running) {
  360. nd_log_limit_static_thread_var(erl, 1, 100 * USEC_PER_MS);
  361. nd_log_limit(&erl, NDLS_DAEMON, NDLP_NOTICE,
  362. "waiting for %zu collectors to finish", running);
  363. // sleep_usec(100 * USEC_PER_MS);
  364. cleanup_destroyed_dictionaries();
  365. }
  366. count--;
  367. }
  368. delta_shutdown_time("wait for dbengine main cache to finish flushing");
  369. while (pgc_hot_and_dirty_entries(main_cache)) {
  370. pgc_flush_all_hot_and_dirty_pages(main_cache, PGC_SECTION_ALL);
  371. sleep_usec(100 * USEC_PER_MS);
  372. }
  373. delta_shutdown_time("stop dbengine tiers");
  374. for (size_t tier = 0; tier < storage_tiers; tier++)
  375. rrdeng_exit(multidb_ctx[tier]);
  376. rrdeng_enq_cmd(NULL, RRDENG_OPCODE_SHUTDOWN_EVLOOP, NULL, NULL, STORAGE_PRIORITY_BEST_EFFORT, NULL, NULL);
  377. }
  378. #endif
  379. }
  380. delta_shutdown_time("close SQL context db");
  381. sql_close_context_database();
  382. delta_shutdown_time("closed SQL main db");
  383. sql_close_database();
  384. // unlink the pid
  385. if(pidfile[0]) {
  386. delta_shutdown_time("remove pid file");
  387. if(unlink(pidfile) != 0)
  388. netdata_log_error("EXIT: cannot unlink pidfile '%s'.", pidfile);
  389. }
  390. #ifdef ENABLE_HTTPS
  391. delta_shutdown_time("free openssl structures");
  392. netdata_ssl_cleanup();
  393. #endif
  394. delta_shutdown_time("remove incomplete shutdown file");
  395. (void) unlink(agent_incomplete_shutdown_file);
  396. delta_shutdown_time("exit");
  397. usec_t ended_ut = now_monotonic_usec();
  398. netdata_log_info("NETDATA SHUTDOWN: completed in %llu ms - netdata is now exiting - bye bye...", (ended_ut - started_ut) / USEC_PER_MS);
  399. exit(ret);
  400. }
  401. void web_server_threading_selection(void) {
  402. web_server_mode = web_server_mode_id(config_get(CONFIG_SECTION_WEB, "mode", web_server_mode_name(web_server_mode)));
  403. int static_threaded = (web_server_mode == WEB_SERVER_MODE_STATIC_THREADED);
  404. int i;
  405. for (i = 0; static_threads[i].name; i++) {
  406. if (static_threads[i].start_routine == socket_listen_main_static_threaded)
  407. static_threads[i].enabled = static_threaded;
  408. }
  409. }
  410. int make_dns_decision(const char *section_name, const char *config_name, const char *default_value, SIMPLE_PATTERN *p)
  411. {
  412. char *value = config_get(section_name,config_name,default_value);
  413. if(!strcmp("yes",value))
  414. return 1;
  415. if(!strcmp("no",value))
  416. return 0;
  417. if(strcmp("heuristic",value))
  418. netdata_log_error("Invalid configuration option '%s' for '%s'/'%s'. Valid options are 'yes', 'no' and 'heuristic'. Proceeding with 'heuristic'",
  419. value, section_name, config_name);
  420. return simple_pattern_is_potential_name(p);
  421. }
  422. void web_server_config_options(void)
  423. {
  424. web_client_timeout =
  425. (int)config_get_number(CONFIG_SECTION_WEB, "disconnect idle clients after seconds", web_client_timeout);
  426. web_client_first_request_timeout =
  427. (int)config_get_number(CONFIG_SECTION_WEB, "timeout for first request", web_client_first_request_timeout);
  428. web_client_streaming_rate_t =
  429. config_get_number(CONFIG_SECTION_WEB, "accept a streaming request every seconds", web_client_streaming_rate_t);
  430. respect_web_browser_do_not_track_policy =
  431. config_get_boolean(CONFIG_SECTION_WEB, "respect do not track policy", respect_web_browser_do_not_track_policy);
  432. web_x_frame_options = config_get(CONFIG_SECTION_WEB, "x-frame-options response header", "");
  433. if(!*web_x_frame_options)
  434. web_x_frame_options = NULL;
  435. web_allow_connections_from =
  436. simple_pattern_create(config_get(CONFIG_SECTION_WEB, "allow connections from", "localhost *"),
  437. NULL, SIMPLE_PATTERN_EXACT, true);
  438. web_allow_connections_dns =
  439. make_dns_decision(CONFIG_SECTION_WEB, "allow connections by dns", "heuristic", web_allow_connections_from);
  440. web_allow_dashboard_from =
  441. simple_pattern_create(config_get(CONFIG_SECTION_WEB, "allow dashboard from", "localhost *"),
  442. NULL, SIMPLE_PATTERN_EXACT, true);
  443. web_allow_dashboard_dns =
  444. make_dns_decision(CONFIG_SECTION_WEB, "allow dashboard by dns", "heuristic", web_allow_dashboard_from);
  445. web_allow_badges_from =
  446. simple_pattern_create(config_get(CONFIG_SECTION_WEB, "allow badges from", "*"), NULL, SIMPLE_PATTERN_EXACT,
  447. true);
  448. web_allow_badges_dns =
  449. make_dns_decision(CONFIG_SECTION_WEB, "allow badges by dns", "heuristic", web_allow_badges_from);
  450. web_allow_registry_from =
  451. simple_pattern_create(config_get(CONFIG_SECTION_REGISTRY, "allow from", "*"), NULL, SIMPLE_PATTERN_EXACT,
  452. true);
  453. web_allow_registry_dns = make_dns_decision(CONFIG_SECTION_REGISTRY, "allow by dns", "heuristic",
  454. web_allow_registry_from);
  455. web_allow_streaming_from = simple_pattern_create(config_get(CONFIG_SECTION_WEB, "allow streaming from", "*"),
  456. NULL, SIMPLE_PATTERN_EXACT, true);
  457. web_allow_streaming_dns = make_dns_decision(CONFIG_SECTION_WEB, "allow streaming by dns", "heuristic",
  458. web_allow_streaming_from);
  459. // Note the default is not heuristic, the wildcards could match DNS but the intent is ip-addresses.
  460. web_allow_netdataconf_from = simple_pattern_create(config_get(CONFIG_SECTION_WEB, "allow netdata.conf from",
  461. "localhost fd* 10.* 192.168.* 172.16.* 172.17.* 172.18.*"
  462. " 172.19.* 172.20.* 172.21.* 172.22.* 172.23.* 172.24.*"
  463. " 172.25.* 172.26.* 172.27.* 172.28.* 172.29.* 172.30.*"
  464. " 172.31.* UNKNOWN"), NULL, SIMPLE_PATTERN_EXACT,
  465. true);
  466. web_allow_netdataconf_dns =
  467. make_dns_decision(CONFIG_SECTION_WEB, "allow netdata.conf by dns", "no", web_allow_netdataconf_from);
  468. web_allow_mgmt_from =
  469. simple_pattern_create(config_get(CONFIG_SECTION_WEB, "allow management from", "localhost"),
  470. NULL, SIMPLE_PATTERN_EXACT, true);
  471. web_allow_mgmt_dns =
  472. make_dns_decision(CONFIG_SECTION_WEB, "allow management by dns","heuristic",web_allow_mgmt_from);
  473. web_enable_gzip = config_get_boolean(CONFIG_SECTION_WEB, "enable gzip compression", web_enable_gzip);
  474. char *s = config_get(CONFIG_SECTION_WEB, "gzip compression strategy", "default");
  475. if(!strcmp(s, "default"))
  476. web_gzip_strategy = Z_DEFAULT_STRATEGY;
  477. else if(!strcmp(s, "filtered"))
  478. web_gzip_strategy = Z_FILTERED;
  479. else if(!strcmp(s, "huffman only"))
  480. web_gzip_strategy = Z_HUFFMAN_ONLY;
  481. else if(!strcmp(s, "rle"))
  482. web_gzip_strategy = Z_RLE;
  483. else if(!strcmp(s, "fixed"))
  484. web_gzip_strategy = Z_FIXED;
  485. else {
  486. netdata_log_error("Invalid compression strategy '%s'. Valid strategies are 'default', 'filtered', 'huffman only', 'rle' and 'fixed'. Proceeding with 'default'.", s);
  487. web_gzip_strategy = Z_DEFAULT_STRATEGY;
  488. }
  489. web_gzip_level = (int)config_get_number(CONFIG_SECTION_WEB, "gzip compression level", 3);
  490. if(web_gzip_level < 1) {
  491. netdata_log_error("Invalid compression level %d. Valid levels are 1 (fastest) to 9 (best ratio). Proceeding with level 1 (fastest compression).", web_gzip_level);
  492. web_gzip_level = 1;
  493. }
  494. else if(web_gzip_level > 9) {
  495. netdata_log_error("Invalid compression level %d. Valid levels are 1 (fastest) to 9 (best ratio). Proceeding with level 9 (best compression).", web_gzip_level);
  496. web_gzip_level = 9;
  497. }
  498. }
  499. // killpid kills pid with SIGTERM.
  500. int killpid(pid_t pid) {
  501. int ret;
  502. netdata_log_debug(D_EXIT, "Request to kill pid %d", pid);
  503. int signal = SIGTERM;
  504. //#ifdef NETDATA_INTERNAL_CHECKS
  505. // if(service_running(SERVICE_COLLECTORS))
  506. // signal = SIGABRT;
  507. //#endif
  508. errno = 0;
  509. ret = kill(pid, signal);
  510. if (ret == -1) {
  511. switch(errno) {
  512. case ESRCH:
  513. // We wanted the process to exit so just let the caller handle.
  514. return ret;
  515. case EPERM:
  516. netdata_log_error("Cannot kill pid %d, but I do not have enough permissions.", pid);
  517. break;
  518. default:
  519. netdata_log_error("Cannot kill pid %d, but I received an error.", pid);
  520. break;
  521. }
  522. }
  523. return ret;
  524. }
  // Raises the soft RLIMIT_NOFILE limit of the process to its hard limit.
  //
  // `rl` is caller provided storage; on return it holds the limits read back
  // after the attempt, unless one of the getrlimit() calls failed. Failures
  // are logged only, and a final soft limit below 1024 is logged as an error.
  static void set_nofile_limit(struct rlimit *rl) {
      // get the num files allowed
      const int read_failed = getrlimit(RLIMIT_NOFILE, rl);
      if(read_failed) {
          netdata_log_error("getrlimit(RLIMIT_NOFILE) failed");
          return;
      }

      netdata_log_info("resources control: allowed file descriptors: soft = %zu, max = %zu",
                       (size_t) rl->rlim_cur, (size_t) rl->rlim_max);

      // make the soft/hard limits equal
      rl->rlim_cur = rl->rlim_max;
      const int raise_failed = setrlimit(RLIMIT_NOFILE, rl);
      if(raise_failed) {
          netdata_log_error("setrlimit(RLIMIT_NOFILE, { %zu, %zu }) failed", (size_t)rl->rlim_cur, (size_t)rl->rlim_max);
      }

      // sanity check to make sure we have enough file descriptors available to open
      const int reread_failed = getrlimit(RLIMIT_NOFILE, rl);
      if(reread_failed) {
          netdata_log_error("getrlimit(RLIMIT_NOFILE) failed");
          return;
      }

      if(rl->rlim_cur < 1024) {
          netdata_log_error("Number of open file descriptors allowed for this process is too low (RLIMIT_NOFILE=%zu)", (size_t)rl->rlim_cur);
      }
  }
  546. void cancel_main_threads() {
  547. nd_log_limits_unlimited();
  548. int i, found = 0;
  549. usec_t max = 5 * USEC_PER_SEC, step = 100000;
  550. for (i = 0; static_threads[i].name != NULL ; i++) {
  551. if (static_threads[i].enabled == NETDATA_MAIN_THREAD_RUNNING) {
  552. if (static_threads[i].thread) {
  553. netdata_log_info("EXIT: Stopping main thread: %s", static_threads[i].name);
  554. netdata_thread_cancel(*static_threads[i].thread);
  555. } else {
  556. netdata_log_info("EXIT: No thread running (marking as EXITED): %s", static_threads[i].name);
  557. static_threads[i].enabled = NETDATA_MAIN_THREAD_EXITED;
  558. }
  559. found++;
  560. }
  561. }
  562. netdata_exit = 1;
  563. while(found && max > 0) {
  564. max -= step;
  565. netdata_log_info("Waiting %d threads to finish...", found);
  566. sleep_usec(step);
  567. found = 0;
  568. for (i = 0; static_threads[i].name != NULL ; i++) {
  569. if (static_threads[i].enabled != NETDATA_MAIN_THREAD_EXITED)
  570. found++;
  571. }
  572. }
  573. if(found) {
  574. for (i = 0; static_threads[i].name != NULL ; i++) {
  575. if (static_threads[i].enabled != NETDATA_MAIN_THREAD_EXITED)
  576. netdata_log_error("Main thread %s takes too long to exit. Giving up...", static_threads[i].name);
  577. }
  578. }
  579. else
  580. netdata_log_info("All threads finished.");
  581. for (i = 0; static_threads[i].name != NULL ; i++)
  582. freez(static_threads[i].thread);
  583. freez(static_threads);
  584. }
  585. struct option_def option_definitions[] = {
  586. // opt description arg name default value
  587. { 'c', "Configuration file to load.", "filename", CONFIG_DIR "/" CONFIG_FILENAME},
  588. { 'D', "Do not fork. Run in the foreground.", NULL, "run in the background"},
  589. { 'd', "Fork. Run in the background.", NULL, "run in the background"},
  590. { 'h', "Display this help message.", NULL, NULL},
  591. { 'P', "File to save a pid while running.", "filename", "do not save pid to a file"},
  592. { 'i', "The IP address to listen to.", "IP", "all IP addresses IPv4 and IPv6"},
  593. { 'p', "API/Web port to use.", "port", "19999"},
  594. { 's', "Prefix for /proc and /sys (for containers).", "path", "no prefix"},
  595. { 't', "The internal clock of netdata.", "seconds", "1"},
  596. { 'u', "Run as user.", "username", "netdata"},
  597. { 'v', "Print netdata version and exit.", NULL, NULL},
  598. { 'V', "Print netdata version and exit.", NULL, NULL},
  599. { 'W', "See Advanced options below.", "options", NULL},
  600. };
  601. int help(int exitcode) {
  602. FILE *stream;
  603. if(exitcode == 0)
  604. stream = stdout;
  605. else
  606. stream = stderr;
  607. int num_opts = sizeof(option_definitions) / sizeof(struct option_def);
  608. int i;
  609. int max_len_arg = 0;
  610. // Compute maximum argument length
  611. for( i = 0; i < num_opts; i++ ) {
  612. if(option_definitions[i].arg_name) {
  613. int len_arg = (int)strlen(option_definitions[i].arg_name);
  614. if(len_arg > max_len_arg) max_len_arg = len_arg;
  615. }
  616. }
  617. if(max_len_arg > 30) max_len_arg = 30;
  618. if(max_len_arg < 20) max_len_arg = 20;
  619. fprintf(stream, "%s", "\n"
  620. " ^\n"
  621. " |.-. .-. .-. .-. . netdata \n"
  622. " | '-' '-' '-' '-' real-time performance monitoring, done right! \n"
  623. " +----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+--->\n"
  624. "\n"
  625. " Copyright (C) 2016-2023, Netdata, Inc. <info@netdata.cloud>\n"
  626. " Released under GNU General Public License v3 or later.\n"
  627. " All rights reserved.\n"
  628. "\n"
  629. " Home Page : https://netdata.cloud\n"
  630. " Source Code: https://github.com/netdata/netdata\n"
  631. " Docs : https://learn.netdata.cloud\n"
  632. " Support : https://github.com/netdata/netdata/issues\n"
  633. " License : https://github.com/netdata/netdata/blob/master/LICENSE.md\n"
  634. "\n"
  635. " Twitter : https://twitter.com/netdatahq\n"
  636. " LinkedIn : https://linkedin.com/company/netdata-cloud/\n"
  637. " Facebook : https://facebook.com/linuxnetdata/\n"
  638. "\n"
  639. "\n"
  640. );
  641. fprintf(stream, " SYNOPSIS: netdata [options]\n");
  642. fprintf(stream, "\n");
  643. fprintf(stream, " Options:\n\n");
  644. // Output options description.
  645. for( i = 0; i < num_opts; i++ ) {
  646. fprintf(stream, " -%c %-*s %s", option_definitions[i].val, max_len_arg, option_definitions[i].arg_name ? option_definitions[i].arg_name : "", option_definitions[i].description);
  647. if(option_definitions[i].default_value) {
  648. fprintf(stream, "\n %c %-*s Default: %s\n", ' ', max_len_arg, "", option_definitions[i].default_value);
  649. } else {
  650. fprintf(stream, "\n");
  651. }
  652. fprintf(stream, "\n");
  653. }
  654. fprintf(stream, "\n Advanced options:\n\n"
  655. " -W stacksize=N Set the stacksize (in bytes).\n\n"
  656. " -W debug_flags=N Set runtime tracing to debug.log.\n\n"
  657. " -W unittest Run internal unittests and exit.\n\n"
  658. " -W sqlite-meta-recover Run recovery on the metadata database and exit.\n\n"
  659. " -W sqlite-compact Reclaim metadata database unused space and exit.\n\n"
  660. " -W sqlite-analyze Run update statistics and exit.\n\n"
  661. #ifdef ENABLE_DBENGINE
  662. " -W createdataset=N Create a DB engine dataset of N seconds and exit.\n\n"
  663. " -W stresstest=A,B,C,D,E,F,G\n"
  664. " Run a DB engine stress test for A seconds,\n"
  665. " with B writers and C readers, with a ramp up\n"
  666. " time of D seconds for writers, a page cache\n"
  667. " size of E MiB, an optional disk space limit\n"
  668. " of F MiB, G libuv workers (default 16) and exit.\n\n"
  669. #endif
  670. " -W set section option value\n"
  671. " set netdata.conf option from the command line.\n\n"
  672. " -W buildinfo Print the version, the configure options,\n"
  673. " a list of optional features, and whether they\n"
  674. " are enabled or not.\n\n"
  675. " -W buildinfojson Print the version, the configure options,\n"
  676. " a list of optional features, and whether they\n"
  677. " are enabled or not, in JSON format.\n\n"
  678. " -W simple-pattern pattern string\n"
  679. " Check if string matches pattern and exit.\n\n"
  680. " -W \"claim -token=TOKEN -rooms=ROOM1,ROOM2\"\n"
  681. " Claim the agent to the workspace rooms pointed to by TOKEN and ROOM*.\n\n"
  682. );
  683. fprintf(stream, "\n Signals netdata handles:\n\n"
  684. " - HUP Close and reopen log files.\n"
  685. " - USR1 Save internal DB to disk.\n"
  686. " - USR2 Reload health configuration.\n"
  687. "\n"
  688. );
  689. fflush(stream);
  690. return exitcode;
  691. }
  #ifdef ENABLE_HTTPS
  /*
   * Read the TLS settings of the [web] section (key, certificate, TLS version
   * and ciphers) and then initialize OpenSSL. The key and certificate default
   * to files under the "ssl" sub-directory of the user configuration directory.
   */
  static void security_init(){
      char default_path[FILENAME_MAX + 1];

      // private key
      snprintfz(default_path, FILENAME_MAX, "%s/ssl/key.pem",netdata_configured_user_config_dir);
      netdata_ssl_security_key = config_get(CONFIG_SECTION_WEB, "ssl key", default_path);

      // certificate
      snprintfz(default_path, FILENAME_MAX, "%s/ssl/cert.pem",netdata_configured_user_config_dir);
      netdata_ssl_security_cert = config_get(CONFIG_SECTION_WEB, "ssl certificate", default_path);

      // protocol settings
      tls_version = config_get(CONFIG_SECTION_WEB, "tls version", "1.3");
      tls_ciphers = config_get(CONFIG_SECTION_WEB, "tls ciphers", "none");

      netdata_ssl_initialize_openssl();
  }
  #endif
  704. static void log_init(void) {
  705. nd_log_set_facility(config_get(CONFIG_SECTION_LOGS, "facility", "daemon"));
  706. time_t period = ND_LOG_DEFAULT_THROTTLE_PERIOD;
  707. size_t logs = ND_LOG_DEFAULT_THROTTLE_LOGS;
  708. period = config_get_number(CONFIG_SECTION_LOGS, "logs flood protection period", period);
  709. logs = (unsigned long)config_get_number(CONFIG_SECTION_LOGS, "logs to trigger flood protection", (long long int)logs);
  710. nd_log_set_flood_protection(logs, period);
  711. const char *netdata_log_level = getenv("NETDATA_LOG_LEVEL");
  712. netdata_log_level = netdata_log_level ? nd_log_id2priority(nd_log_priority2id(netdata_log_level)) : NDLP_INFO_STR;
  713. nd_log_set_priority_level(config_get(CONFIG_SECTION_LOGS, "level", netdata_log_level));
  714. char filename[FILENAME_MAX + 1];
  715. snprintfz(filename, FILENAME_MAX, "%s/debug.log", netdata_configured_log_dir);
  716. nd_log_set_user_settings(NDLS_DEBUG, config_get(CONFIG_SECTION_LOGS, "debug", filename));
  717. bool with_journal = is_stderr_connected_to_journal() /* || nd_log_journal_socket_available() */;
  718. if(with_journal)
  719. snprintfz(filename, FILENAME_MAX, "journal");
  720. else
  721. snprintfz(filename, FILENAME_MAX, "%s/daemon.log", netdata_configured_log_dir);
  722. nd_log_set_user_settings(NDLS_DAEMON, config_get(CONFIG_SECTION_LOGS, "daemon", filename));
  723. if(with_journal)
  724. snprintfz(filename, FILENAME_MAX, "journal");
  725. else
  726. snprintfz(filename, FILENAME_MAX, "%s/collector.log", netdata_configured_log_dir);
  727. nd_log_set_user_settings(NDLS_COLLECTORS, config_get(CONFIG_SECTION_LOGS, "collector", filename));
  728. snprintfz(filename, FILENAME_MAX, "%s/access.log", netdata_configured_log_dir);
  729. nd_log_set_user_settings(NDLS_ACCESS, config_get(CONFIG_SECTION_LOGS, "access", filename));
  730. if(with_journal)
  731. snprintfz(filename, FILENAME_MAX, "journal");
  732. else
  733. snprintfz(filename, FILENAME_MAX, "%s/health.log", netdata_configured_log_dir);
  734. nd_log_set_user_settings(NDLS_HEALTH, config_get(CONFIG_SECTION_LOGS, "health", filename));
  735. #ifdef ENABLE_ACLK
  736. aclklog_enabled = config_get_boolean(CONFIG_SECTION_CLOUD, "conversation log", CONFIG_BOOLEAN_NO);
  737. if (aclklog_enabled) {
  738. snprintfz(filename, FILENAME_MAX, "%s/aclk.log", netdata_configured_log_dir);
  739. nd_log_set_user_settings(NDLS_ACLK, config_get(CONFIG_SECTION_CLOUD, "conversation log file", filename));
  740. }
  741. #endif
  742. }
  743. char *initialize_lock_directory_path(char *prefix)
  744. {
  745. char filename[FILENAME_MAX + 1];
  746. snprintfz(filename, FILENAME_MAX, "%s/lock", prefix);
  747. return config_get(CONFIG_SECTION_DIRECTORIES, "lock", filename);
  748. }
  749. static void backwards_compatible_config() {
  750. // move [global] options to the [web] section
  751. config_move(CONFIG_SECTION_GLOBAL, "http port listen backlog",
  752. CONFIG_SECTION_WEB, "listen backlog");
  753. config_move(CONFIG_SECTION_GLOBAL, "bind socket to IP",
  754. CONFIG_SECTION_WEB, "bind to");
  755. config_move(CONFIG_SECTION_GLOBAL, "bind to",
  756. CONFIG_SECTION_WEB, "bind to");
  757. config_move(CONFIG_SECTION_GLOBAL, "port",
  758. CONFIG_SECTION_WEB, "default port");
  759. config_move(CONFIG_SECTION_GLOBAL, "default port",
  760. CONFIG_SECTION_WEB, "default port");
  761. config_move(CONFIG_SECTION_GLOBAL, "disconnect idle web clients after seconds",
  762. CONFIG_SECTION_WEB, "disconnect idle clients after seconds");
  763. config_move(CONFIG_SECTION_GLOBAL, "respect web browser do not track policy",
  764. CONFIG_SECTION_WEB, "respect do not track policy");
  765. config_move(CONFIG_SECTION_GLOBAL, "web x-frame-options header",
  766. CONFIG_SECTION_WEB, "x-frame-options response header");
  767. config_move(CONFIG_SECTION_GLOBAL, "enable web responses gzip compression",
  768. CONFIG_SECTION_WEB, "enable gzip compression");
  769. config_move(CONFIG_SECTION_GLOBAL, "web compression strategy",
  770. CONFIG_SECTION_WEB, "gzip compression strategy");
  771. config_move(CONFIG_SECTION_GLOBAL, "web compression level",
  772. CONFIG_SECTION_WEB, "gzip compression level");
  773. config_move(CONFIG_SECTION_GLOBAL, "config directory",
  774. CONFIG_SECTION_DIRECTORIES, "config");
  775. config_move(CONFIG_SECTION_GLOBAL, "stock config directory",
  776. CONFIG_SECTION_DIRECTORIES, "stock config");
  777. config_move(CONFIG_SECTION_GLOBAL, "log directory",
  778. CONFIG_SECTION_DIRECTORIES, "log");
  779. config_move(CONFIG_SECTION_GLOBAL, "web files directory",
  780. CONFIG_SECTION_DIRECTORIES, "web");
  781. config_move(CONFIG_SECTION_GLOBAL, "cache directory",
  782. CONFIG_SECTION_DIRECTORIES, "cache");
  783. config_move(CONFIG_SECTION_GLOBAL, "lib directory",
  784. CONFIG_SECTION_DIRECTORIES, "lib");
  785. config_move(CONFIG_SECTION_GLOBAL, "home directory",
  786. CONFIG_SECTION_DIRECTORIES, "home");
  787. config_move(CONFIG_SECTION_GLOBAL, "lock directory",
  788. CONFIG_SECTION_DIRECTORIES, "lock");
  789. config_move(CONFIG_SECTION_GLOBAL, "plugins directory",
  790. CONFIG_SECTION_DIRECTORIES, "plugins");
  791. config_move(CONFIG_SECTION_HEALTH, "health configuration directory",
  792. CONFIG_SECTION_DIRECTORIES, "health config");
  793. config_move(CONFIG_SECTION_HEALTH, "stock health configuration directory",
  794. CONFIG_SECTION_DIRECTORIES, "stock health config");
  795. config_move(CONFIG_SECTION_REGISTRY, "registry db directory",
  796. CONFIG_SECTION_DIRECTORIES, "registry");
  797. config_move(CONFIG_SECTION_GLOBAL, "debug log",
  798. CONFIG_SECTION_LOGS, "debug");
  799. config_move(CONFIG_SECTION_GLOBAL, "error log",
  800. CONFIG_SECTION_LOGS, "error");
  801. config_move(CONFIG_SECTION_GLOBAL, "access log",
  802. CONFIG_SECTION_LOGS, "access");
  803. config_move(CONFIG_SECTION_GLOBAL, "facility log",
  804. CONFIG_SECTION_LOGS, "facility");
  805. config_move(CONFIG_SECTION_GLOBAL, "errors flood protection period",
  806. CONFIG_SECTION_LOGS, "errors flood protection period");
  807. config_move(CONFIG_SECTION_GLOBAL, "errors to trigger flood protection",
  808. CONFIG_SECTION_LOGS, "errors to trigger flood protection");
  809. config_move(CONFIG_SECTION_GLOBAL, "debug flags",
  810. CONFIG_SECTION_LOGS, "debug flags");
  811. config_move(CONFIG_SECTION_GLOBAL, "TZ environment variable",
  812. CONFIG_SECTION_ENV_VARS, "TZ");
  813. config_move(CONFIG_SECTION_PLUGINS, "PATH environment variable",
  814. CONFIG_SECTION_ENV_VARS, "PATH");
  815. config_move(CONFIG_SECTION_PLUGINS, "PYTHONPATH environment variable",
  816. CONFIG_SECTION_ENV_VARS, "PYTHONPATH");
  817. config_move(CONFIG_SECTION_STATSD, "enabled",
  818. CONFIG_SECTION_PLUGINS, "statsd");
  819. config_move(CONFIG_SECTION_GLOBAL, "memory mode",
  820. CONFIG_SECTION_DB, "mode");
  821. config_move(CONFIG_SECTION_GLOBAL, "history",
  822. CONFIG_SECTION_DB, "retention");
  823. config_move(CONFIG_SECTION_GLOBAL, "update every",
  824. CONFIG_SECTION_DB, "update every");
  825. config_move(CONFIG_SECTION_GLOBAL, "page cache size",
  826. CONFIG_SECTION_DB, "dbengine page cache size MB");
  827. config_move(CONFIG_SECTION_DB, "page cache size",
  828. CONFIG_SECTION_DB, "dbengine page cache size MB");
  829. config_move(CONFIG_SECTION_GLOBAL, "page cache uses malloc",
  830. CONFIG_SECTION_DB, "dbengine page cache with malloc");
  831. config_move(CONFIG_SECTION_DB, "page cache with malloc",
  832. CONFIG_SECTION_DB, "dbengine page cache with malloc");
  833. config_move(CONFIG_SECTION_GLOBAL, "dbengine disk space",
  834. CONFIG_SECTION_DB, "dbengine disk space MB");
  835. config_move(CONFIG_SECTION_GLOBAL, "dbengine multihost disk space",
  836. CONFIG_SECTION_DB, "dbengine multihost disk space MB");
  837. config_move(CONFIG_SECTION_GLOBAL, "memory deduplication (ksm)",
  838. CONFIG_SECTION_DB, "memory deduplication (ksm)");
  839. config_move(CONFIG_SECTION_GLOBAL, "dbengine page fetch timeout",
  840. CONFIG_SECTION_DB, "dbengine page fetch timeout secs");
  841. config_move(CONFIG_SECTION_GLOBAL, "dbengine page fetch retries",
  842. CONFIG_SECTION_DB, "dbengine page fetch retries");
  843. config_move(CONFIG_SECTION_GLOBAL, "dbengine extent pages",
  844. CONFIG_SECTION_DB, "dbengine pages per extent");
  845. config_move(CONFIG_SECTION_GLOBAL, "cleanup obsolete charts after seconds",
  846. CONFIG_SECTION_DB, "cleanup obsolete charts after secs");
  847. config_move(CONFIG_SECTION_GLOBAL, "gap when lost iterations above",
  848. CONFIG_SECTION_DB, "gap when lost iterations above");
  849. config_move(CONFIG_SECTION_GLOBAL, "cleanup orphan hosts after seconds",
  850. CONFIG_SECTION_DB, "cleanup orphan hosts after secs");
  851. config_move(CONFIG_SECTION_GLOBAL, "delete obsolete charts files",
  852. CONFIG_SECTION_DB, "delete obsolete charts files");
  853. config_move(CONFIG_SECTION_GLOBAL, "delete orphan hosts files",
  854. CONFIG_SECTION_DB, "delete orphan hosts files");
  855. config_move(CONFIG_SECTION_GLOBAL, "enable zero metrics",
  856. CONFIG_SECTION_DB, "enable zero metrics");
  857. config_move(CONFIG_SECTION_LOGS, "error",
  858. CONFIG_SECTION_LOGS, "daemon");
  859. config_move(CONFIG_SECTION_LOGS, "severity level",
  860. CONFIG_SECTION_LOGS, "level");
  861. config_move(CONFIG_SECTION_LOGS, "errors to trigger flood protection",
  862. CONFIG_SECTION_LOGS, "logs to trigger flood protection");
  863. config_move(CONFIG_SECTION_LOGS, "errors flood protection period",
  864. CONFIG_SECTION_LOGS, "logs flood protection period");
  865. config_move(CONFIG_SECTION_HEALTH, "is ephemeral",
  866. CONFIG_SECTION_GLOBAL, "is ephemeral node");
  867. config_move(CONFIG_SECTION_HEALTH, "has unstable connection",
  868. CONFIG_SECTION_GLOBAL, "has unstable connection");
  869. }
  870. static int get_hostname(char *buf, size_t buf_size) {
  871. if (netdata_configured_host_prefix && *netdata_configured_host_prefix) {
  872. char filename[FILENAME_MAX + 1];
  873. snprintfz(filename, FILENAME_MAX, "%s/etc/hostname", netdata_configured_host_prefix);
  874. if (!read_file(filename, buf, buf_size)) {
  875. trim(buf);
  876. return 0;
  877. }
  878. }
  879. return gethostname(buf, buf_size);
  880. }
  881. static void get_netdata_configured_variables() {
  882. backwards_compatible_config();
  883. // ------------------------------------------------------------------------
  884. // get the hostname
  885. netdata_configured_host_prefix = config_get(CONFIG_SECTION_GLOBAL, "host access prefix", "");
  886. verify_netdata_host_prefix(true);
  887. char buf[HOSTNAME_MAX + 1];
  888. if (get_hostname(buf, HOSTNAME_MAX))
  889. netdata_log_error("Cannot get machine hostname.");
  890. netdata_configured_hostname = config_get(CONFIG_SECTION_GLOBAL, "hostname", buf);
  891. netdata_log_debug(D_OPTIONS, "hostname set to '%s'", netdata_configured_hostname);
  892. // ------------------------------------------------------------------------
  893. // get default database update frequency
  894. default_rrd_update_every = (int) config_get_number(CONFIG_SECTION_DB, "update every", UPDATE_EVERY);
  895. if(default_rrd_update_every < 1 || default_rrd_update_every > 600) {
  896. netdata_log_error("Invalid data collection frequency (update every) %d given. Defaulting to %d.", default_rrd_update_every, UPDATE_EVERY);
  897. default_rrd_update_every = UPDATE_EVERY;
  898. config_set_number(CONFIG_SECTION_DB, "update every", default_rrd_update_every);
  899. }
  900. // ------------------------------------------------------------------------
  901. // get default memory mode for the database
  902. {
  903. const char *mode = config_get(CONFIG_SECTION_DB, "mode", rrd_memory_mode_name(default_rrd_memory_mode));
  904. default_rrd_memory_mode = rrd_memory_mode_id(mode);
  905. if(strcmp(mode, rrd_memory_mode_name(default_rrd_memory_mode)) != 0) {
  906. netdata_log_error("Invalid memory mode '%s' given. Using '%s'", mode, rrd_memory_mode_name(default_rrd_memory_mode));
  907. config_set(CONFIG_SECTION_DB, "mode", rrd_memory_mode_name(default_rrd_memory_mode));
  908. }
  909. }
  910. // ------------------------------------------------------------------------
  911. // get default database size
  912. if(default_rrd_memory_mode != RRD_MEMORY_MODE_DBENGINE && default_rrd_memory_mode != RRD_MEMORY_MODE_NONE) {
  913. default_rrd_history_entries = (int)config_get_number(
  914. CONFIG_SECTION_DB, "retention",
  915. align_entries_to_pagesize(default_rrd_memory_mode, RRD_DEFAULT_HISTORY_ENTRIES));
  916. long h = align_entries_to_pagesize(default_rrd_memory_mode, default_rrd_history_entries);
  917. if (h != default_rrd_history_entries) {
  918. config_set_number(CONFIG_SECTION_DB, "retention", h);
  919. default_rrd_history_entries = (int)h;
  920. }
  921. }
  922. // ------------------------------------------------------------------------
  923. // get system paths
  924. netdata_configured_user_config_dir = config_get(CONFIG_SECTION_DIRECTORIES, "config", netdata_configured_user_config_dir);
  925. netdata_configured_stock_config_dir = config_get(CONFIG_SECTION_DIRECTORIES, "stock config", netdata_configured_stock_config_dir);
  926. netdata_configured_log_dir = config_get(CONFIG_SECTION_DIRECTORIES, "log", netdata_configured_log_dir);
  927. netdata_configured_web_dir = config_get(CONFIG_SECTION_DIRECTORIES, "web", netdata_configured_web_dir);
  928. netdata_configured_cache_dir = config_get(CONFIG_SECTION_DIRECTORIES, "cache", netdata_configured_cache_dir);
  929. netdata_configured_varlib_dir = config_get(CONFIG_SECTION_DIRECTORIES, "lib", netdata_configured_varlib_dir);
  930. netdata_configured_lock_dir = initialize_lock_directory_path(netdata_configured_varlib_dir);
  931. {
  932. pluginsd_initialize_plugin_directories();
  933. netdata_configured_primary_plugins_dir = plugin_directories[PLUGINSD_STOCK_PLUGINS_DIRECTORY_PATH];
  934. }
  935. #ifdef ENABLE_DBENGINE
  936. // ------------------------------------------------------------------------
  937. // get default Database Engine page type
  938. const char *page_type = config_get(CONFIG_SECTION_DB, "dbengine page type", "raw");
  939. if (strcmp(page_type, "gorilla") == 0) {
  940. tier_page_type[0] = PAGE_GORILLA_METRICS;
  941. } else if (strcmp(page_type, "raw") != 0) {
  942. netdata_log_error("Invalid dbengine page type ''%s' given. Defaulting to 'raw'.", page_type);
  943. }
  944. // ------------------------------------------------------------------------
  945. // get default Database Engine page cache size in MiB
  946. default_rrdeng_page_cache_mb = (int) config_get_number(CONFIG_SECTION_DB, "dbengine page cache size MB", default_rrdeng_page_cache_mb);
  947. default_rrdeng_extent_cache_mb = (int) config_get_number(CONFIG_SECTION_DB, "dbengine extent cache size MB", default_rrdeng_extent_cache_mb);
  948. db_engine_journal_check = config_get_boolean(CONFIG_SECTION_DB, "dbengine enable journal integrity check", CONFIG_BOOLEAN_NO);
  949. if(default_rrdeng_extent_cache_mb < 0)
  950. default_rrdeng_extent_cache_mb = 0;
  951. if(default_rrdeng_page_cache_mb < RRDENG_MIN_PAGE_CACHE_SIZE_MB) {
  952. netdata_log_error("Invalid page cache size %d given. Defaulting to %d.", default_rrdeng_page_cache_mb, RRDENG_MIN_PAGE_CACHE_SIZE_MB);
  953. default_rrdeng_page_cache_mb = RRDENG_MIN_PAGE_CACHE_SIZE_MB;
  954. config_set_number(CONFIG_SECTION_DB, "dbengine page cache size MB", default_rrdeng_page_cache_mb);
  955. }
  956. // ------------------------------------------------------------------------
  957. // get default Database Engine disk space quota in MiB
  958. default_rrdeng_disk_quota_mb = (int) config_get_number(CONFIG_SECTION_DB, "dbengine disk space MB", default_rrdeng_disk_quota_mb);
  959. if(default_rrdeng_disk_quota_mb < RRDENG_MIN_DISK_SPACE_MB) {
  960. netdata_log_error("Invalid dbengine disk space %d given. Defaulting to %d.", default_rrdeng_disk_quota_mb, RRDENG_MIN_DISK_SPACE_MB);
  961. default_rrdeng_disk_quota_mb = RRDENG_MIN_DISK_SPACE_MB;
  962. config_set_number(CONFIG_SECTION_DB, "dbengine disk space MB", default_rrdeng_disk_quota_mb);
  963. }
  964. default_multidb_disk_quota_mb = (int) config_get_number(CONFIG_SECTION_DB, "dbengine multihost disk space MB", compute_multidb_diskspace());
  965. if(default_multidb_disk_quota_mb < RRDENG_MIN_DISK_SPACE_MB) {
  966. netdata_log_error("Invalid multidb disk space %d given. Defaulting to %d.", default_multidb_disk_quota_mb, default_rrdeng_disk_quota_mb);
  967. default_multidb_disk_quota_mb = default_rrdeng_disk_quota_mb;
  968. config_set_number(CONFIG_SECTION_DB, "dbengine multihost disk space MB", default_multidb_disk_quota_mb);
  969. }
  970. #else
  971. if (default_rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) {
  972. error_report("RRD_MEMORY_MODE_DBENGINE is not supported in this platform. The agent will use db mode 'save' instead.");
  973. default_rrd_memory_mode = RRD_MEMORY_MODE_SAVE;
  974. }
  975. #endif
  976. // --------------------------------------------------------------------
  977. // get KSM settings
  978. #ifdef MADV_MERGEABLE
  979. enable_ksm = config_get_boolean(CONFIG_SECTION_DB, "memory deduplication (ksm)", enable_ksm);
  980. #endif
  981. // --------------------------------------------------------------------
  982. // metric correlations
  983. enable_metric_correlations = config_get_boolean(CONFIG_SECTION_GLOBAL, "enable metric correlations", enable_metric_correlations);
  984. default_metric_correlations_method = weights_string_to_method(config_get(
  985. CONFIG_SECTION_GLOBAL, "metric correlations method",
  986. weights_method_to_string(default_metric_correlations_method)));
  987. // --------------------------------------------------------------------
  988. rrdset_free_obsolete_time_s = config_get_number(CONFIG_SECTION_DB, "cleanup obsolete charts after secs", rrdset_free_obsolete_time_s);
  989. rrdhost_free_ephemeral_time_s = config_get_number(CONFIG_SECTION_DB, "cleanup ephemeral hosts after secs", rrdhost_free_ephemeral_time_s);
  990. // Current chart locking and invalidation scheme doesn't prevent Netdata from segmentation faults if a short
  991. // cleanup delay is set. Extensive stress tests showed that 10 seconds is quite a safe delay. Look at
  992. // https://github.com/netdata/netdata/pull/11222#issuecomment-868367920 for more information.
  993. if (rrdset_free_obsolete_time_s < 10) {
  994. rrdset_free_obsolete_time_s = 10;
  995. netdata_log_info("The \"cleanup obsolete charts after seconds\" option was set to 10 seconds.");
  996. config_set_number(CONFIG_SECTION_DB, "cleanup obsolete charts after secs", rrdset_free_obsolete_time_s);
  997. }
  998. gap_when_lost_iterations_above = (int)config_get_number(CONFIG_SECTION_DB, "gap when lost iterations above", gap_when_lost_iterations_above);
  999. if (gap_when_lost_iterations_above < 1) {
  1000. gap_when_lost_iterations_above = 1;
  1001. config_set_number(CONFIG_SECTION_DB, "gap when lost iterations above", gap_when_lost_iterations_above);
  1002. }
  1003. gap_when_lost_iterations_above += 2;
  1004. // --------------------------------------------------------------------
  1005. // get various system parameters
  1006. get_system_HZ();
  1007. get_system_cpus_uncached();
  1008. get_system_pid_max();
  1009. }
  1010. static void post_conf_load(char **user)
  1011. {
  1012. // --------------------------------------------------------------------
  1013. // get the user we should run
  1014. // IMPORTANT: this is required before web_files_uid()
  1015. if(getuid() == 0) {
  1016. *user = config_get(CONFIG_SECTION_GLOBAL, "run as user", NETDATA_USER);
  1017. }
  1018. else {
  1019. struct passwd *passwd = getpwuid(getuid());
  1020. *user = config_get(CONFIG_SECTION_GLOBAL, "run as user", (passwd && passwd->pw_name)?passwd->pw_name:"");
  1021. }
  1022. }
  1023. static bool load_netdata_conf(char *filename, char overwrite_used, char **user) {
  1024. errno = 0;
  1025. int ret = 0;
  1026. if(filename && *filename) {
  1027. ret = config_load(filename, overwrite_used, NULL);
  1028. if(!ret)
  1029. netdata_log_error("CONFIG: cannot load config file '%s'.", filename);
  1030. }
  1031. else {
  1032. filename = strdupz_path_subpath(netdata_configured_user_config_dir, "netdata.conf");
  1033. ret = config_load(filename, overwrite_used, NULL);
  1034. if(!ret) {
  1035. netdata_log_info("CONFIG: cannot load user config '%s'. Will try the stock version.", filename);
  1036. freez(filename);
  1037. filename = strdupz_path_subpath(netdata_configured_stock_config_dir, "netdata.conf");
  1038. ret = config_load(filename, overwrite_used, NULL);
  1039. if(!ret)
  1040. netdata_log_info("CONFIG: cannot load stock config '%s'. Running with internal defaults.", filename);
  1041. }
  1042. freez(filename);
  1043. }
  1044. post_conf_load(user);
  1045. return ret;
  1046. }
  // Intentional no-op: it exists only to carry the Coverity annotation below,
  // which marks the string passed to it as sanitized. The argument is not modified.
  // coverity[ +tainted_string_sanitize_content : arg-0 ]
  static inline void coverity_remove_taint(char *s)
  {
      (void) s;
  }
  1052. int get_system_info(struct rrdhost_system_info *system_info) {
  1053. char *script;
  1054. script = mallocz(sizeof(char) * (strlen(netdata_configured_primary_plugins_dir) + strlen("system-info.sh") + 2));
  1055. sprintf(script, "%s/%s", netdata_configured_primary_plugins_dir, "system-info.sh");
  1056. if (unlikely(access(script, R_OK) != 0)) {
  1057. netdata_log_error("System info script %s not found.",script);
  1058. freez(script);
  1059. return 1;
  1060. }
  1061. pid_t command_pid;
  1062. FILE *fp_child_input;
  1063. FILE *fp_child_output = netdata_popen(script, &command_pid, &fp_child_input);
  1064. if(fp_child_output) {
  1065. char line[200 + 1];
  1066. // Removed the double strlens, if the Coverity tainted string warning reappears I'll revert.
  1067. // One time init code, but I'm curious about the warning...
  1068. while (fgets(line, 200, fp_child_output) != NULL) {
  1069. char *value=line;
  1070. while (*value && *value != '=') value++;
  1071. if (*value=='=') {
  1072. *value='\0';
  1073. value++;
  1074. char *end = value;
  1075. while (*end && *end != '\n') end++;
  1076. *end = '\0'; // Overwrite newline if present
  1077. coverity_remove_taint(line); // I/O is controlled result of system_info.sh - not tainted
  1078. coverity_remove_taint(value);
  1079. if(unlikely(rrdhost_set_system_info_variable(system_info, line, value))) {
  1080. netdata_log_error("Unexpected environment variable %s=%s", line, value);
  1081. } else {
  1082. setenv(line, value, 1);
  1083. }
  1084. }
  1085. }
  1086. netdata_pclose(fp_child_input, fp_child_output, command_pid);
  1087. }
  1088. freez(script);
  1089. return 0;
  1090. }
  1091. void set_silencers_filename() {
  1092. char filename[FILENAME_MAX + 1];
  1093. snprintfz(filename, FILENAME_MAX, "%s/health.silencers.json", netdata_configured_varlib_dir);
  1094. silencers_filename = config_get(CONFIG_SECTION_HEALTH, "silencers file", filename);
  1095. }
  1096. /* Any config setting that can be accessed without a default value i.e. configget(...,...,NULL) *MUST*
  1097. be set in this procedure to be called in all the relevant code paths.
  1098. */
  /*
   * Log a startup progress message.
   *
   * Expects two variables in the scope where it is used:
   *   usec_t last_ut        - monotonic timestamp of the previous step
   *   const char *prev_msg  - description of the previous step (NULL on first use)
   *
   * It logs how long the previous step took (in ms) together with the step
   * that starts next, then makes msg the previous step.
   *
   * Wrapped in do { } while(0) so that it behaves as a single statement (safe
   * inside un-braced if/else); msg is evaluated exactly once.
   */
  #define delta_startup_time(msg)                                                                          \
      do {                                                                                                 \
          const char *delta_next_msg = (msg);                                                              \
          usec_t now_ut = now_monotonic_usec();                                                            \
          if(prev_msg)                                                                                     \
              netdata_log_info("NETDATA STARTUP: in %7llu ms, %s - next: %s",                              \
                               (now_ut - last_ut) / USEC_PER_MS, prev_msg, delta_next_msg);                \
          else                                                                                             \
              netdata_log_info("NETDATA STARTUP: next: %s", delta_next_msg);                               \
          last_ut = now_ut;                                                                                \
          prev_msg = delta_next_msg;                                                                       \
      } while(0)
  // Functions used by main() that are defined elsewhere.

  // unit test entry points
  int buffer_unittest(void);
  int pgc_unittest(void);
  int mrg_unittest(void);
  int julytest(void);
  int pluginsd_parser_unittest(void);
  int unittest_rrdpush_compressions(void);
  int uuid_unittest(void);

  // subsystem initialization
  void replication_initialize(void);
  void bearer_tokens_init(void);
  1118. int main(int argc, char **argv) {
  1119. // initialize the system clocks
  1120. clocks_init();
  1121. netdata_start_time = now_realtime_sec();
  1122. usec_t started_ut = now_monotonic_usec();
  1123. usec_t last_ut = started_ut;
  1124. const char *prev_msg = NULL;
  1125. int i;
  1126. int config_loaded = 0;
  1127. int dont_fork = 0;
  1128. bool close_open_fds = true;
  1129. size_t default_stacksize;
  1130. char *user = NULL;
  1131. static_threads = static_threads_get();
  1132. netdata_ready = false;
  1133. // set the name for logging
  1134. program_name = "netdata";
  1135. if (argc > 1 && strcmp(argv[1], SPAWN_SERVER_COMMAND_LINE_ARGUMENT) == 0) {
  1136. // don't run netdata, this is the spawn server
  1137. spawn_server();
  1138. exit(0);
  1139. }
  1140. // parse options
  1141. {
  1142. int num_opts = sizeof(option_definitions) / sizeof(struct option_def);
  1143. char optstring[(num_opts * 2) + 1];
  1144. int string_i = 0;
  1145. for( i = 0; i < num_opts; i++ ) {
  1146. optstring[string_i] = option_definitions[i].val;
  1147. string_i++;
  1148. if(option_definitions[i].arg_name) {
  1149. optstring[string_i] = ':';
  1150. string_i++;
  1151. }
  1152. }
  1153. // terminate optstring
  1154. optstring[string_i] ='\0';
  1155. optstring[(num_opts *2)] ='\0';
  1156. int opt;
  1157. while( (opt = getopt(argc, argv, optstring)) != -1 ) {
  1158. switch(opt) {
  1159. case 'c':
  1160. if(!load_netdata_conf(optarg, 1, &user)) {
  1161. netdata_log_error("Cannot load configuration file %s.", optarg);
  1162. return 1;
  1163. }
  1164. else {
  1165. netdata_log_debug(D_OPTIONS, "Configuration loaded from %s.", optarg);
  1166. load_cloud_conf(1);
  1167. config_loaded = 1;
  1168. }
  1169. break;
  1170. case 'D':
  1171. dont_fork = 1;
  1172. break;
  1173. case 'd':
  1174. dont_fork = 0;
  1175. break;
  1176. case 'h':
  1177. return help(0);
  1178. case 'i':
  1179. config_set(CONFIG_SECTION_WEB, "bind to", optarg);
  1180. break;
  1181. case 'P':
  1182. strncpy(pidfile, optarg, FILENAME_MAX);
  1183. pidfile[FILENAME_MAX] = '\0';
  1184. break;
  1185. case 'p':
  1186. config_set(CONFIG_SECTION_GLOBAL, "default port", optarg);
  1187. break;
  1188. case 's':
  1189. config_set(CONFIG_SECTION_GLOBAL, "host access prefix", optarg);
  1190. break;
  1191. case 't':
  1192. config_set(CONFIG_SECTION_GLOBAL, "update every", optarg);
  1193. break;
  1194. case 'u':
  1195. config_set(CONFIG_SECTION_GLOBAL, "run as user", optarg);
  1196. break;
  1197. case 'v':
  1198. case 'V':
  1199. printf("%s %s\n", program_name, program_version);
  1200. return 0;
  1201. case 'W':
  1202. {
  1203. char* stacksize_string = "stacksize=";
  1204. char* debug_flags_string = "debug_flags=";
  1205. char* claim_string = "claim";
  1206. #ifdef ENABLE_DBENGINE
  1207. char* createdataset_string = "createdataset=";
  1208. char* stresstest_string = "stresstest=";
  1209. if(strcmp(optarg, "pgd-tests") == 0) {
  1210. return pgd_test(argc, argv);
  1211. }
  1212. #endif
  1213. if(strcmp(optarg, "sqlite-meta-recover") == 0) {
  1214. sql_init_database(DB_CHECK_RECOVER, 0);
  1215. return 0;
  1216. }
  1217. if(strcmp(optarg, "sqlite-compact") == 0) {
  1218. sql_init_database(DB_CHECK_RECLAIM_SPACE, 0);
  1219. return 0;
  1220. }
  1221. if(strcmp(optarg, "sqlite-analyze") == 0) {
  1222. sql_init_database(DB_CHECK_ANALYZE, 0);
  1223. return 0;
  1224. }
  1225. if(strcmp(optarg, "unittest") == 0) {
  1226. unittest_running = true;
  1227. if (pluginsd_parser_unittest())
  1228. return 1;
  1229. if (unit_test_static_threads())
  1230. return 1;
  1231. if (unit_test_buffer())
  1232. return 1;
  1233. if (unit_test_str2ld())
  1234. return 1;
  1235. if (buffer_unittest())
  1236. return 1;
  1237. if (unit_test_bitmaps())
  1238. return 1;
  1239. // No call to load the config file on this code-path
  1240. post_conf_load(&user);
  1241. get_netdata_configured_variables();
  1242. default_rrd_update_every = 1;
  1243. default_rrd_memory_mode = RRD_MEMORY_MODE_RAM;
  1244. default_health_enabled = 0;
  1245. storage_tiers = 1;
  1246. registry_init();
  1247. if(rrd_init("unittest", NULL, true)) {
  1248. fprintf(stderr, "rrd_init failed for unittest\n");
  1249. return 1;
  1250. }
  1251. default_rrdpush_enabled = 0;
  1252. if(run_all_mockup_tests()) return 1;
  1253. if(unit_test_storage()) return 1;
  1254. #ifdef ENABLE_DBENGINE
  1255. if(test_dbengine()) return 1;
  1256. #endif
  1257. if(test_sqlite()) return 1;
  1258. if(string_unittest(10000)) return 1;
  1259. if (dictionary_unittest(10000))
  1260. return 1;
  1261. if(aral_unittest(10000))
  1262. return 1;
  1263. if (rrdlabels_unittest())
  1264. return 1;
  1265. if (ctx_unittest())
  1266. return 1;
  1267. if (uuid_unittest())
  1268. return 1;
  1269. fprintf(stderr, "\n\nALL TESTS PASSED\n\n");
  1270. return 0;
  1271. }
  1272. else if(strcmp(optarg, "escapetest") == 0) {
  1273. return command_argument_sanitization_tests();
  1274. }
  1275. else if(strcmp(optarg, "dicttest") == 0) {
  1276. unittest_running = true;
  1277. return dictionary_unittest(10000);
  1278. }
  1279. else if(strcmp(optarg, "araltest") == 0) {
  1280. unittest_running = true;
  1281. return aral_unittest(10000);
  1282. }
  1283. else if(strcmp(optarg, "stringtest") == 0) {
  1284. unittest_running = true;
  1285. return string_unittest(10000);
  1286. }
  1287. else if(strcmp(optarg, "rrdlabelstest") == 0) {
  1288. unittest_running = true;
  1289. return rrdlabels_unittest();
  1290. }
  1291. else if(strcmp(optarg, "buffertest") == 0) {
  1292. unittest_running = true;
  1293. return buffer_unittest();
  1294. }
  1295. else if(strcmp(optarg, "uuidtest") == 0) {
  1296. unittest_running = true;
  1297. return uuid_unittest();
  1298. }
  1299. #ifdef ENABLE_DBENGINE
  1300. else if(strcmp(optarg, "mctest") == 0) {
  1301. unittest_running = true;
  1302. return mc_unittest();
  1303. }
  1304. else if(strcmp(optarg, "ctxtest") == 0) {
  1305. unittest_running = true;
  1306. return ctx_unittest();
  1307. }
  1308. else if(strcmp(optarg, "metatest") == 0) {
  1309. unittest_running = true;
  1310. return metadata_unittest();
  1311. }
  1312. else if(strcmp(optarg, "pgctest") == 0) {
  1313. unittest_running = true;
  1314. return pgc_unittest();
  1315. }
  1316. else if(strcmp(optarg, "mrgtest") == 0) {
  1317. unittest_running = true;
  1318. return mrg_unittest();
  1319. }
  1320. else if(strcmp(optarg, "julytest") == 0) {
  1321. unittest_running = true;
  1322. return julytest();
  1323. }
  1324. else if(strcmp(optarg, "parsertest") == 0) {
  1325. unittest_running = true;
  1326. return pluginsd_parser_unittest();
  1327. }
  1328. else if(strcmp(optarg, "rrdpush_compressions_test") == 0) {
  1329. unittest_running = true;
  1330. return unittest_rrdpush_compressions();
  1331. }
  1332. else if(strncmp(optarg, createdataset_string, strlen(createdataset_string)) == 0) {
  1333. optarg += strlen(createdataset_string);
  1334. unsigned history_seconds = strtoul(optarg, NULL, 0);
  1335. post_conf_load(&user);
  1336. get_netdata_configured_variables();
  1337. default_rrd_update_every = 1;
  1338. registry_init();
  1339. if(rrd_init("dbengine-dataset", NULL, true)) {
  1340. fprintf(stderr, "rrd_init failed for unittest\n");
  1341. return 1;
  1342. }
  1343. generate_dbengine_dataset(history_seconds);
  1344. return 0;
  1345. }
  1346. else if(strncmp(optarg, stresstest_string, strlen(stresstest_string)) == 0) {
  1347. char *endptr;
  1348. unsigned test_duration_sec = 0, dset_charts = 0, query_threads = 0, ramp_up_seconds = 0,
  1349. page_cache_mb = 0, disk_space_mb = 0, workers = 16;
  1350. optarg += strlen(stresstest_string);
  1351. test_duration_sec = (unsigned)strtoul(optarg, &endptr, 0);
  1352. if (',' == *endptr)
  1353. dset_charts = (unsigned)strtoul(endptr + 1, &endptr, 0);
  1354. if (',' == *endptr)
  1355. query_threads = (unsigned)strtoul(endptr + 1, &endptr, 0);
  1356. if (',' == *endptr)
  1357. ramp_up_seconds = (unsigned)strtoul(endptr + 1, &endptr, 0);
  1358. if (',' == *endptr)
  1359. page_cache_mb = (unsigned)strtoul(endptr + 1, &endptr, 0);
  1360. if (',' == *endptr)
  1361. disk_space_mb = (unsigned)strtoul(endptr + 1, &endptr, 0);
  1362. if (',' == *endptr)
  1363. workers = (unsigned)strtoul(endptr + 1, &endptr, 0);
  1364. if (workers > 1024)
  1365. workers = 1024;
  1366. char workers_str[16];
  1367. snprintf(workers_str, 15, "%u", workers);
  1368. setenv("UV_THREADPOOL_SIZE", workers_str, 1);
  1369. dbengine_stress_test(test_duration_sec, dset_charts, query_threads, ramp_up_seconds,
  1370. page_cache_mb, disk_space_mb);
  1371. return 0;
  1372. }
  1373. #endif
  1374. else if(strcmp(optarg, "simple-pattern") == 0) {
  1375. if(optind + 2 > argc) {
  1376. fprintf(stderr, "%s", "\nUSAGE: -W simple-pattern 'pattern' 'string'\n\n"
  1377. " Checks if 'pattern' matches the given 'string'.\n"
  1378. " - 'pattern' can be one or more space separated words.\n"
  1379. " - each 'word' can contain one or more asterisks.\n"
  1380. " - words starting with '!' give negative matches.\n"
  1381. " - words are processed left to right\n"
  1382. "\n"
  1383. "Examples:\n"
  1384. "\n"
  1385. " > match all veth interfaces, except veth0:\n"
  1386. "\n"
  1387. " -W simple-pattern '!veth0 veth*' 'veth12'\n"
  1388. "\n"
  1389. "\n"
  1390. " > match all *.ext files directly in /path/:\n"
  1391. " (this will not match *.ext files in a subdir of /path/)\n"
  1392. "\n"
  1393. " -W simple-pattern '!/path/*/*.ext /path/*.ext' '/path/test.ext'\n"
  1394. "\n"
  1395. );
  1396. return 1;
  1397. }
  1398. const char *haystack = argv[optind];
  1399. const char *needle = argv[optind + 1];
  1400. size_t len = strlen(needle) + 1;
  1401. char wildcarded[len];
  1402. SIMPLE_PATTERN *p = simple_pattern_create(haystack, NULL, SIMPLE_PATTERN_EXACT, true);
  1403. SIMPLE_PATTERN_RESULT ret = simple_pattern_matches_extract(p, needle, wildcarded, len);
  1404. simple_pattern_free(p);
  1405. if(ret == SP_MATCHED_POSITIVE) {
  1406. fprintf(stdout, "RESULT: POSITIVE MATCHED - pattern '%s' matches '%s', wildcarded '%s'\n", haystack, needle, wildcarded);
  1407. return 0;
  1408. }
  1409. else if(ret == SP_MATCHED_NEGATIVE) {
  1410. fprintf(stdout, "RESULT: NEGATIVE MATCHED - pattern '%s' matches '%s', wildcarded '%s'\n", haystack, needle, wildcarded);
  1411. return 0;
  1412. }
  1413. else {
  1414. fprintf(stdout, "RESULT: NOT MATCHED - pattern '%s' does not match '%s', wildcarded '%s'\n", haystack, needle, wildcarded);
  1415. return 1;
  1416. }
  1417. }
  1418. else if(strncmp(optarg, stacksize_string, strlen(stacksize_string)) == 0) {
  1419. optarg += strlen(stacksize_string);
  1420. config_set(CONFIG_SECTION_GLOBAL, "pthread stack size", optarg);
  1421. }
  1422. else if(strncmp(optarg, debug_flags_string, strlen(debug_flags_string)) == 0) {
  1423. optarg += strlen(debug_flags_string);
  1424. config_set(CONFIG_SECTION_LOGS, "debug flags", optarg);
  1425. debug_flags = strtoull(optarg, NULL, 0);
  1426. }
  1427. else if(strcmp(optarg, "set") == 0) {
  1428. if(optind + 3 > argc) {
  1429. fprintf(stderr, "%s", "\nUSAGE: -W set 'section' 'key' 'value'\n\n"
  1430. " Overwrites settings of netdata.conf.\n"
  1431. "\n"
  1432. " These options interact with: -c netdata.conf\n"
  1433. " If -c netdata.conf is given on the command line,\n"
  1434. " before -W set... the user may overwrite command\n"
  1435. " line parameters at netdata.conf\n"
  1436. " If -c netdata.conf is given after (or missing)\n"
  1437. " -W set... the user cannot overwrite the command line\n"
  1438. " parameters."
  1439. "\n"
  1440. );
  1441. return 1;
  1442. }
  1443. const char *section = argv[optind];
  1444. const char *key = argv[optind + 1];
  1445. const char *value = argv[optind + 2];
  1446. optind += 3;
  1447. // set this one as the default
  1448. // only if it is not already set in the config file
  1449. // so the caller can use -c netdata.conf before or
  1450. // after this parameter to prevent or allow overwriting
  1451. // variables at netdata.conf
  1452. config_set_default(section, key, value);
  1453. // fprintf(stderr, "SET section '%s', key '%s', value '%s'\n", section, key, value);
  1454. }
  1455. else if(strcmp(optarg, "set2") == 0) {
  1456. if(optind + 4 > argc) {
  1457. fprintf(stderr, "%s", "\nUSAGE: -W set 'conf_file' 'section' 'key' 'value'\n\n"
  1458. " Overwrites settings of netdata.conf or cloud.conf\n"
  1459. "\n"
  1460. " These options interact with: -c netdata.conf\n"
  1461. " If -c netdata.conf is given on the command line,\n"
  1462. " before -W set... the user may overwrite command\n"
  1463. " line parameters at netdata.conf\n"
  1464. " If -c netdata.conf is given after (or missing)\n"
  1465. " -W set... the user cannot overwrite the command line\n"
  1466. " parameters."
  1467. " conf_file can be \"cloud\" or \"netdata\".\n"
  1468. "\n"
  1469. );
  1470. return 1;
  1471. }
  1472. const char *conf_file = argv[optind]; /* "cloud" is cloud.conf, otherwise netdata.conf */
  1473. struct config *tmp_config = strcmp(conf_file, "cloud") ? &netdata_config : &cloud_config;
  1474. const char *section = argv[optind + 1];
  1475. const char *key = argv[optind + 2];
  1476. const char *value = argv[optind + 3];
  1477. optind += 4;
  1478. // set this one as the default
  1479. // only if it is not already set in the config file
  1480. // so the caller can use -c netdata.conf before or
  1481. // after this parameter to prevent or allow overwriting
  1482. // variables at netdata.conf
  1483. appconfig_set_default(tmp_config, section, key, value);
  1484. // fprintf(stderr, "SET section '%s', key '%s', value '%s'\n", section, key, value);
  1485. }
  1486. else if(strcmp(optarg, "get") == 0) {
  1487. if(optind + 3 > argc) {
  1488. fprintf(stderr, "%s", "\nUSAGE: -W get 'section' 'key' 'value'\n\n"
  1489. " Prints settings of netdata.conf.\n"
  1490. "\n"
  1491. " These options interact with: -c netdata.conf\n"
  1492. " -c netdata.conf has to be given before -W get.\n"
  1493. "\n"
  1494. );
  1495. return 1;
  1496. }
  1497. if(!config_loaded) {
  1498. fprintf(stderr, "warning: no configuration file has been loaded. Use -c CONFIG_FILE, before -W get. Using default config.\n");
  1499. load_netdata_conf(NULL, 0, &user);
  1500. }
  1501. get_netdata_configured_variables();
  1502. const char *section = argv[optind];
  1503. const char *key = argv[optind + 1];
  1504. const char *def = argv[optind + 2];
  1505. const char *value = config_get(section, key, def);
  1506. printf("%s\n", value);
  1507. return 0;
  1508. }
  1509. else if(strcmp(optarg, "get2") == 0) {
  1510. if(optind + 4 > argc) {
  1511. fprintf(stderr, "%s", "\nUSAGE: -W get2 'conf_file' 'section' 'key' 'value'\n\n"
  1512. " Prints settings of netdata.conf or cloud.conf\n"
  1513. "\n"
  1514. " These options interact with: -c netdata.conf\n"
  1515. " -c netdata.conf has to be given before -W get2.\n"
  1516. " conf_file can be \"cloud\" or \"netdata\".\n"
  1517. "\n"
  1518. );
  1519. return 1;
  1520. }
  1521. if(!config_loaded) {
  1522. fprintf(stderr, "warning: no configuration file has been loaded. Use -c CONFIG_FILE, before -W get. Using default config.\n");
  1523. load_netdata_conf(NULL, 0, &user);
  1524. load_cloud_conf(1);
  1525. }
  1526. get_netdata_configured_variables();
  1527. const char *conf_file = argv[optind]; /* "cloud" is cloud.conf, otherwise netdata.conf */
  1528. struct config *tmp_config = strcmp(conf_file, "cloud") ? &netdata_config : &cloud_config;
  1529. const char *section = argv[optind + 1];
  1530. const char *key = argv[optind + 2];
  1531. const char *def = argv[optind + 3];
  1532. const char *value = appconfig_get(tmp_config, section, key, def);
  1533. printf("%s\n", value);
  1534. return 0;
  1535. }
  1536. else if(strncmp(optarg, claim_string, strlen(claim_string)) == 0) {
  1537. /* will trigger a claiming attempt when the agent is initialized */
  1538. claiming_pending_arguments = optarg + strlen(claim_string);
  1539. }
  1540. else if(strcmp(optarg, "buildinfo") == 0) {
  1541. print_build_info();
  1542. return 0;
  1543. }
  1544. else if(strcmp(optarg, "buildinfojson") == 0) {
  1545. print_build_info_json();
  1546. return 0;
  1547. }
  1548. else if(strcmp(optarg, "keepopenfds") == 0) {
  1549. // Internal dev option to skip closing inherited
  1550. // open FDs. Useful, when we want to run the agent
  1551. // under profiling tools that open/maintain their
  1552. // own FDs.
  1553. close_open_fds = false;
  1554. } else {
  1555. fprintf(stderr, "Unknown -W parameter '%s'\n", optarg);
  1556. return help(1);
  1557. }
  1558. }
  1559. break;
  1560. default: /* ? */
  1561. fprintf(stderr, "Unknown parameter '%c'\n", opt);
  1562. return help(1);
  1563. }
  1564. }
  1565. }
  1566. if (close_open_fds == true) {
  1567. // close all open file descriptors, except the standard ones
  1568. // the caller may have left open files (lxc-attach has this issue)
  1569. for_each_open_fd(OPEN_FD_ACTION_CLOSE, OPEN_FD_EXCLUDE_STDIN | OPEN_FD_EXCLUDE_STDOUT | OPEN_FD_EXCLUDE_STDERR);
  1570. }
  1571. if(!config_loaded) {
  1572. load_netdata_conf(NULL, 0, &user);
  1573. load_cloud_conf(0);
  1574. }
  1575. // ------------------------------------------------------------------------
  1576. // initialize netdata
  1577. {
  1578. char *pmax = config_get(CONFIG_SECTION_GLOBAL, "glibc malloc arena max for plugins", "1");
  1579. if(pmax && *pmax)
  1580. setenv("MALLOC_ARENA_MAX", pmax, 1);
  1581. #if defined(HAVE_C_MALLOPT)
  1582. i = (int)config_get_number(CONFIG_SECTION_GLOBAL, "glibc malloc arena max for netdata", 1);
  1583. if(i > 0)
  1584. mallopt(M_ARENA_MAX, 1);
  1585. #ifdef NETDATA_INTERNAL_CHECKS
  1586. mallopt(M_PERTURB, 0x5A);
  1587. // mallopt(M_MXFAST, 0);
  1588. #endif
  1589. #endif
  1590. // set libuv worker threads
  1591. libuv_worker_threads = (int)get_netdata_cpus() * 6;
  1592. if(libuv_worker_threads < MIN_LIBUV_WORKER_THREADS)
  1593. libuv_worker_threads = MIN_LIBUV_WORKER_THREADS;
  1594. if(libuv_worker_threads > MAX_LIBUV_WORKER_THREADS)
  1595. libuv_worker_threads = MAX_LIBUV_WORKER_THREADS;
  1596. libuv_worker_threads = config_get_number(CONFIG_SECTION_GLOBAL, "libuv worker threads", libuv_worker_threads);
  1597. if(libuv_worker_threads < MIN_LIBUV_WORKER_THREADS) {
  1598. libuv_worker_threads = MIN_LIBUV_WORKER_THREADS;
  1599. config_set_number(CONFIG_SECTION_GLOBAL, "libuv worker threads", libuv_worker_threads);
  1600. }
  1601. {
  1602. char buf[20 + 1];
  1603. snprintfz(buf, sizeof(buf) - 1, "%d", libuv_worker_threads);
  1604. setenv("UV_THREADPOOL_SIZE", buf, 1);
  1605. }
  1606. // prepare configuration environment variables for the plugins
  1607. get_netdata_configured_variables();
  1608. set_global_environment();
  1609. // work while we are cd into config_dir
  1610. // to allow the plugins refer to their config
  1611. // files using relative filenames
  1612. if(chdir(netdata_configured_user_config_dir) == -1)
  1613. fatal("Cannot cd to '%s'", netdata_configured_user_config_dir);
  1614. // Get execution path before switching user to avoid permission issues
  1615. get_netdata_execution_path();
  1616. }
  1617. {
  1618. // --------------------------------------------------------------------
  1619. // get the debugging flags from the configuration file
  1620. char *flags = config_get(CONFIG_SECTION_LOGS, "debug flags", "0x0000000000000000");
  1621. setenv("NETDATA_DEBUG_FLAGS", flags, 1);
  1622. debug_flags = strtoull(flags, NULL, 0);
  1623. netdata_log_debug(D_OPTIONS, "Debug flags set to '0x%" PRIX64 "'.", debug_flags);
  1624. if(debug_flags != 0) {
  1625. struct rlimit rl = { RLIM_INFINITY, RLIM_INFINITY };
  1626. if(setrlimit(RLIMIT_CORE, &rl) != 0)
  1627. netdata_log_error("Cannot request unlimited core dumps for debugging... Proceeding anyway...");
  1628. #ifdef HAVE_SYS_PRCTL_H
  1629. prctl(PR_SET_DUMPABLE, 1, 0, 0, 0);
  1630. #endif
  1631. }
  1632. // --------------------------------------------------------------------
  1633. // get log filenames and settings
  1634. log_init();
  1635. nd_log_limits_unlimited();
  1636. // initialize the log files
  1637. nd_log_initialize();
  1638. netdata_log_info("Netdata agent version \""VERSION"\" is starting");
  1639. ieee754_doubles = is_system_ieee754_double();
  1640. if(!ieee754_doubles)
  1641. globally_disabled_capabilities |= STREAM_CAP_IEEE754;
  1642. aral_judy_init();
  1643. get_system_timezone();
  1644. bearer_tokens_init();
  1645. replication_initialize();
  1646. rrd_functions_inflight_init();
  1647. // --------------------------------------------------------------------
  1648. // get the certificate and start security
  1649. #ifdef ENABLE_HTTPS
  1650. security_init();
  1651. #endif
  1652. // --------------------------------------------------------------------
  1653. // This is the safest place to start the SILENCERS structure
  1654. set_silencers_filename();
  1655. health_initialize_global_silencers();
  1656. // // --------------------------------------------------------------------
  1657. // // Initialize ML configuration
  1658. //
  1659. // delta_startup_time("initialize ML");
  1660. // ml_init();
  1661. // --------------------------------------------------------------------
  1662. // setup process signals
  1663. // block signals while initializing threads.
  1664. // this causes the threads to block signals.
  1665. delta_startup_time("initialize signals");
  1666. signals_block();
  1667. signals_init(); // setup the signals we want to use
  1668. // --------------------------------------------------------------------
  1669. // check which threads are enabled and initialize them
  1670. delta_startup_time("initialize static threads");
  1671. // setup threads configs
  1672. default_stacksize = netdata_threads_init();
  1673. #ifdef NETDATA_INTERNAL_CHECKS
  1674. config_set_boolean(CONFIG_SECTION_PLUGINS, "netdata monitoring", true);
  1675. config_set_boolean(CONFIG_SECTION_PLUGINS, "netdata monitoring extended", true);
  1676. #endif
  1677. if(config_get_boolean(CONFIG_SECTION_PLUGINS, "netdata monitoring extended", false))
  1678. // this has to run before starting any other threads that use workers
  1679. workers_utilization_enable();
  1680. for (i = 0; static_threads[i].name != NULL ; i++) {
  1681. struct netdata_static_thread *st = &static_threads[i];
  1682. if(st->config_name)
  1683. st->enabled = config_get_boolean(st->config_section, st->config_name, st->enabled);
  1684. if(st->enabled && st->init_routine)
  1685. st->init_routine();
  1686. if(st->env_name)
  1687. setenv(st->env_name, st->enabled?"YES":"NO", 1);
  1688. if(st->global_variable)
  1689. *st->global_variable = (st->enabled) ? true : false;
  1690. }
  1691. // --------------------------------------------------------------------
  1692. // create the listening sockets
  1693. delta_startup_time("initialize web server");
  1694. web_client_api_v1_init();
  1695. web_server_threading_selection();
  1696. if(web_server_mode != WEB_SERVER_MODE_NONE) {
  1697. if (!api_listen_sockets_setup()) {
  1698. netdata_log_error("Cannot setup listen port(s). Is Netdata already running?");
  1699. exit(1);
  1700. }
  1701. }
  1702. // --------------------------------------------------------------------
  1703. // Initialize ML configuration
  1704. delta_startup_time("initialize ML");
  1705. ml_init();
  1706. #ifdef ENABLE_H2O
  1707. delta_startup_time("initialize h2o server");
  1708. for (int i = 0; static_threads[i].name; i++) {
  1709. if (static_threads[i].start_routine == h2o_main)
  1710. static_threads[i].enabled = httpd_is_enabled();
  1711. }
  1712. #endif
  1713. }
  1714. delta_startup_time("set resource limits");
  1715. #ifdef NETDATA_INTERNAL_CHECKS
  1716. if(debug_flags != 0) {
  1717. struct rlimit rl = { RLIM_INFINITY, RLIM_INFINITY };
  1718. if(setrlimit(RLIMIT_CORE, &rl) != 0)
  1719. netdata_log_error("Cannot request unlimited core dumps for debugging... Proceeding anyway...");
  1720. #ifdef HAVE_SYS_PRCTL_H
  1721. prctl(PR_SET_DUMPABLE, 1, 0, 0, 0);
  1722. #endif
  1723. }
  1724. #endif /* NETDATA_INTERNAL_CHECKS */
  1725. set_nofile_limit(&rlimit_nofile);
  1726. delta_startup_time("become daemon");
  1727. // fork, switch user, create pid file, set process priority
  1728. if(become_daemon(dont_fork, user) == -1)
  1729. fatal("Cannot daemonize myself.");
  1730. // The "HOME" env var points to the root's home dir because Netdata starts as root. Can't use "HOME".
  1731. struct passwd *pw = getpwuid(getuid());
  1732. if (config_exists(CONFIG_SECTION_DIRECTORIES, "home") || !pw || !pw->pw_dir) {
  1733. netdata_configured_home_dir = config_get(CONFIG_SECTION_DIRECTORIES, "home", netdata_configured_home_dir);
  1734. } else {
  1735. netdata_configured_home_dir = config_get(CONFIG_SECTION_DIRECTORIES, "home", pw->pw_dir);
  1736. }
  1737. setenv("HOME", netdata_configured_home_dir, 1);
  1738. dyn_conf_init();
  1739. netdata_log_info("netdata started on pid %d.", getpid());
  1740. delta_startup_time("initialize threads after fork");
  1741. netdata_threads_init_after_fork((size_t)config_get_number(CONFIG_SECTION_GLOBAL, "pthread stack size", (long)default_stacksize));
  1742. // initialize internal registry
  1743. delta_startup_time("initialize registry");
  1744. registry_init();
  1745. // fork the spawn server
  1746. delta_startup_time("fork the spawn server");
  1747. spawn_init();
  1748. /*
  1749. * Libuv uv_spawn() uses SIGCHLD internally:
  1750. * https://github.com/libuv/libuv/blob/cc51217a317e96510fbb284721d5e6bc2af31e33/src/unix/process.c#L485
  1751. * and inadvertently replaces the netdata signal handler which was setup during initialization.
  1752. * Thusly, we must explicitly restore the signal handler for SIGCHLD.
  1753. * Warning: extreme care is needed when mixing and matching POSIX and libuv.
  1754. */
  1755. signals_restore_SIGCHLD();
  1756. // ------------------------------------------------------------------------
  1757. // initialize rrd, registry, health, rrdpush, etc.
  1758. delta_startup_time("collecting system info");
  1759. netdata_anonymous_statistics_enabled=-1;
  1760. struct rrdhost_system_info *system_info = callocz(1, sizeof(struct rrdhost_system_info));
  1761. __atomic_sub_fetch(&netdata_buffers_statistics.rrdhost_allocations_size, sizeof(struct rrdhost_system_info), __ATOMIC_RELAXED);
  1762. get_system_info(system_info);
  1763. (void) registry_get_this_machine_guid();
  1764. system_info->hops = 0;
  1765. get_install_type(&system_info->install_type, &system_info->prebuilt_arch, &system_info->prebuilt_dist);
  1766. delta_startup_time("initialize RRD structures");
  1767. if(rrd_init(netdata_configured_hostname, system_info, false)) {
  1768. set_late_global_environment(system_info);
  1769. fatal("Cannot initialize localhost instance with name '%s'.", netdata_configured_hostname);
  1770. }
  1771. delta_startup_time("check for incomplete shutdown");
  1772. char agent_crash_file[FILENAME_MAX + 1];
  1773. char agent_incomplete_shutdown_file[FILENAME_MAX + 1];
  1774. snprintfz(agent_incomplete_shutdown_file, FILENAME_MAX, "%s/.agent_incomplete_shutdown", netdata_configured_varlib_dir);
  1775. int incomplete_shutdown_detected = (unlink(agent_incomplete_shutdown_file) == 0);
  1776. snprintfz(agent_crash_file, FILENAME_MAX, "%s/.agent_crash", netdata_configured_varlib_dir);
  1777. int crash_detected = (unlink(agent_crash_file) == 0);
  1778. int fd = open(agent_crash_file, O_WRONLY | O_CREAT | O_TRUNC, 444);
  1779. if (fd >= 0)
  1780. close(fd);
  1781. // ------------------------------------------------------------------------
  1782. // Claim netdata agent to a cloud endpoint
  1783. delta_startup_time("collect claiming info");
  1784. if (claiming_pending_arguments)
  1785. claim_agent(claiming_pending_arguments, false, NULL);
  1786. load_claiming_state();
  1787. // ------------------------------------------------------------------------
  1788. // enable log flood protection
  1789. nd_log_limits_reset();
  1790. // Load host labels
  1791. delta_startup_time("collect host labels");
  1792. reload_host_labels();
  1793. // ------------------------------------------------------------------------
  1794. // spawn the threads
  1795. delta_startup_time("start the static threads");
  1796. web_server_config_options();
  1797. netdata_zero_metrics_enabled = config_get_boolean_ondemand(CONFIG_SECTION_DB, "enable zero metrics", CONFIG_BOOLEAN_NO);
  1798. set_late_global_environment(system_info);
  1799. for (i = 0; static_threads[i].name != NULL ; i++) {
  1800. struct netdata_static_thread *st = &static_threads[i];
  1801. if(st->enabled) {
  1802. st->thread = mallocz(sizeof(netdata_thread_t));
  1803. netdata_log_debug(D_SYSTEM, "Starting thread %s.", st->name);
  1804. netdata_thread_create(st->thread, st->name, NETDATA_THREAD_OPTION_DEFAULT, st->start_routine, st);
  1805. }
  1806. else
  1807. netdata_log_debug(D_SYSTEM, "Not starting thread %s.", st->name);
  1808. }
  1809. ml_start_threads();
  1810. // ------------------------------------------------------------------------
  1811. // Initialize netdata agent command serving from cli and signals
  1812. delta_startup_time("initialize commands API");
  1813. commands_init();
  1814. delta_startup_time("ready");
  1815. usec_t ready_ut = now_monotonic_usec();
  1816. netdata_log_info("NETDATA STARTUP: completed in %llu ms. Enjoy real-time performance monitoring!", (ready_ut - started_ut) / USEC_PER_MS);
  1817. netdata_ready = true;
  1818. send_statistics("START", "-", "-");
  1819. if (crash_detected)
  1820. send_statistics("CRASH", "-", "-");
  1821. if (incomplete_shutdown_detected)
  1822. send_statistics("INCOMPLETE_SHUTDOWN", "-", "-");
  1823. //check if ANALYTICS needs to start
  1824. if (netdata_anonymous_statistics_enabled == 1) {
  1825. for (i = 0; static_threads[i].name != NULL; i++) {
  1826. if (!strncmp(static_threads[i].name, "ANALYTICS", 9)) {
  1827. struct netdata_static_thread *st = &static_threads[i];
  1828. st->thread = mallocz(sizeof(netdata_thread_t));
  1829. st->enabled = 1;
  1830. netdata_log_debug(D_SYSTEM, "Starting thread %s.", st->name);
  1831. netdata_thread_create(st->thread, st->name, NETDATA_THREAD_OPTION_DEFAULT, st->start_routine, st);
  1832. }
  1833. }
  1834. }
  1835. // ------------------------------------------------------------------------
  1836. // Report ACLK build failure
  1837. #ifndef ENABLE_ACLK
  1838. netdata_log_error("This agent doesn't have ACLK.");
  1839. char filename[FILENAME_MAX + 1];
  1840. snprintfz(filename, FILENAME_MAX, "%s/.aclk_report_sent", netdata_configured_varlib_dir);
  1841. if (netdata_anonymous_statistics_enabled > 0 && access(filename, F_OK)) { // -1 -> not initialized
  1842. send_statistics("ACLK_DISABLED", "-", "-");
  1843. int fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC, 444);
  1844. if (fd == -1)
  1845. netdata_log_error("Cannot create file '%s'. Please fix this.", filename);
  1846. else
  1847. close(fd);
  1848. }
  1849. #endif
  1850. // ------------------------------------------------------------------------
  1851. // initialize WebRTC
  1852. webrtc_initialize();
  1853. // ------------------------------------------------------------------------
  1854. // unblock signals
  1855. signals_unblock();
  1856. // ------------------------------------------------------------------------
  1857. // Handle signals
  1858. signals_handle();
  1859. // should never reach this point
  1860. // but we need it for rpmlint #2752
  1861. return 1;
  1862. }