rrdpush.c 34 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869
  1. // SPDX-License-Identifier: GPL-3.0-or-later
  2. #include "rrdpush.h"
  3. #include "parser/parser.h"
  4. /*
  5. * rrdpush
  6. *
  7. * 3 threads are involved for all stream operations
  8. *
  9. * 1. a random data collection thread, calling rrdset_done_push()
  10. * this is called for each chart.
  11. *
  12. * the output of this work is kept in a BUFFER in RRDHOST
  13. * the sender thread is signalled via a pipe (also in RRDHOST)
  14. *
  15. * 2. a sender thread running at the sending netdata
  16. * this is spawned automatically on the first chart to be pushed
  17. *
  18. * It tries to push the metrics to the remote netdata, as fast
  19. * as possible (i.e. immediately after they are collected).
  20. *
  21. * 3. a receiver thread, running at the receiving netdata
  22. * this is spawned automatically when the sender connects to
  23. * the receiver.
  24. *
  25. */
  // In-memory representation of stream.conf.
  // It starts out empty (no sections, empty index); load_stream_conf() fills it
  // and the appconfig_*() calls in this file query it.
  struct config stream_config = {
      .first_section = NULL,
      .last_section = NULL,
      .mutex = NETDATA_MUTEX_INITIALIZER,
      .index = {
          .avl_tree = {
              .root = NULL,
              .compar = appconfig_section_compare
          },
          .rwlock = AVL_LOCK_INITIALIZER
      }
  };
  // Sender-side defaults. rrdpush_init() overwrites them with the values read
  // from the CONFIG_SECTION_STREAM section of stream_config.
  unsigned int default_rrdpush_enabled = 0;
  #ifdef ENABLE_COMPRESSION
  unsigned int default_compression_enabled = 1;
  #endif
  char *default_rrdpush_destination = NULL;
  char *default_rrdpush_api_key = NULL;
  char *default_rrdpush_send_charts_matching = NULL;
  #ifdef ENABLE_HTTPS
  // TLS settings for streaming. rrdpush_init() switches the mode to
  // NETDATA_SSL_FORCE when the destination contains ":SSL", and fills in the
  // CA path / CA file from stream_config.
  int netdata_use_ssl_on_stream = NETDATA_SSL_OPTIONAL;
  char *netdata_ssl_ca_path = NULL;
  char *netdata_ssl_ca_file = NULL;
  #endif
  50. static void load_stream_conf() {
  51. errno = 0;
  52. char *filename = strdupz_path_subpath(netdata_configured_user_config_dir, "stream.conf");
  53. if(!appconfig_load(&stream_config, filename, 0, NULL)) {
  54. info("CONFIG: cannot load user config '%s'. Will try stock config.", filename);
  55. freez(filename);
  56. filename = strdupz_path_subpath(netdata_configured_stock_config_dir, "stream.conf");
  57. if(!appconfig_load(&stream_config, filename, 0, NULL))
  58. info("CONFIG: cannot load stock config '%s'. Running with internal defaults.", filename);
  59. }
  60. freez(filename);
  61. }
  62. int rrdpush_init() {
  63. // --------------------------------------------------------------------
  64. // load stream.conf
  65. load_stream_conf();
  66. default_rrdpush_enabled = (unsigned int)appconfig_get_boolean(&stream_config, CONFIG_SECTION_STREAM, "enabled", default_rrdpush_enabled);
  67. default_rrdpush_destination = appconfig_get(&stream_config, CONFIG_SECTION_STREAM, "destination", "");
  68. default_rrdpush_api_key = appconfig_get(&stream_config, CONFIG_SECTION_STREAM, "api key", "");
  69. default_rrdpush_send_charts_matching = appconfig_get(&stream_config, CONFIG_SECTION_STREAM, "send charts matching", "*");
  70. rrdhost_free_orphan_time = config_get_number(CONFIG_SECTION_GLOBAL, "cleanup orphan hosts after seconds", rrdhost_free_orphan_time);
  71. #ifdef ENABLE_COMPRESSION
  72. default_compression_enabled = (unsigned int)appconfig_get_boolean(&stream_config, CONFIG_SECTION_STREAM,
  73. "enable compression", default_compression_enabled);
  74. #endif
  75. if(default_rrdpush_enabled && (!default_rrdpush_destination || !*default_rrdpush_destination || !default_rrdpush_api_key || !*default_rrdpush_api_key)) {
  76. error("STREAM [send]: cannot enable sending thread - information is missing.");
  77. default_rrdpush_enabled = 0;
  78. }
  79. #ifdef ENABLE_HTTPS
  80. if (netdata_use_ssl_on_stream == NETDATA_SSL_OPTIONAL) {
  81. if (default_rrdpush_destination){
  82. char *test = strstr(default_rrdpush_destination,":SSL");
  83. if(test){
  84. *test = 0X00;
  85. netdata_use_ssl_on_stream = NETDATA_SSL_FORCE;
  86. }
  87. }
  88. }
  89. char *invalid_certificate = appconfig_get(&stream_config, CONFIG_SECTION_STREAM, "ssl skip certificate verification", "no");
  90. if ( !strcmp(invalid_certificate,"yes")){
  91. if (netdata_validate_server == NETDATA_SSL_VALID_CERTIFICATE){
  92. info("Netdata is configured to accept invalid SSL certificate.");
  93. netdata_validate_server = NETDATA_SSL_INVALID_CERTIFICATE;
  94. }
  95. }
  96. netdata_ssl_ca_path = appconfig_get(&stream_config, CONFIG_SECTION_STREAM, "CApath", "/etc/ssl/certs/");
  97. netdata_ssl_ca_file = appconfig_get(&stream_config, CONFIG_SECTION_STREAM, "CAfile", "/etc/ssl/certs/certs.pem");
  98. #endif
  99. return default_rrdpush_enabled;
  100. }
  // Data collection happens on multiple threads. Each of them calls
  // rrdset_done(), which in turn calls rrdset_done_push(), which writes to the
  // host's rrdpush_sender_pipe to notify the streaming thread that more data
  // is ready to be sent. These are the indexes of the two ends of that pipe.
  #define PIPE_READ 0
  #define PIPE_WRITE 1
  // To have the remote netdata re-sync the charts to its current clock, the
  // BEGIN line carries 0 instead of the microseconds since the last update
  // for this many iterations after a chart definition has been sent.
  unsigned int remote_clock_resync_iterations = 60;
  113. static inline int should_send_chart_matching(RRDSET *st) {
  114. // Do not stream anomaly rates charts.
  115. if (unlikely(st->state->is_ar_chart))
  116. return false;
  117. if (rrdset_flag_check(st, RRDSET_FLAG_ANOMALY_DETECTION))
  118. return ml_streaming_enabled();
  119. if(!rrdset_flag_check(st, RRDSET_FLAG_UPSTREAM_SEND|RRDSET_FLAG_UPSTREAM_IGNORE)) {
  120. RRDHOST *host = st->rrdhost;
  121. if(simple_pattern_matches(host->rrdpush_send_charts_matching, st->id) ||
  122. simple_pattern_matches(host->rrdpush_send_charts_matching, st->name)) {
  123. rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_IGNORE);
  124. rrdset_flag_set(st, RRDSET_FLAG_UPSTREAM_SEND);
  125. }
  126. else {
  127. rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_SEND);
  128. rrdset_flag_set(st, RRDSET_FLAG_UPSTREAM_IGNORE);
  129. }
  130. }
  131. return(rrdset_flag_check(st, RRDSET_FLAG_UPSTREAM_SEND));
  132. }
  133. int configured_as_parent() {
  134. struct section *section = NULL;
  135. int is_parent = 0;
  136. appconfig_wrlock(&stream_config);
  137. for (section = stream_config.first_section; section; section = section->next) {
  138. uuid_t uuid;
  139. if (uuid_parse(section->name, uuid) != -1 &&
  140. appconfig_get_boolean_by_section(section, "enabled", 0)) {
  141. is_parent = 1;
  142. break;
  143. }
  144. }
  145. appconfig_unlock(&stream_config);
  146. return is_parent;
  147. }
  148. // checks if the current chart definition has been sent
  149. static inline int need_to_send_chart_definition(RRDSET *st) {
  150. rrdset_check_rdlock(st);
  151. if(unlikely(!(rrdset_flag_check(st, RRDSET_FLAG_UPSTREAM_EXPOSED))))
  152. return 1;
  153. RRDDIM *rd;
  154. rrddim_foreach_read(rd, st) {
  155. if(unlikely(!rd->exposed)) {
  156. #ifdef NETDATA_INTERNAL_CHECKS
  157. info("host '%s', chart '%s', dimension '%s' flag 'exposed' triggered chart refresh to upstream", st->rrdhost->hostname, st->id, rd->id);
  158. #endif
  159. return 1;
  160. }
  161. }
  162. return 0;
  163. }
  164. // chart labels
  165. void rrdpush_send_clabels(RRDHOST *host, RRDSET *st) {
  166. struct label_index *labels_c = &st->state->labels;
  167. if (labels_c) {
  168. netdata_rwlock_rdlock(&host->labels.labels_rwlock);
  169. struct label *lbl = labels_c->head;
  170. while(lbl) {
  171. buffer_sprintf(host->sender->build,
  172. "CLABEL \"%s\" \"%s\" %d\n", lbl->key, lbl->value, (int)lbl->label_source);
  173. lbl = lbl->next;
  174. }
  175. if (labels_c->head)
  176. buffer_sprintf(host->sender->build,"CLABEL_COMMIT\n");
  177. netdata_rwlock_unlock(&host->labels.labels_rwlock);
  178. }
  179. }
  180. // Send the current chart definition.
  181. // Assumes that collector thread has already called sender_start for mutex / buffer state.
  182. static inline void rrdpush_send_chart_definition_nolock(RRDSET *st) {
  183. RRDHOST *host = st->rrdhost;
  184. rrdset_flag_set(st, RRDSET_FLAG_UPSTREAM_EXPOSED);
  185. // properly set the name for the remote end to parse it
  186. char *name = "";
  187. if(likely(st->name)) {
  188. if(unlikely(strcmp(st->id, st->name))) {
  189. // they differ
  190. name = strchr(st->name, '.');
  191. if(name)
  192. name++;
  193. else
  194. name = "";
  195. }
  196. }
  197. // send the chart
  198. buffer_sprintf(
  199. host->sender->build
  200. , "CHART \"%s\" \"%s\" \"%s\" \"%s\" \"%s\" \"%s\" \"%s\" %ld %d \"%s %s %s %s\" \"%s\" \"%s\"\n"
  201. , st->id
  202. , name
  203. , st->title
  204. , st->units
  205. , st->family
  206. , st->context
  207. , rrdset_type_name(st->chart_type)
  208. , st->priority
  209. , st->update_every
  210. , rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE)?"obsolete":""
  211. , rrdset_flag_check(st, RRDSET_FLAG_DETAIL)?"detail":""
  212. , rrdset_flag_check(st, RRDSET_FLAG_STORE_FIRST)?"store_first":""
  213. , rrdset_flag_check(st, RRDSET_FLAG_HIDDEN)?"hidden":""
  214. , (st->plugin_name)?st->plugin_name:""
  215. , (st->module_name)?st->module_name:""
  216. );
  217. // send the chart labels
  218. if (host->sender->version >= STREAM_VERSION_CLABELS)
  219. rrdpush_send_clabels(host, st);
  220. // send the dimensions
  221. RRDDIM *rd;
  222. rrddim_foreach_read(rd, st) {
  223. buffer_sprintf(
  224. host->sender->build
  225. , "DIMENSION \"%s\" \"%s\" \"%s\" " COLLECTED_NUMBER_FORMAT " " COLLECTED_NUMBER_FORMAT " \"%s %s %s\"\n"
  226. , rd->id
  227. , rd->name
  228. , rrd_algorithm_name(rd->algorithm)
  229. , rd->multiplier
  230. , rd->divisor
  231. , rrddim_flag_check(rd, RRDDIM_FLAG_OBSOLETE)?"obsolete":""
  232. , rrddim_flag_check(rd, RRDDIM_FLAG_HIDDEN)?"hidden":""
  233. , rrddim_flag_check(rd, RRDDIM_FLAG_DONT_DETECT_RESETS_OR_OVERFLOWS)?"noreset":""
  234. );
  235. rd->exposed = 1;
  236. }
  237. // send the chart local custom variables
  238. RRDSETVAR *rs;
  239. for(rs = st->variables; rs ;rs = rs->next) {
  240. if(unlikely(rs->type == RRDVAR_TYPE_CALCULATED && rs->options & RRDVAR_OPTION_CUSTOM_CHART_VAR)) {
  241. calculated_number *value = (calculated_number *) rs->value;
  242. buffer_sprintf(
  243. host->sender->build
  244. , "VARIABLE CHART %s = " CALCULATED_NUMBER_FORMAT "\n"
  245. , rs->variable
  246. , *value
  247. );
  248. }
  249. }
  250. st->upstream_resync_time = st->last_collected_time.tv_sec + (remote_clock_resync_iterations * st->update_every);
  251. }
  252. // sends the current chart dimensions
  253. static inline void rrdpush_send_chart_metrics_nolock(RRDSET *st, struct sender_state *s) {
  254. RRDHOST *host = st->rrdhost;
  255. buffer_sprintf(host->sender->build, "BEGIN \"%s\" %llu", st->id, (st->last_collected_time.tv_sec > st->upstream_resync_time)?st->usec_since_last_update:0);
  256. if (s->version >= VERSION_GAP_FILLING)
  257. buffer_sprintf(host->sender->build, " %"PRId64"\n", (int64_t)st->last_collected_time.tv_sec);
  258. else
  259. buffer_strcat(host->sender->build, "\n");
  260. RRDDIM *rd;
  261. rrddim_foreach_read(rd, st) {
  262. if(rd->updated && rd->exposed)
  263. buffer_sprintf(host->sender->build
  264. , "SET \"%s\" = " COLLECTED_NUMBER_FORMAT "\n"
  265. , rd->id
  266. , rd->collected_value
  267. );
  268. }
  269. buffer_strcat(host->sender->build, "END\n");
  270. }
  // Forward declaration: the definition appears further down in this file,
  // but rrdset_done_push() calls it before that point.
  static void rrdpush_sender_thread_spawn(RRDHOST *host);
  272. // Called from the internal collectors to mark a chart obsolete.
  273. void rrdset_push_chart_definition_now(RRDSET *st) {
  274. RRDHOST *host = st->rrdhost;
  275. if(unlikely(!host->rrdpush_send_enabled || !should_send_chart_matching(st)))
  276. return;
  277. rrdset_rdlock(st);
  278. sender_start(host->sender);
  279. rrdpush_send_chart_definition_nolock(st);
  280. sender_commit(host->sender);
  281. rrdset_unlock(st);
  282. }
  283. void rrdset_done_push(RRDSET *st) {
  284. if(unlikely(!should_send_chart_matching(st)))
  285. return;
  286. RRDHOST *host = st->rrdhost;
  287. if(unlikely(host->rrdpush_send_enabled && !host->rrdpush_sender_spawn))
  288. rrdpush_sender_thread_spawn(host);
  289. // Handle non-connected case
  290. if(unlikely(!host->rrdpush_sender_connected)) {
  291. if(unlikely(!host->rrdpush_sender_error_shown))
  292. error("STREAM %s [send]: not ready - discarding collected metrics.", host->hostname);
  293. host->rrdpush_sender_error_shown = 1;
  294. return;
  295. }
  296. else if(unlikely(host->rrdpush_sender_error_shown)) {
  297. info("STREAM %s [send]: sending metrics...", host->hostname);
  298. host->rrdpush_sender_error_shown = 0;
  299. }
  300. sender_start(host->sender);
  301. if(need_to_send_chart_definition(st))
  302. rrdpush_send_chart_definition_nolock(st);
  303. rrdpush_send_chart_metrics_nolock(st, host->sender);
  304. // signal the sender there are more data
  305. if(host->rrdpush_sender_pipe[PIPE_WRITE] != -1 && write(host->rrdpush_sender_pipe[PIPE_WRITE], " ", 1) == -1)
  306. error("STREAM %s [send]: cannot write to internal pipe", host->hostname);
  307. sender_commit(host->sender);
  308. }
  309. // labels
  310. void rrdpush_send_labels(RRDHOST *host) {
  311. if (!host->labels.head || !(host->labels.labels_flag & LABEL_FLAG_UPDATE_STREAM) || (host->labels.labels_flag & LABEL_FLAG_STOP_STREAM))
  312. return;
  313. sender_start(host->sender);
  314. rrdhost_rdlock(host);
  315. netdata_rwlock_rdlock(&host->labels.labels_rwlock);
  316. struct label *label_i = host->labels.head;
  317. while(label_i) {
  318. buffer_sprintf(host->sender->build
  319. , "LABEL \"%s\" = %d %s\n"
  320. , label_i->key
  321. , (int)label_i->label_source
  322. , label_i->value);
  323. label_i = label_i->next;
  324. }
  325. buffer_sprintf(host->sender->build
  326. , "OVERWRITE %s\n", "labels");
  327. netdata_rwlock_unlock(&host->labels.labels_rwlock);
  328. rrdhost_unlock(host);
  329. sender_commit(host->sender);
  330. if(host->rrdpush_sender_pipe[PIPE_WRITE] != -1 && write(host->rrdpush_sender_pipe[PIPE_WRITE], " ", 1) == -1)
  331. error("STREAM %s [send]: cannot write to internal pipe", host->hostname);
  332. host->labels.labels_flag &= ~LABEL_FLAG_UPDATE_STREAM;
  333. }
  334. void rrdpush_claimed_id(RRDHOST *host)
  335. {
  336. if(unlikely(!host->rrdpush_send_enabled || !host->rrdpush_sender_connected))
  337. return;
  338. if(host->sender->version < STREAM_VERSION_CLAIM)
  339. return;
  340. sender_start(host->sender);
  341. rrdhost_aclk_state_lock(host);
  342. buffer_sprintf(host->sender->build, "CLAIMED_ID %s %s\n", host->machine_guid, (host->aclk_state.claimed_id ? host->aclk_state.claimed_id : "NULL") );
  343. rrdhost_aclk_state_unlock(host);
  344. sender_commit(host->sender);
  345. // signal the sender there are more data
  346. if(host->rrdpush_sender_pipe[PIPE_WRITE] != -1 && write(host->rrdpush_sender_pipe[PIPE_WRITE], " ", 1) == -1)
  347. error("STREAM %s [send]: cannot write to internal pipe", host->hostname);
  348. }
  349. int connect_to_one_of_destinations(
  350. struct rrdpush_destinations *destinations,
  351. int default_port,
  352. struct timeval *timeout,
  353. size_t *reconnects_counter,
  354. char *connected_to,
  355. size_t connected_to_size,
  356. struct rrdpush_destinations **destination)
  357. {
  358. int sock = -1;
  359. for (struct rrdpush_destinations *d = destinations; d; d = d->next) {
  360. if (d->disabled_no_proper_reply) {
  361. d->disabled_no_proper_reply = 0;
  362. continue;
  363. } else if (d->disabled_because_of_localhost) {
  364. continue;
  365. } else if (d->disabled_already_streaming && (d->disabled_already_streaming + 30 > now_realtime_sec())) {
  366. continue;
  367. } else if (d->disabled_because_of_denied_access) {
  368. d->disabled_because_of_denied_access = 0;
  369. continue;
  370. }
  371. if (reconnects_counter)
  372. *reconnects_counter += 1;
  373. sock = connect_to_this(d->destination, default_port, timeout);
  374. if (sock != -1) {
  375. if (connected_to && connected_to_size) {
  376. strncpy(connected_to, d->destination, connected_to_size);
  377. connected_to[connected_to_size - 1] = '\0';
  378. }
  379. *destination = d;
  380. break;
  381. }
  382. }
  383. return sock;
  384. }
  385. struct rrdpush_destinations *destinations_init(const char *dests) {
  386. const char *s = dests;
  387. struct rrdpush_destinations *destinations = NULL, *prev = NULL;
  388. while(*s) {
  389. const char *e = s;
  390. // skip path, moving both s(tart) and e(nd)
  391. if(*e == '/')
  392. while(!isspace(*e) && *e != ',') s = ++e;
  393. // skip separators, moving both s(tart) and e(nd)
  394. while(isspace(*e) || *e == ',') s = ++e;
  395. // move e(nd) to the first separator
  396. while(*e && !isspace(*e) && *e != ',' && *e != '/') e++;
  397. // is there anything?
  398. if(!*s || s == e) break;
  399. char buf[e - s + 1];
  400. strncpyz(buf, s, e - s);
  401. struct rrdpush_destinations *d = callocz(1, sizeof(struct rrdpush_destinations));
  402. strncpyz(d->destination, buf, sizeof(d->destination)-1);
  403. d->disabled_no_proper_reply = 0;
  404. d->disabled_because_of_localhost = 0;
  405. d->disabled_already_streaming = 0;
  406. d->disabled_because_of_denied_access = 0;
  407. d->next = NULL;
  408. if (!destinations) {
  409. destinations = d;
  410. } else {
  411. prev->next = d;
  412. }
  413. prev = d;
  414. s = e;
  415. }
  416. return destinations;
  417. }
  418. // ----------------------------------------------------------------------------
  419. // rrdpush sender thread
  420. // Either the receiver lost the connection or the host is being destroyed.
  421. // The sender mutex guards thread creation, any spurious data is wiped on reconnection.
  422. void rrdpush_sender_thread_stop(RRDHOST *host) {
  423. netdata_mutex_lock(&host->sender->mutex);
  424. netdata_thread_t thr = 0;
  425. if(host->rrdpush_sender_spawn) {
  426. info("STREAM %s [send]: signaling sending thread to stop...", host->hostname);
  427. // signal the thread that we want to join it
  428. host->rrdpush_sender_join = 1;
  429. // copy the thread id, so that we will be waiting for the right one
  430. // even if a new one has been spawn
  431. thr = host->rrdpush_sender_thread;
  432. // signal it to cancel
  433. netdata_thread_cancel(host->rrdpush_sender_thread);
  434. }
  435. netdata_mutex_unlock(&host->sender->mutex);
  436. if(thr != 0) {
  437. info("STREAM %s [send]: waiting for the sending thread to stop...", host->hostname);
  438. void *result;
  439. netdata_thread_join(thr, &result);
  440. info("STREAM %s [send]: sending thread has exited.", host->hostname);
  441. }
  442. }
  // ----------------------------------------------------------------------------
  // rrdpush receiver thread

  // Write one line to the access log describing a streaming connection event:
  // the calling thread id, the client address, the message, the host name, the
  // API key and the machine GUID.
  void log_stream_connection(const char *client_ip, const char *client_port, const char *api_key, const char *machine_guid, const char *host, const char *msg) {
      log_access(
          "STREAM: %d '[%s]:%s' '%s' host '%s' api key '%s' machine guid '%s'"
          , gettid()
          , client_ip
          , client_port
          , msg
          , host
          , api_key
          , machine_guid
      );
  }
  448. static void rrdpush_sender_thread_spawn(RRDHOST *host) {
  449. netdata_mutex_lock(&host->sender->mutex);
  450. if(!host->rrdpush_sender_spawn) {
  451. char tag[NETDATA_THREAD_TAG_MAX + 1];
  452. snprintfz(tag, NETDATA_THREAD_TAG_MAX, "STREAM_SENDER[%s]", host->hostname);
  453. if(netdata_thread_create(&host->rrdpush_sender_thread, tag, NETDATA_THREAD_OPTION_JOINABLE, rrdpush_sender_thread, (void *) host->sender))
  454. error("STREAM %s [send]: failed to create new thread for client.", host->hostname);
  455. else
  456. host->rrdpush_sender_spawn = 1;
  457. }
  458. netdata_mutex_unlock(&host->sender->mutex);
  459. }
  460. int rrdpush_receiver_permission_denied(struct web_client *w) {
  461. // we always respond with the same message and error code
  462. // to prevent an attacker from gaining info about the error
  463. buffer_flush(w->response.data);
  464. buffer_sprintf(w->response.data, "You are not permitted to access this. Check the logs for more info.");
  465. return 401;
  466. }
  467. int rrdpush_receiver_too_busy_now(struct web_client *w) {
  468. // we always respond with the same message and error code
  469. // to prevent an attacker from gaining info about the error
  470. buffer_flush(w->response.data);
  471. buffer_sprintf(w->response.data, "The server is too busy now to accept this request. Try later.");
  472. return 503;
  473. }
  // Declaration of the receiver thread function.
  // NOTE(review): its definition is not in this part of the file - presumably
  // it lives elsewhere in the project; confirm.
  void *rrdpush_receiver_thread(void *ptr);
  475. int rrdpush_receiver_thread_spawn(struct web_client *w, char *url) {
  476. info("clients wants to STREAM metrics.");
  477. char *key = NULL, *hostname = NULL, *registry_hostname = NULL, *machine_guid = NULL, *os = "unknown", *timezone = "unknown", *abbrev_timezone = "UTC", *tags = NULL;
  478. int32_t utc_offset = 0;
  479. int update_every = default_rrd_update_every;
  480. uint32_t stream_version = UINT_MAX;
  481. char buf[GUID_LEN + 1];
  482. struct rrdhost_system_info *system_info = callocz(1, sizeof(struct rrdhost_system_info));
  483. system_info->hops = 1;
  484. while(url) {
  485. char *value = mystrsep(&url, "&");
  486. if(!value || !*value) continue;
  487. char *name = mystrsep(&value, "=");
  488. if(!name || !*name) continue;
  489. if(!value || !*value) continue;
  490. if(!strcmp(name, "key"))
  491. key = value;
  492. else if(!strcmp(name, "hostname"))
  493. hostname = value;
  494. else if(!strcmp(name, "registry_hostname"))
  495. registry_hostname = value;
  496. else if(!strcmp(name, "machine_guid"))
  497. machine_guid = value;
  498. else if(!strcmp(name, "update_every"))
  499. update_every = (int)strtoul(value, NULL, 0);
  500. else if(!strcmp(name, "os"))
  501. os = value;
  502. else if(!strcmp(name, "timezone"))
  503. timezone = value;
  504. else if(!strcmp(name, "abbrev_timezone"))
  505. abbrev_timezone = value;
  506. else if(!strcmp(name, "utc_offset"))
  507. utc_offset = (int32_t)strtol(value, NULL, 0);
  508. else if(!strcmp(name, "hops"))
  509. system_info->hops = (uint16_t) strtoul(value, NULL, 0);
  510. else if(!strcmp(name, "ml_capable"))
  511. system_info->ml_capable = strtoul(value, NULL, 0);
  512. else if(!strcmp(name, "ml_enabled"))
  513. system_info->ml_enabled = strtoul(value, NULL, 0);
  514. else if(!strcmp(name, "mc_version"))
  515. system_info->mc_version = strtoul(value, NULL, 0);
  516. else if(!strcmp(name, "tags"))
  517. tags = value;
  518. else if(!strcmp(name, "ver"))
  519. stream_version = MIN((uint32_t) strtoul(value, NULL, 0), STREAMING_PROTOCOL_CURRENT_VERSION);
  520. else {
  521. // An old Netdata child does not have a compatible streaming protocol, map to something sane.
  522. if (!strcmp(name, "NETDATA_SYSTEM_OS_NAME"))
  523. name = "NETDATA_HOST_OS_NAME";
  524. else if (!strcmp(name, "NETDATA_SYSTEM_OS_ID"))
  525. name = "NETDATA_HOST_OS_ID";
  526. else if (!strcmp(name, "NETDATA_SYSTEM_OS_ID_LIKE"))
  527. name = "NETDATA_HOST_OS_ID_LIKE";
  528. else if (!strcmp(name, "NETDATA_SYSTEM_OS_VERSION"))
  529. name = "NETDATA_HOST_OS_VERSION";
  530. else if (!strcmp(name, "NETDATA_SYSTEM_OS_VERSION_ID"))
  531. name = "NETDATA_HOST_OS_VERSION_ID";
  532. else if (!strcmp(name, "NETDATA_SYSTEM_OS_DETECTION"))
  533. name = "NETDATA_HOST_OS_DETECTION";
  534. else if(!strcmp(name, "NETDATA_PROTOCOL_VERSION") && stream_version == UINT_MAX) {
  535. stream_version = 1;
  536. }
  537. if (unlikely(rrdhost_set_system_info_variable(system_info, name, value))) {
  538. info("STREAM [receive from [%s]:%s]: request has parameter '%s' = '%s', which is not used.",
  539. w->client_ip, w->client_port, name, value);
  540. }
  541. }
  542. }
  543. if (stream_version == UINT_MAX)
  544. stream_version = 0;
  545. if(!key || !*key) {
  546. rrdhost_system_info_free(system_info);
  547. log_stream_connection(w->client_ip, w->client_port, (key && *key)?key:"-", (machine_guid && *machine_guid)?machine_guid:"-", (hostname && *hostname)?hostname:"-", "ACCESS DENIED - NO KEY");
  548. error("STREAM [receive from [%s]:%s]: request without an API key. Forbidding access.", w->client_ip, w->client_port);
  549. return rrdpush_receiver_permission_denied(w);
  550. }
  551. if(!hostname || !*hostname) {
  552. rrdhost_system_info_free(system_info);
  553. log_stream_connection(w->client_ip, w->client_port, (key && *key)?key:"-", (machine_guid && *machine_guid)?machine_guid:"-", (hostname && *hostname)?hostname:"-", "ACCESS DENIED - NO HOSTNAME");
  554. error("STREAM [receive from [%s]:%s]: request without a hostname. Forbidding access.", w->client_ip, w->client_port);
  555. return rrdpush_receiver_permission_denied(w);
  556. }
  557. if(!machine_guid || !*machine_guid) {
  558. rrdhost_system_info_free(system_info);
  559. log_stream_connection(w->client_ip, w->client_port, (key && *key)?key:"-", (machine_guid && *machine_guid)?machine_guid:"-", (hostname && *hostname)?hostname:"-", "ACCESS DENIED - NO MACHINE GUID");
  560. error("STREAM [receive from [%s]:%s]: request without a machine GUID. Forbidding access.", w->client_ip, w->client_port);
  561. return rrdpush_receiver_permission_denied(w);
  562. }
  563. if(regenerate_guid(key, buf) == -1) {
  564. rrdhost_system_info_free(system_info);
  565. log_stream_connection(w->client_ip, w->client_port, (key && *key)?key:"-", (machine_guid && *machine_guid)?machine_guid:"-", (hostname && *hostname)?hostname:"-", "ACCESS DENIED - INVALID KEY");
  566. error("STREAM [receive from [%s]:%s]: API key '%s' is not valid GUID (use the command uuidgen to generate one). Forbidding access.", w->client_ip, w->client_port, key);
  567. return rrdpush_receiver_permission_denied(w);
  568. }
  569. if(regenerate_guid(machine_guid, buf) == -1) {
  570. rrdhost_system_info_free(system_info);
  571. log_stream_connection(w->client_ip, w->client_port, (key && *key)?key:"-", (machine_guid && *machine_guid)?machine_guid:"-", (hostname && *hostname)?hostname:"-", "ACCESS DENIED - INVALID MACHINE GUID");
  572. error("STREAM [receive from [%s]:%s]: machine GUID '%s' is not GUID. Forbidding access.", w->client_ip, w->client_port, machine_guid);
  573. return rrdpush_receiver_permission_denied(w);
  574. }
  575. if(!appconfig_get_boolean(&stream_config, key, "enabled", 0)) {
  576. rrdhost_system_info_free(system_info);
  577. log_stream_connection(w->client_ip, w->client_port, (key && *key)?key:"-", (machine_guid && *machine_guid)?machine_guid:"-", (hostname && *hostname)?hostname:"-", "ACCESS DENIED - KEY NOT ENABLED");
  578. error("STREAM [receive from [%s]:%s]: API key '%s' is not allowed. Forbidding access.", w->client_ip, w->client_port, key);
  579. return rrdpush_receiver_permission_denied(w);
  580. }
  581. {
  582. SIMPLE_PATTERN *key_allow_from = simple_pattern_create(appconfig_get(&stream_config, key, "allow from", "*"), NULL, SIMPLE_PATTERN_EXACT);
  583. if(key_allow_from) {
  584. if(!simple_pattern_matches(key_allow_from, w->client_ip)) {
  585. simple_pattern_free(key_allow_from);
  586. rrdhost_system_info_free(system_info);
  587. log_stream_connection(w->client_ip, w->client_port, (key && *key)?key:"-", (machine_guid && *machine_guid)?machine_guid:"-", (hostname && *hostname) ? hostname : "-", "ACCESS DENIED - KEY NOT ALLOWED FROM THIS IP");
  588. error("STREAM [receive from [%s]:%s]: API key '%s' is not permitted from this IP. Forbidding access.", w->client_ip, w->client_port, key);
  589. return rrdpush_receiver_permission_denied(w);
  590. }
  591. simple_pattern_free(key_allow_from);
  592. }
  593. }
  594. if(!appconfig_get_boolean(&stream_config, machine_guid, "enabled", 1)) {
  595. rrdhost_system_info_free(system_info);
  596. log_stream_connection(w->client_ip, w->client_port, (key && *key)?key:"-", (machine_guid && *machine_guid)?machine_guid:"-", (hostname && *hostname)?hostname:"-", "ACCESS DENIED - MACHINE GUID NOT ENABLED");
  597. error("STREAM [receive from [%s]:%s]: machine GUID '%s' is not allowed. Forbidding access.", w->client_ip, w->client_port, machine_guid);
  598. return rrdpush_receiver_permission_denied(w);
  599. }
  600. {
  601. SIMPLE_PATTERN *machine_allow_from = simple_pattern_create(appconfig_get(&stream_config, machine_guid, "allow from", "*"), NULL, SIMPLE_PATTERN_EXACT);
  602. if(machine_allow_from) {
  603. if(!simple_pattern_matches(machine_allow_from, w->client_ip)) {
  604. simple_pattern_free(machine_allow_from);
  605. rrdhost_system_info_free(system_info);
  606. log_stream_connection(w->client_ip, w->client_port, (key && *key)?key:"-", (machine_guid && *machine_guid)?machine_guid:"-", (hostname && *hostname) ? hostname : "-", "ACCESS DENIED - MACHINE GUID NOT ALLOWED FROM THIS IP");
  607. error("STREAM [receive from [%s]:%s]: Machine GUID '%s' is not permitted from this IP. Forbidding access.", w->client_ip, w->client_port, machine_guid);
  608. return rrdpush_receiver_permission_denied(w);
  609. }
  610. simple_pattern_free(machine_allow_from);
  611. }
  612. }
  613. if(unlikely(web_client_streaming_rate_t > 0)) {
  614. static netdata_mutex_t stream_rate_mutex = NETDATA_MUTEX_INITIALIZER;
  615. static volatile time_t last_stream_accepted_t = 0;
  616. netdata_mutex_lock(&stream_rate_mutex);
  617. time_t now = now_realtime_sec();
  618. if(unlikely(last_stream_accepted_t == 0))
  619. last_stream_accepted_t = now;
  620. if(now - last_stream_accepted_t < web_client_streaming_rate_t) {
  621. netdata_mutex_unlock(&stream_rate_mutex);
  622. rrdhost_system_info_free(system_info);
  623. error("STREAM [receive from [%s]:%s]: too busy to accept new streaming request. Will be allowed in %ld secs.", w->client_ip, w->client_port, (long)(web_client_streaming_rate_t - (now - last_stream_accepted_t)));
  624. return rrdpush_receiver_too_busy_now(w);
  625. }
  626. last_stream_accepted_t = now;
  627. netdata_mutex_unlock(&stream_rate_mutex);
  628. }
  629. /*
  630. * Quick path for rejecting multiple connections. The lock taken is fine-grained - it only protects the receiver
  631. * pointer within the host (if a host exists). This protects against multiple concurrent web requests hitting
  632. * separate threads within the web-server and landing here. The lock guards the thread-shutdown sequence that
  633. * detaches the receiver from the host. If the host is being created (first-time access) then we also use the
  634. * lock to prevent a race hazard (two threads try to create the host concurrently; one wins and the other does a
  635. * lookup of the now-attached structure).
  636. */
  637. struct receiver_state *rpt = callocz(1, sizeof(*rpt));
  638. rrd_rdlock();
  639. RRDHOST *host = rrdhost_find_by_guid(machine_guid, 0);
  640. if (unlikely(host && rrdhost_flag_check(host, RRDHOST_FLAG_ARCHIVED))) /* Ignore archived hosts. */
  641. host = NULL;
  642. if (host) {
  643. rrdhost_wrlock(host);
  644. netdata_mutex_lock(&host->receiver_lock);
  645. rrdhost_flag_clear(host, RRDHOST_FLAG_ORPHAN);
  646. host->senders_disconnected_time = 0;
  647. if (host->receiver != NULL) {
  648. time_t age = now_realtime_sec() - host->receiver->last_msg_t;
  649. if (age > 30) {
  650. host->receiver->shutdown = 1;
  651. shutdown(host->receiver->fd, SHUT_RDWR);
  652. host->receiver = NULL; // Thread holds reference to structure
  653. info(
  654. "STREAM %s [receive from [%s]:%s]: multiple connections for same host detected - "
  655. "existing connection is dead (%"PRId64" sec), accepting new connection.",
  656. host->hostname,
  657. w->client_ip,
  658. w->client_port,
  659. (int64_t)age);
  660. }
  661. else {
  662. netdata_mutex_unlock(&host->receiver_lock);
  663. rrdhost_unlock(host);
  664. rrd_unlock();
  665. log_stream_connection(w->client_ip, w->client_port, key, host->machine_guid, host->hostname,
  666. "REJECTED - ALREADY CONNECTED");
  667. info(
  668. "STREAM %s [receive from [%s]:%s]: multiple connections for same host detected - "
  669. "existing connection is active (within last %"PRId64" sec), rejecting new connection.",
  670. host->hostname,
  671. w->client_ip,
  672. w->client_port,
  673. (int64_t)age);
  674. // Have not set WEB_CLIENT_FLAG_DONT_CLOSE_SOCKET - caller should clean up
  675. buffer_flush(w->response.data);
  676. buffer_strcat(w->response.data, "This GUID is already streaming to this server");
  677. freez(rpt);
  678. return 409;
  679. }
  680. }
  681. host->receiver = rpt;
  682. netdata_mutex_unlock(&host->receiver_lock);
  683. rrdhost_unlock(host);
  684. }
  685. rrd_unlock();
  686. rpt->last_msg_t = now_realtime_sec();
  687. rpt->host = host;
  688. rpt->fd = w->ifd;
  689. rpt->key = strdupz(key);
  690. rpt->hostname = strdupz(hostname);
  691. rpt->registry_hostname = strdupz((registry_hostname && *registry_hostname)?registry_hostname:hostname);
  692. rpt->machine_guid = strdupz(machine_guid);
  693. rpt->os = strdupz(os);
  694. rpt->timezone = strdupz(timezone);
  695. rpt->abbrev_timezone = strdupz(abbrev_timezone);
  696. rpt->utc_offset = utc_offset;
  697. rpt->tags = (tags)?strdupz(tags):NULL;
  698. rpt->client_ip = strdupz(w->client_ip);
  699. rpt->client_port = strdupz(w->client_port);
  700. rpt->update_every = update_every;
  701. rpt->system_info = system_info;
  702. rpt->stream_version = stream_version;
  703. #ifdef ENABLE_HTTPS
  704. rpt->ssl.conn = w->ssl.conn;
  705. rpt->ssl.flags = w->ssl.flags;
  706. w->ssl.conn = NULL;
  707. w->ssl.flags = NETDATA_SSL_START;
  708. #endif
  709. if(w->user_agent && w->user_agent[0]) {
  710. char *t = strchr(w->user_agent, '/');
  711. if(t && *t) {
  712. *t = '\0';
  713. t++;
  714. }
  715. rpt->program_name = strdupz(w->user_agent);
  716. if(t && *t) rpt->program_version = strdupz(t);
  717. }
  718. debug(D_SYSTEM, "starting STREAM receive thread.");
  719. char tag[FILENAME_MAX + 1];
  720. snprintfz(tag, FILENAME_MAX, "STREAM_RECEIVER[%s,[%s]:%s]", rpt->hostname, w->client_ip, w->client_port);
  721. if(netdata_thread_create(&rpt->thread, tag, NETDATA_THREAD_OPTION_DEFAULT, rrdpush_receiver_thread, (void *)rpt))
  722. error("Failed to create new STREAM receive thread for client.");
  723. // prevent the caller from closing the streaming socket
  724. if(web_server_mode == WEB_SERVER_MODE_STATIC_THREADED) {
  725. web_client_flag_set(w, WEB_CLIENT_FLAG_DONT_CLOSE_SOCKET);
  726. }
  727. else {
  728. if(w->ifd == w->ofd)
  729. w->ifd = w->ofd = -1;
  730. else
  731. w->ifd = -1;
  732. }
  733. buffer_flush(w->response.data);
  734. return 200;
  735. }