receiver.c 32 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851
  1. // SPDX-License-Identifier: GPL-3.0-or-later
  2. #include "rrdpush.h"
  3. extern struct config stream_config;
  4. void receiver_state_free(struct receiver_state *rpt) {
  5. freez(rpt->key);
  6. freez(rpt->hostname);
  7. freez(rpt->registry_hostname);
  8. freez(rpt->machine_guid);
  9. freez(rpt->os);
  10. freez(rpt->timezone);
  11. freez(rpt->abbrev_timezone);
  12. freez(rpt->tags);
  13. freez(rpt->client_ip);
  14. freez(rpt->client_port);
  15. freez(rpt->program_name);
  16. freez(rpt->program_version);
  17. #ifdef ENABLE_HTTPS
  18. netdata_ssl_close(&rpt->ssl);
  19. #endif
  20. if(rpt->fd != -1) {
  21. internal_error(true, "closing socket...");
  22. close(rpt->fd);
  23. }
  24. #ifdef ENABLE_RRDPUSH_COMPRESSION
  25. rrdpush_decompressor_destroy(&rpt->decompressor);
  26. #endif
  27. if(rpt->system_info)
  28. rrdhost_system_info_free(rpt->system_info);
  29. __atomic_sub_fetch(&netdata_buffers_statistics.rrdhost_receivers, sizeof(*rpt), __ATOMIC_RELAXED);
  30. freez(rpt);
  31. }
  32. #include "collectors/plugins.d/pluginsd_parser.h"
  33. // IMPORTANT: to add workers, you have to edit WORKER_PARSER_FIRST_JOB accordingly
  34. #define WORKER_RECEIVER_JOB_BYTES_READ (WORKER_PARSER_FIRST_JOB - 1)
  35. #define WORKER_RECEIVER_JOB_BYTES_UNCOMPRESSED (WORKER_PARSER_FIRST_JOB - 2)
  36. // this has to be the same at parser.h
  37. #define WORKER_RECEIVER_JOB_REPLICATION_COMPLETION (WORKER_PARSER_FIRST_JOB - 3)
  38. #if WORKER_PARSER_FIRST_JOB < 1
  39. #error The define WORKER_PARSER_FIRST_JOB needs to be at least 1
  40. #endif
  41. static inline int read_stream(struct receiver_state *r, char* buffer, size_t size) {
  42. if(unlikely(!size)) {
  43. internal_error(true, "%s() asked to read zero bytes", __FUNCTION__);
  44. return 0;
  45. }
  46. int tries = 100;
  47. ssize_t bytes_read;
  48. do {
  49. errno = 0;
  50. #ifdef ENABLE_HTTPS
  51. if (SSL_connection(&r->ssl))
  52. bytes_read = netdata_ssl_read(&r->ssl, buffer, size);
  53. else
  54. bytes_read = read(r->fd, buffer, size);
  55. #else
  56. bytes_read = read(r->fd, buffer, size);
  57. #endif
  58. } while(bytes_read < 0 && errno == EINTR && tries--);
  59. if((bytes_read == 0 || bytes_read == -1) && (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINPROGRESS)) {
  60. netdata_log_error("STREAM: %s(): timeout while waiting for data on socket!", __FUNCTION__);
  61. bytes_read = -3;
  62. }
  63. else if (bytes_read == 0) {
  64. netdata_log_error("STREAM: %s(): EOF while reading data from socket!", __FUNCTION__);
  65. bytes_read = -1;
  66. }
  67. else if (bytes_read < 0) {
  68. netdata_log_error("STREAM: %s() failed to read from socket!", __FUNCTION__);
  69. bytes_read = -2;
  70. }
  71. return (int)bytes_read;
  72. }
  73. static inline bool receiver_read_uncompressed(struct receiver_state *r) {
  74. #ifdef NETDATA_INTERNAL_CHECKS
  75. if(r->reader.read_buffer[r->reader.read_len] != '\0')
  76. fatal("%s(): read_buffer does not start with zero", __FUNCTION__ );
  77. #endif
  78. int bytes_read = read_stream(r, r->reader.read_buffer + r->reader.read_len, sizeof(r->reader.read_buffer) - r->reader.read_len - 1);
  79. if(unlikely(bytes_read <= 0))
  80. return false;
  81. worker_set_metric(WORKER_RECEIVER_JOB_BYTES_READ, (NETDATA_DOUBLE)bytes_read);
  82. worker_set_metric(WORKER_RECEIVER_JOB_BYTES_UNCOMPRESSED, (NETDATA_DOUBLE)bytes_read);
  83. r->reader.read_len += bytes_read;
  84. r->reader.read_buffer[r->reader.read_len] = '\0';
  85. return true;
  86. }
  87. #ifdef ENABLE_RRDPUSH_COMPRESSION
  88. static inline bool receiver_read_compressed(struct receiver_state *r) {
  89. internal_fatal(r->reader.read_buffer[r->reader.read_len] != '\0',
  90. "%s: read_buffer does not start with zero #2", __FUNCTION__ );
  91. // first use any available uncompressed data
  92. if (likely(rrdpush_decompressed_bytes_in_buffer(&r->decompressor))) {
  93. size_t available = sizeof(r->reader.read_buffer) - r->reader.read_len - 1;
  94. if (likely(available)) {
  95. size_t len = rrdpush_decompressor_get(&r->decompressor, r->reader.read_buffer + r->reader.read_len, available);
  96. if (unlikely(!len)) {
  97. internal_error(true, "decompressor returned zero length #1");
  98. return false;
  99. }
  100. r->reader.read_len += (int)len;
  101. r->reader.read_buffer[r->reader.read_len] = '\0';
  102. }
  103. else
  104. internal_fatal(true, "The line to read is too big! Already have %zd bytes in read_buffer.", r->reader.read_len);
  105. return true;
  106. }
  107. // no decompressed data available
  108. // read the compression signature of the next block
  109. if(unlikely(r->reader.read_len + r->decompressor.signature_size > sizeof(r->reader.read_buffer) - 1)) {
  110. internal_error(true, "The last incomplete line does not leave enough room for the next compression header! "
  111. "Already have %zd bytes in read_buffer.", r->reader.read_len);
  112. return false;
  113. }
  114. // read the compression signature from the stream
  115. // we have to do a loop here, because read_stream() may return less than the data we need
  116. int bytes_read = 0;
  117. do {
  118. int ret = read_stream(r, r->reader.read_buffer + r->reader.read_len + bytes_read, r->decompressor.signature_size - bytes_read);
  119. if (unlikely(ret <= 0))
  120. return false;
  121. bytes_read += ret;
  122. } while(unlikely(bytes_read < (int)r->decompressor.signature_size));
  123. worker_set_metric(WORKER_RECEIVER_JOB_BYTES_READ, (NETDATA_DOUBLE)bytes_read);
  124. if(unlikely(bytes_read != (int)r->decompressor.signature_size))
  125. fatal("read %d bytes, but expected compression signature of size %zu", bytes_read, r->decompressor.signature_size);
  126. size_t compressed_message_size = rrdpush_decompressor_start(&r->decompressor, r->reader.read_buffer + r->reader.read_len, bytes_read);
  127. if (unlikely(!compressed_message_size)) {
  128. internal_error(true, "multiplexed uncompressed data in compressed stream!");
  129. r->reader.read_len += bytes_read;
  130. r->reader.read_buffer[r->reader.read_len] = '\0';
  131. return true;
  132. }
  133. if(unlikely(compressed_message_size > COMPRESSION_MAX_MSG_SIZE)) {
  134. netdata_log_error("received a compressed message of %zu bytes, which is bigger than the max compressed message size supported of %zu. Ignoring message.",
  135. compressed_message_size, (size_t)COMPRESSION_MAX_MSG_SIZE);
  136. return false;
  137. }
  138. // delete compression header from our read buffer
  139. r->reader.read_buffer[r->reader.read_len] = '\0';
  140. // Read the entire compressed block of compressed data
  141. char compressed[compressed_message_size];
  142. size_t compressed_bytes_read = 0;
  143. do {
  144. size_t start = compressed_bytes_read;
  145. size_t remaining = compressed_message_size - start;
  146. int last_read_bytes = read_stream(r, &compressed[start], remaining);
  147. if (unlikely(last_read_bytes <= 0)) {
  148. internal_error(true, "read_stream() failed #2, with code %d", last_read_bytes);
  149. return false;
  150. }
  151. compressed_bytes_read += last_read_bytes;
  152. } while(unlikely(compressed_message_size > compressed_bytes_read));
  153. worker_set_metric(WORKER_RECEIVER_JOB_BYTES_READ, (NETDATA_DOUBLE)compressed_bytes_read);
  154. // decompress the compressed block
  155. size_t bytes_to_parse = rrdpush_decompress(&r->decompressor, compressed, compressed_bytes_read);
  156. if (unlikely(!bytes_to_parse)) {
  157. internal_error(true, "no bytes to parse.");
  158. return false;
  159. }
  160. worker_set_metric(WORKER_RECEIVER_JOB_BYTES_UNCOMPRESSED, (NETDATA_DOUBLE)bytes_to_parse);
  161. // fill read buffer with decompressed data
  162. size_t len = (int) rrdpush_decompressor_get(&r->decompressor, r->reader.read_buffer + r->reader.read_len, sizeof(r->reader.read_buffer) - r->reader.read_len - 1);
  163. if (unlikely(!len)) {
  164. internal_error(true, "decompressor returned zero length #2");
  165. return false;
  166. }
  167. r->reader.read_len += (int)len;
  168. r->reader.read_buffer[r->reader.read_len] = '\0';
  169. return true;
  170. }
  171. #else // !ENABLE_RRDPUSH_COMPRESSION
  172. static inline bool receiver_read_compressed(struct receiver_state *r) {
  173. return receiver_read_uncompressed(r);
  174. }
  175. #endif // ENABLE_RRDPUSH_COMPRESSION
  176. /* Produce a full line if one exists, statefully return where we start next time.
  177. * When we hit the end of the buffer with a partial line move it to the beginning for the next fill.
  178. */
  179. inline char *buffered_reader_next_line(struct buffered_reader *reader, char *dst, size_t dst_size) {
  180. size_t start = reader->pos;
  181. char *ss = &reader->read_buffer[start];
  182. char *se = &reader->read_buffer[reader->read_len];
  183. char *ds = dst;
  184. char *de = &dst[dst_size - 2];
  185. if(ss >= se) {
  186. *ds = '\0';
  187. reader->pos = 0;
  188. reader->read_len = 0;
  189. reader->read_buffer[reader->read_len] = '\0';
  190. return NULL;
  191. }
  192. // copy all bytes to buffer
  193. while(ss < se && ds < de && *ss != '\n')
  194. *ds++ = *ss++;
  195. // if we have a newline, return the buffer
  196. if(ss < se && ds < de && *ss == '\n') {
  197. // newline found in the r->read_buffer
  198. *ds++ = *ss++; // copy the newline too
  199. *ds = '\0';
  200. reader->pos = ss - reader->read_buffer;
  201. return dst;
  202. }
  203. // if the destination is full, oops!
  204. if(ds == de) {
  205. netdata_log_error("STREAM: received line exceeds %d bytes. Truncating it.", PLUGINSD_LINE_MAX);
  206. *ds = '\0';
  207. reader->pos = ss - reader->read_buffer;
  208. return dst;
  209. }
  210. // no newline found in the r->read_buffer
  211. // move everything to the beginning
  212. memmove(reader->read_buffer, &reader->read_buffer[start], reader->read_len - start);
  213. reader->read_len -= (int)start;
  214. reader->read_buffer[reader->read_len] = '\0';
  215. *ds = '\0';
  216. reader->pos = 0;
  217. return NULL;
  218. }
  219. bool plugin_is_enabled(struct plugind *cd);
  220. static void receiver_set_exit_reason(struct receiver_state *rpt, STREAM_HANDSHAKE reason, bool force) {
  221. if(force || !rpt->exit.reason)
  222. rpt->exit.reason = reason;
  223. }
  224. static inline bool receiver_should_stop(struct receiver_state *rpt) {
  225. static __thread size_t counter = 0;
  226. if(unlikely(rpt->exit.shutdown)) {
  227. receiver_set_exit_reason(rpt, STREAM_HANDSHAKE_DISCONNECT_SHUTDOWN, false);
  228. return true;
  229. }
  230. if(unlikely(!service_running(SERVICE_STREAMING))) {
  231. receiver_set_exit_reason(rpt, STREAM_HANDSHAKE_DISCONNECT_NETDATA_EXIT, false);
  232. return true;
  233. }
  234. if(unlikely((counter++ % 1000) == 0)) {
  235. // check every 1000 lines read
  236. netdata_thread_testcancel();
  237. rpt->last_msg_t = now_monotonic_sec();
  238. }
  239. return false;
  240. }
  241. static size_t streaming_parser(struct receiver_state *rpt, struct plugind *cd, int fd, void *ssl) {
  242. size_t result = 0;
  243. PARSER *parser = NULL;
  244. {
  245. PARSER_USER_OBJECT user = {
  246. .enabled = plugin_is_enabled(cd),
  247. .host = rpt->host,
  248. .opaque = rpt,
  249. .cd = cd,
  250. .trust_durations = 1,
  251. .capabilities = rpt->capabilities,
  252. };
  253. parser = parser_init(&user, NULL, NULL, fd, PARSER_INPUT_SPLIT, ssl);
  254. }
  255. pluginsd_keywords_init(parser, PARSER_INIT_STREAMING);
  256. rrd_collector_started();
  257. // this keeps the parser with its current value
  258. // so, parser needs to be allocated before pushing it
  259. netdata_thread_cleanup_push(pluginsd_process_thread_cleanup, parser);
  260. bool compressed_connection = false;
  261. #ifdef ENABLE_RRDPUSH_COMPRESSION
  262. if(stream_has_capability(rpt, STREAM_CAP_COMPRESSION)) {
  263. compressed_connection = true;
  264. rrdpush_decompressor_reset(&rpt->decompressor);
  265. }
  266. else
  267. rrdpush_decompressor_destroy(&rpt->decompressor);
  268. #endif
  269. buffered_reader_init(&rpt->reader);
  270. char buffer[PLUGINSD_LINE_MAX + 2] = "";
  271. while(!receiver_should_stop(rpt)) {
  272. if(!buffered_reader_next_line(&rpt->reader, buffer, PLUGINSD_LINE_MAX + 2)) {
  273. bool have_new_data = compressed_connection ? receiver_read_compressed(rpt) : receiver_read_uncompressed(rpt);
  274. if(unlikely(!have_new_data)) {
  275. receiver_set_exit_reason(rpt, STREAM_HANDSHAKE_DISCONNECT_SOCKET_READ_ERROR, false);
  276. break;
  277. }
  278. continue;
  279. }
  280. if (unlikely(parser_action(parser, buffer))) {
  281. internal_error(true, "parser_action() failed on keyword '%s'.", buffer);
  282. receiver_set_exit_reason(rpt, STREAM_HANDSHAKE_DISCONNECT_PARSER_FAILED, false);
  283. break;
  284. }
  285. }
  286. result = parser->user.data_collections_count;
  287. // free parser with the pop function
  288. netdata_thread_cleanup_pop(1);
  289. return result;
  290. }
  291. static void rrdpush_receiver_replication_reset(RRDHOST *host) {
  292. RRDSET *st;
  293. rrdset_foreach_read(st, host) {
  294. rrdset_flag_clear(st, RRDSET_FLAG_RECEIVER_REPLICATION_IN_PROGRESS);
  295. rrdset_flag_set(st, RRDSET_FLAG_RECEIVER_REPLICATION_FINISHED);
  296. }
  297. rrdset_foreach_done(st);
  298. rrdhost_receiver_replicating_charts_zero(host);
  299. }
  300. static bool rrdhost_set_receiver(RRDHOST *host, struct receiver_state *rpt) {
  301. bool signal_rrdcontext = false;
  302. bool set_this = false;
  303. netdata_mutex_lock(&host->receiver_lock);
  304. if (!host->receiver) {
  305. rrdhost_flag_clear(host, RRDHOST_FLAG_ORPHAN);
  306. host->rrdpush_receiver_connection_counter++;
  307. __atomic_add_fetch(&localhost->connected_children_count, 1, __ATOMIC_RELAXED);
  308. host->receiver = rpt;
  309. rpt->host = host;
  310. host->child_connect_time = now_realtime_sec();
  311. host->child_disconnected_time = 0;
  312. host->child_last_chart_command = 0;
  313. host->trigger_chart_obsoletion_check = 1;
  314. if (rpt->config.health_enabled != CONFIG_BOOLEAN_NO) {
  315. if (rpt->config.alarms_delay > 0) {
  316. host->health.health_delay_up_to = now_realtime_sec() + rpt->config.alarms_delay;
  317. netdata_log_health(
  318. "[%s]: Postponing health checks for %" PRId64 " seconds, because it was just connected.",
  319. rrdhost_hostname(host),
  320. (int64_t) rpt->config.alarms_delay);
  321. }
  322. }
  323. host->health_log.health_log_history = rpt->config.alarms_history;
  324. // this is a test
  325. // if(rpt->hops <= host->sender->hops)
  326. // rrdpush_sender_thread_stop(host, "HOPS MISMATCH", false);
  327. signal_rrdcontext = true;
  328. rrdpush_receiver_replication_reset(host);
  329. rrdhost_flag_clear(rpt->host, RRDHOST_FLAG_RRDPUSH_RECEIVER_DISCONNECTED);
  330. aclk_queue_node_info(rpt->host, true);
  331. rrdpush_reset_destinations_postpone_time(host);
  332. set_this = true;
  333. }
  334. netdata_mutex_unlock(&host->receiver_lock);
  335. if(signal_rrdcontext)
  336. rrdcontext_host_child_connected(host);
  337. return set_this;
  338. }
  339. static void rrdhost_clear_receiver(struct receiver_state *rpt) {
  340. bool signal_rrdcontext = false;
  341. RRDHOST *host = rpt->host;
  342. if(host) {
  343. netdata_mutex_lock(&host->receiver_lock);
  344. // Make sure that we detach this thread and don't kill a freshly arriving receiver
  345. if(host->receiver == rpt) {
  346. __atomic_sub_fetch(&localhost->connected_children_count, 1, __ATOMIC_RELAXED);
  347. rrdhost_flag_set(rpt->host, RRDHOST_FLAG_RRDPUSH_RECEIVER_DISCONNECTED);
  348. host->trigger_chart_obsoletion_check = 0;
  349. host->child_connect_time = 0;
  350. host->child_disconnected_time = now_realtime_sec();
  351. if (rpt->config.health_enabled == CONFIG_BOOLEAN_AUTO)
  352. host->health.health_enabled = 0;
  353. rrdpush_sender_thread_stop(host, STREAM_HANDSHAKE_DISCONNECT_RECEIVER_LEFT, false);
  354. signal_rrdcontext = true;
  355. rrdpush_receiver_replication_reset(host);
  356. rrdhost_flag_set(host, RRDHOST_FLAG_ORPHAN);
  357. host->receiver = NULL;
  358. host->rrdpush_last_receiver_exit_reason = rpt->exit.reason;
  359. }
  360. netdata_mutex_unlock(&host->receiver_lock);
  361. if(signal_rrdcontext)
  362. rrdcontext_host_child_disconnected(host);
  363. rrdpush_reset_destinations_postpone_time(host);
  364. }
  365. }
  366. bool stop_streaming_receiver(RRDHOST *host, STREAM_HANDSHAKE reason) {
  367. bool ret = false;
  368. netdata_mutex_lock(&host->receiver_lock);
  369. if(host->receiver) {
  370. if(!host->receiver->exit.shutdown) {
  371. host->receiver->exit.shutdown = true;
  372. receiver_set_exit_reason(host->receiver, reason, true);
  373. shutdown(host->receiver->fd, SHUT_RDWR);
  374. }
  375. netdata_thread_cancel(host->receiver->thread);
  376. }
  377. int count = 2000;
  378. while (host->receiver && count-- > 0) {
  379. netdata_mutex_unlock(&host->receiver_lock);
  380. // let the lock for the receiver thread to exit
  381. sleep_usec(1 * USEC_PER_MS);
  382. netdata_mutex_lock(&host->receiver_lock);
  383. }
  384. if(host->receiver)
  385. netdata_log_error("STREAM '%s' [receive from [%s]:%s]: "
  386. "thread %d takes too long to stop, giving up..."
  387. , rrdhost_hostname(host)
  388. , host->receiver->client_ip, host->receiver->client_port
  389. , host->receiver->tid);
  390. else
  391. ret = true;
  392. netdata_mutex_unlock(&host->receiver_lock);
  393. return ret;
  394. }
  395. static void rrdpush_send_error_on_taken_over_connection(struct receiver_state *rpt, const char *msg) {
  396. (void) send_timeout(
  397. #ifdef ENABLE_HTTPS
  398. &rpt->ssl,
  399. #endif
  400. rpt->fd,
  401. (char *)msg,
  402. strlen(msg),
  403. 0,
  404. 5);
  405. }
  406. void rrdpush_receive_log_status(struct receiver_state *rpt, const char *msg, const char *status) {
  407. log_stream_connection(rpt->client_ip, rpt->client_port,
  408. (rpt->key && *rpt->key)? rpt->key : "-",
  409. (rpt->machine_guid && *rpt->machine_guid) ? rpt->machine_guid : "-",
  410. (rpt->hostname && *rpt->hostname) ? rpt->hostname : "-",
  411. status);
  412. netdata_log_info("STREAM '%s' [receive from [%s]:%s]: "
  413. "%s. "
  414. "STATUS: %s%s%s%s"
  415. , rpt->hostname
  416. , rpt->client_ip, rpt->client_port
  417. , msg
  418. , status
  419. , rpt->exit.reason != STREAM_HANDSHAKE_NEVER?" (":""
  420. , stream_handshake_error_to_string(rpt->exit.reason)
  421. , rpt->exit.reason != STREAM_HANDSHAKE_NEVER?")":""
  422. );
  423. }
  424. static void rrdpush_receive(struct receiver_state *rpt)
  425. {
  426. rpt->config.mode = default_rrd_memory_mode;
  427. rpt->config.history = default_rrd_history_entries;
  428. rpt->config.health_enabled = (int)default_health_enabled;
  429. rpt->config.alarms_delay = 60;
  430. rpt->config.alarms_history = HEALTH_LOG_DEFAULT_HISTORY;
  431. rpt->config.rrdpush_enabled = (int)default_rrdpush_enabled;
  432. rpt->config.rrdpush_destination = default_rrdpush_destination;
  433. rpt->config.rrdpush_api_key = default_rrdpush_api_key;
  434. rpt->config.rrdpush_send_charts_matching = default_rrdpush_send_charts_matching;
  435. rpt->config.rrdpush_enable_replication = default_rrdpush_enable_replication;
  436. rpt->config.rrdpush_seconds_to_replicate = default_rrdpush_seconds_to_replicate;
  437. rpt->config.rrdpush_replication_step = default_rrdpush_replication_step;
  438. rpt->config.update_every = (int)appconfig_get_number(&stream_config, rpt->machine_guid, "update every", rpt->config.update_every);
  439. if(rpt->config.update_every < 0) rpt->config.update_every = 1;
  440. rpt->config.history = (int)appconfig_get_number(&stream_config, rpt->key, "default history", rpt->config.history);
  441. rpt->config.history = (int)appconfig_get_number(&stream_config, rpt->machine_guid, "history", rpt->config.history);
  442. if(rpt->config.history < 5) rpt->config.history = 5;
  443. rpt->config.mode = rrd_memory_mode_id(appconfig_get(&stream_config, rpt->key, "default memory mode", rrd_memory_mode_name(rpt->config.mode)));
  444. rpt->config.mode = rrd_memory_mode_id(appconfig_get(&stream_config, rpt->machine_guid, "memory mode", rrd_memory_mode_name(rpt->config.mode)));
  445. if (unlikely(rpt->config.mode == RRD_MEMORY_MODE_DBENGINE && !dbengine_enabled)) {
  446. netdata_log_error("STREAM '%s' [receive from %s:%s]: "
  447. "dbengine is not enabled, falling back to default."
  448. , rpt->hostname
  449. , rpt->client_ip, rpt->client_port
  450. );
  451. rpt->config.mode = default_rrd_memory_mode;
  452. }
  453. rpt->config.health_enabled = appconfig_get_boolean_ondemand(&stream_config, rpt->key, "health enabled by default", rpt->config.health_enabled);
  454. rpt->config.health_enabled = appconfig_get_boolean_ondemand(&stream_config, rpt->machine_guid, "health enabled", rpt->config.health_enabled);
  455. rpt->config.alarms_delay = appconfig_get_number(&stream_config, rpt->key, "default postpone alarms on connect seconds", rpt->config.alarms_delay);
  456. rpt->config.alarms_delay = appconfig_get_number(&stream_config, rpt->machine_guid, "postpone alarms on connect seconds", rpt->config.alarms_delay);
  457. rpt->config.alarms_history = appconfig_get_number(&stream_config, rpt->key, "default health log history", rpt->config.alarms_history);
  458. rpt->config.alarms_history = appconfig_get_number(&stream_config, rpt->machine_guid, "health log history", rpt->config.alarms_history);
  459. rpt->config.rrdpush_enabled = appconfig_get_boolean(&stream_config, rpt->key, "default proxy enabled", rpt->config.rrdpush_enabled);
  460. rpt->config.rrdpush_enabled = appconfig_get_boolean(&stream_config, rpt->machine_guid, "proxy enabled", rpt->config.rrdpush_enabled);
  461. rpt->config.rrdpush_destination = appconfig_get(&stream_config, rpt->key, "default proxy destination", rpt->config.rrdpush_destination);
  462. rpt->config.rrdpush_destination = appconfig_get(&stream_config, rpt->machine_guid, "proxy destination", rpt->config.rrdpush_destination);
  463. rpt->config.rrdpush_api_key = appconfig_get(&stream_config, rpt->key, "default proxy api key", rpt->config.rrdpush_api_key);
  464. rpt->config.rrdpush_api_key = appconfig_get(&stream_config, rpt->machine_guid, "proxy api key", rpt->config.rrdpush_api_key);
  465. rpt->config.rrdpush_send_charts_matching = appconfig_get(&stream_config, rpt->key, "default proxy send charts matching", rpt->config.rrdpush_send_charts_matching);
  466. rpt->config.rrdpush_send_charts_matching = appconfig_get(&stream_config, rpt->machine_guid, "proxy send charts matching", rpt->config.rrdpush_send_charts_matching);
  467. rpt->config.rrdpush_enable_replication = appconfig_get_boolean(&stream_config, rpt->key, "enable replication", rpt->config.rrdpush_enable_replication);
  468. rpt->config.rrdpush_enable_replication = appconfig_get_boolean(&stream_config, rpt->machine_guid, "enable replication", rpt->config.rrdpush_enable_replication);
  469. rpt->config.rrdpush_seconds_to_replicate = appconfig_get_number(&stream_config, rpt->key, "seconds to replicate", rpt->config.rrdpush_seconds_to_replicate);
  470. rpt->config.rrdpush_seconds_to_replicate = appconfig_get_number(&stream_config, rpt->machine_guid, "seconds to replicate", rpt->config.rrdpush_seconds_to_replicate);
  471. rpt->config.rrdpush_replication_step = appconfig_get_number(&stream_config, rpt->key, "seconds per replication step", rpt->config.rrdpush_replication_step);
  472. rpt->config.rrdpush_replication_step = appconfig_get_number(&stream_config, rpt->machine_guid, "seconds per replication step", rpt->config.rrdpush_replication_step);
  473. #ifdef ENABLE_RRDPUSH_COMPRESSION
  474. rpt->config.rrdpush_compression = default_rrdpush_compression_enabled;
  475. rpt->config.rrdpush_compression = appconfig_get_boolean(&stream_config, rpt->key, "enable compression", rpt->config.rrdpush_compression);
  476. rpt->config.rrdpush_compression = appconfig_get_boolean(&stream_config, rpt->machine_guid, "enable compression", rpt->config.rrdpush_compression);
  477. #endif // ENABLE_RRDPUSH_COMPRESSION
  478. (void)appconfig_set_default(&stream_config, rpt->machine_guid, "host tags", (rpt->tags)?rpt->tags:"");
  479. // find the host for this receiver
  480. {
  481. // this will also update the host with our system_info
  482. RRDHOST *host = rrdhost_find_or_create(
  483. rpt->hostname
  484. , rpt->registry_hostname
  485. , rpt->machine_guid
  486. , rpt->os
  487. , rpt->timezone
  488. , rpt->abbrev_timezone
  489. , rpt->utc_offset
  490. , rpt->tags
  491. , rpt->program_name
  492. , rpt->program_version
  493. , rpt->config.update_every
  494. , rpt->config.history
  495. , rpt->config.mode
  496. , (unsigned int)(rpt->config.health_enabled != CONFIG_BOOLEAN_NO)
  497. , (unsigned int)(rpt->config.rrdpush_enabled && rpt->config.rrdpush_destination && *rpt->config.rrdpush_destination && rpt->config.rrdpush_api_key && *rpt->config.rrdpush_api_key)
  498. , rpt->config.rrdpush_destination
  499. , rpt->config.rrdpush_api_key
  500. , rpt->config.rrdpush_send_charts_matching
  501. , rpt->config.rrdpush_enable_replication
  502. , rpt->config.rrdpush_seconds_to_replicate
  503. , rpt->config.rrdpush_replication_step
  504. , rpt->system_info
  505. , 0
  506. );
  507. if(!host) {
  508. rrdpush_receive_log_status(rpt, "failed to find/create host structure", "INTERNAL ERROR DROPPING CONNECTION");
  509. rrdpush_send_error_on_taken_over_connection(rpt, START_STREAMING_ERROR_INTERNAL_ERROR);
  510. goto cleanup;
  511. }
  512. if (unlikely(rrdhost_flag_check(host, RRDHOST_FLAG_PENDING_CONTEXT_LOAD))) {
  513. rrdpush_receive_log_status(rpt, "host is initializing", "INITIALIZATION IN PROGRESS RETRY LATER");
  514. rrdpush_send_error_on_taken_over_connection(rpt, START_STREAMING_ERROR_INITIALIZATION);
  515. goto cleanup;
  516. }
  517. // system_info has been consumed by the host structure
  518. rpt->system_info = NULL;
  519. if(!rrdhost_set_receiver(host, rpt)) {
  520. rrdpush_receive_log_status(rpt, "host is already served by another receiver", "DUPLICATE RECEIVER DROPPING CONNECTION");
  521. rrdpush_send_error_on_taken_over_connection(rpt, START_STREAMING_ERROR_ALREADY_STREAMING);
  522. goto cleanup;
  523. }
  524. }
  525. #ifdef NETDATA_INTERNAL_CHECKS
  526. netdata_log_info("STREAM '%s' [receive from [%s]:%s]: "
  527. "client willing to stream metrics for host '%s' with machine_guid '%s': "
  528. "update every = %d, history = %d, memory mode = %s, health %s,%s tags '%s'"
  529. , rpt->hostname
  530. , rpt->client_ip
  531. , rpt->client_port
  532. , rrdhost_hostname(rpt->host)
  533. , rpt->host->machine_guid
  534. , rpt->host->rrd_update_every
  535. , rpt->host->rrd_history_entries
  536. , rrd_memory_mode_name(rpt->host->rrd_memory_mode)
  537. , (rpt->config.health_enabled == CONFIG_BOOLEAN_NO)?"disabled":((rpt->config.health_enabled == CONFIG_BOOLEAN_YES)?"enabled":"auto")
  538. #ifdef ENABLE_HTTPS
  539. , (rpt->ssl.conn != NULL) ? " SSL," : ""
  540. #else
  541. , ""
  542. #endif
  543. , rrdhost_tags(rpt->host)
  544. );
  545. #endif // NETDATA_INTERNAL_CHECKS
  546. struct plugind cd = {
  547. .update_every = default_rrd_update_every,
  548. .unsafe = {
  549. .spinlock = NETDATA_SPINLOCK_INITIALIZER,
  550. .running = true,
  551. .enabled = true,
  552. },
  553. .started_t = now_realtime_sec(),
  554. };
  555. // put the client IP and port into the buffers used by plugins.d
  556. snprintfz(cd.id, CONFIG_MAX_NAME, "%s:%s", rpt->client_ip, rpt->client_port);
  557. snprintfz(cd.filename, FILENAME_MAX, "%s:%s", rpt->client_ip, rpt->client_port);
  558. snprintfz(cd.fullfilename, FILENAME_MAX, "%s:%s", rpt->client_ip, rpt->client_port);
  559. snprintfz(cd.cmd, PLUGINSD_CMD_MAX, "%s:%s", rpt->client_ip, rpt->client_port);
  560. #ifdef ENABLE_RRDPUSH_COMPRESSION
  561. if (stream_has_capability(rpt, STREAM_CAP_COMPRESSION)) {
  562. if (!rpt->config.rrdpush_compression)
  563. rpt->capabilities &= ~STREAM_CAP_COMPRESSION;
  564. }
  565. #endif // ENABLE_RRDPUSH_COMPRESSION
  566. {
  567. // netdata_log_info("STREAM %s [receive from [%s]:%s]: initializing communication...", rrdhost_hostname(rpt->host), rpt->client_ip, rpt->client_port);
  568. char initial_response[HTTP_HEADER_SIZE];
  569. if (stream_has_capability(rpt, STREAM_CAP_VCAPS)) {
  570. log_receiver_capabilities(rpt);
  571. sprintf(initial_response, "%s%u", START_STREAMING_PROMPT_VN, rpt->capabilities);
  572. }
  573. else if (stream_has_capability(rpt, STREAM_CAP_VN)) {
  574. log_receiver_capabilities(rpt);
  575. sprintf(initial_response, "%s%d", START_STREAMING_PROMPT_VN, stream_capabilities_to_vn(rpt->capabilities));
  576. }
  577. else if (stream_has_capability(rpt, STREAM_CAP_V2)) {
  578. log_receiver_capabilities(rpt);
  579. sprintf(initial_response, "%s", START_STREAMING_PROMPT_V2);
  580. }
  581. else { // stream_has_capability(rpt, STREAM_CAP_V1)
  582. log_receiver_capabilities(rpt);
  583. sprintf(initial_response, "%s", START_STREAMING_PROMPT_V1);
  584. }
  585. netdata_log_debug(D_STREAM, "Initial response to %s: %s", rpt->client_ip, initial_response);
  586. ssize_t bytes_sent = send_timeout(
  587. #ifdef ENABLE_HTTPS
  588. &rpt->ssl,
  589. #endif
  590. rpt->fd, initial_response, strlen(initial_response), 0, 60);
  591. if(bytes_sent != (ssize_t)strlen(initial_response)) {
  592. internal_error(true, "Cannot send response, got %zd bytes, expecting %zu bytes", bytes_sent, strlen(initial_response));
  593. rrdpush_receive_log_status(rpt, "cannot reply back", "CANT REPLY DROPPING CONNECTION");
  594. goto cleanup;
  595. }
  596. }
  597. {
  598. // remove the non-blocking flag from the socket
  599. if(sock_delnonblock(rpt->fd) < 0)
  600. netdata_log_error("STREAM '%s' [receive from [%s]:%s]: "
  601. "cannot remove the non-blocking flag from socket %d"
  602. , rrdhost_hostname(rpt->host)
  603. , rpt->client_ip, rpt->client_port
  604. , rpt->fd);
  605. struct timeval timeout;
  606. timeout.tv_sec = 600;
  607. timeout.tv_usec = 0;
  608. if (unlikely(setsockopt(rpt->fd, SOL_SOCKET, SO_RCVTIMEO, &timeout, sizeof timeout) != 0))
  609. netdata_log_error("STREAM '%s' [receive from [%s]:%s]: "
  610. "cannot set timeout for socket %d"
  611. , rrdhost_hostname(rpt->host)
  612. , rpt->client_ip, rpt->client_port
  613. , rpt->fd);
  614. }
  615. rrdpush_receive_log_status(rpt, "ready to receive data", "CONNECTED");
  616. #ifdef ENABLE_ACLK
  617. // in case we have cloud connection we inform cloud
  618. // new child connected
  619. if (netdata_cloud_enabled)
  620. aclk_host_state_update(rpt->host, 1);
  621. #endif
  622. rrdhost_set_is_parent_label();
  623. // let it reconnect to parent immediately
  624. rrdpush_reset_destinations_postpone_time(rpt->host);
  625. size_t count = streaming_parser(rpt, &cd, rpt->fd,
  626. #ifdef ENABLE_HTTPS
  627. (rpt->ssl.conn) ? &rpt->ssl : NULL
  628. #else
  629. NULL
  630. #endif
  631. );
  632. receiver_set_exit_reason(rpt, STREAM_HANDSHAKE_DISCONNECT_PARSER_EXIT, false);
  633. {
  634. char msg[100 + 1];
  635. snprintfz(msg, 100, "disconnected (completed %zu updates)", count);
  636. rrdpush_receive_log_status(rpt, msg, "DISCONNECTED");
  637. }
  638. #ifdef ENABLE_ACLK
  639. // in case we have cloud connection we inform cloud
  640. // a child disconnected
  641. if (netdata_cloud_enabled)
  642. aclk_host_state_update(rpt->host, 0);
  643. #endif
  644. cleanup:
  645. ;
  646. }
  647. static void rrdpush_receiver_thread_cleanup(void *ptr) {
  648. struct receiver_state *rpt = (struct receiver_state *) ptr;
  649. worker_unregister();
  650. rrdhost_clear_receiver(rpt);
  651. netdata_log_info("STREAM '%s' [receive from [%s]:%s]: "
  652. "receive thread ended (task id %d)"
  653. , rpt->hostname ? rpt->hostname : "-"
  654. , rpt->client_ip ? rpt->client_ip : "-", rpt->client_port ? rpt->client_port : "-"
  655. , gettid());
  656. receiver_state_free(rpt);
  657. rrdhost_set_is_parent_label();
  658. }
  659. void *rrdpush_receiver_thread(void *ptr) {
  660. netdata_thread_cleanup_push(rrdpush_receiver_thread_cleanup, ptr);
  661. worker_register("STREAMRCV");
  662. worker_register_job_custom_metric(WORKER_RECEIVER_JOB_BYTES_READ, "received bytes", "bytes/s", WORKER_METRIC_INCREMENT);
  663. worker_register_job_custom_metric(WORKER_RECEIVER_JOB_BYTES_UNCOMPRESSED, "uncompressed bytes", "bytes/s", WORKER_METRIC_INCREMENT);
  664. worker_register_job_custom_metric(WORKER_RECEIVER_JOB_REPLICATION_COMPLETION, "replication completion", "%", WORKER_METRIC_ABSOLUTE);
  665. struct receiver_state *rpt = (struct receiver_state *)ptr;
  666. rpt->tid = gettid();
  667. netdata_log_info("STREAM %s [%s]:%s: receive thread created (task id %d)", rpt->hostname, rpt->client_ip, rpt->client_port, rpt->tid);
  668. rrdpush_receive(rpt);
  669. netdata_thread_cleanup_pop(1);
  670. return NULL;
  671. }