receiver.c 33 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888
  1. // SPDX-License-Identifier: GPL-3.0-or-later
  2. #include "rrdpush.h"
  // Custom worker job ids used by the receiver thread.
  // They are allocated just below WORKER_PARSER_FIRST_JOB, so that they do not
  // overlap with the job ids the parser registers from WORKER_PARSER_FIRST_JOB on.
  // IMPORTANT: to add workers, you have to edit WORKER_PARSER_FIRST_JOB accordingly
  #define WORKER_RECEIVER_JOB_BYTES_READ (WORKER_PARSER_FIRST_JOB - 1)
  #define WORKER_RECEIVER_JOB_BYTES_UNCOMPRESSED (WORKER_PARSER_FIRST_JOB - 2)
  // this id has to be the same as the one used at parser.h
  #define WORKER_RECEIVER_JOB_REPLICATION_COMPLETION (WORKER_PARSER_FIRST_JOB - 3)

  // Three ids are carved out below WORKER_PARSER_FIRST_JOB, so it has to be at
  // least 3 for the lowest one (REPLICATION_COMPLETION) to be non-negative.
  #if WORKER_PARSER_FIRST_JOB < 3
  #error The define WORKER_PARSER_FIRST_JOB needs to be at least 3
  #endif
  11. extern struct config stream_config;
  12. void receiver_state_free(struct receiver_state *rpt) {
  13. freez(rpt->key);
  14. freez(rpt->hostname);
  15. freez(rpt->registry_hostname);
  16. freez(rpt->machine_guid);
  17. freez(rpt->os);
  18. freez(rpt->timezone);
  19. freez(rpt->abbrev_timezone);
  20. freez(rpt->tags);
  21. freez(rpt->client_ip);
  22. freez(rpt->client_port);
  23. freez(rpt->program_name);
  24. freez(rpt->program_version);
  25. #ifdef ENABLE_HTTPS
  26. if(rpt->ssl.conn)
  27. SSL_free(rpt->ssl.conn);
  28. #endif
  29. #ifdef ENABLE_COMPRESSION
  30. if (rpt->decompressor)
  31. rpt->decompressor->destroy(&rpt->decompressor);
  32. #endif
  33. if(rpt->system_info)
  34. rrdhost_system_info_free(rpt->system_info);
  35. __atomic_sub_fetch(&netdata_buffers_statistics.rrdhost_receivers, sizeof(*rpt), __ATOMIC_RELAXED);
  36. freez(rpt);
  37. }
  38. #include "collectors/plugins.d/pluginsd_parser.h"
  39. PARSER_RC streaming_claimed_id(char **words, size_t num_words, void *user)
  40. {
  41. const char *host_uuid_str = get_word(words, num_words, 1);
  42. const char *claim_id_str = get_word(words, num_words, 2);
  43. if (!host_uuid_str || !claim_id_str) {
  44. error("Command CLAIMED_ID came malformed, uuid = '%s', claim_id = '%s'",
  45. host_uuid_str ? host_uuid_str : "[unset]",
  46. claim_id_str ? claim_id_str : "[unset]");
  47. return PARSER_RC_ERROR;
  48. }
  49. uuid_t uuid;
  50. RRDHOST *host = ((PARSER_USER_OBJECT *)user)->host;
  51. // We don't need the parsed UUID
  52. // just do it to check the format
  53. if(uuid_parse(host_uuid_str, uuid)) {
  54. error("1st parameter (host GUID) to CLAIMED_ID command is not valid GUID. Received: \"%s\".", host_uuid_str);
  55. return PARSER_RC_ERROR;
  56. }
  57. if(uuid_parse(claim_id_str, uuid) && strcmp(claim_id_str, "NULL")) {
  58. error("2nd parameter (Claim ID) to CLAIMED_ID command is not valid GUID. Received: \"%s\".", claim_id_str);
  59. return PARSER_RC_ERROR;
  60. }
  61. if(strcmp(host_uuid_str, host->machine_guid)) {
  62. error("Claim ID is for host \"%s\" but it came over connection for \"%s\"", host_uuid_str, host->machine_guid);
  63. return PARSER_RC_OK; //the message is OK problem must be somewhere else
  64. }
  65. rrdhost_aclk_state_lock(host);
  66. if (host->aclk_state.claimed_id)
  67. freez(host->aclk_state.claimed_id);
  68. host->aclk_state.claimed_id = strcmp(claim_id_str, "NULL") ? strdupz(claim_id_str) : NULL;
  69. rrdhost_aclk_state_unlock(host);
  70. rrdhost_flag_set(host, RRDHOST_FLAG_METADATA_CLAIMID |RRDHOST_FLAG_METADATA_UPDATE);
  71. rrdpush_claimed_id(host);
  72. return PARSER_RC_OK;
  73. }
  74. static int read_stream(struct receiver_state *r, char* buffer, size_t size) {
  75. if(unlikely(!size)) {
  76. internal_error(true, "%s() asked to read zero bytes", __FUNCTION__);
  77. return 0;
  78. }
  79. #ifdef ENABLE_HTTPS
  80. if (r->ssl.conn && r->ssl.flags == NETDATA_SSL_HANDSHAKE_COMPLETE)
  81. return (int)netdata_ssl_read(r->ssl.conn, buffer, size);
  82. #endif
  83. ssize_t bytes_read = read(r->fd, buffer, size);
  84. if(bytes_read == 0 && (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINPROGRESS)) {
  85. error("STREAM: %s(): timeout while waiting for data on socket!", __FUNCTION__);
  86. bytes_read = -3;
  87. }
  88. else if (bytes_read == 0) {
  89. error("STREAM: %s(): EOF while reading data from socket!", __FUNCTION__);
  90. bytes_read = -1;
  91. }
  92. else if (bytes_read < 0) {
  93. error("STREAM: %s() failed to read from socket!", __FUNCTION__);
  94. bytes_read = -2;
  95. }
  96. // do {
  97. // bytes_read = (int) fread(buffer, 1, size, fp);
  98. // if (unlikely(bytes_read <= 0)) {
  99. // if(feof(fp)) {
  100. // internal_error(true, "%s(): fread() failed with EOF", __FUNCTION__);
  101. // bytes_read = -2;
  102. // }
  103. // else if(ferror(fp)) {
  104. // internal_error(true, "%s(): fread() failed with ERROR", __FUNCTION__);
  105. // bytes_read = -3;
  106. // }
  107. // else bytes_read = 0;
  108. // }
  109. // else
  110. // worker_set_metric(WORKER_RECEIVER_JOB_BYTES_READ, bytes_read);
  111. // } while(bytes_read == 0);
  112. return (int)bytes_read;
  113. }
  114. static bool receiver_read_uncompressed(struct receiver_state *r) {
  115. #ifdef NETDATA_INTERNAL_CHECKS
  116. if(r->read_buffer[r->read_len] != '\0')
  117. fatal("%s(): read_buffer does not start with zero", __FUNCTION__ );
  118. #endif
  119. int bytes_read = read_stream(r, r->read_buffer + r->read_len, sizeof(r->read_buffer) - r->read_len - 1);
  120. if(unlikely(bytes_read <= 0))
  121. return false;
  122. worker_set_metric(WORKER_RECEIVER_JOB_BYTES_READ, (NETDATA_DOUBLE)bytes_read);
  123. worker_set_metric(WORKER_RECEIVER_JOB_BYTES_UNCOMPRESSED, (NETDATA_DOUBLE)bytes_read);
  124. r->read_len += bytes_read;
  125. r->read_buffer[r->read_len] = '\0';
  126. return true;
  127. }
  #ifdef ENABLE_COMPRESSION
  // Append decompressed data to the end of r->read_buffer, keeping it null
  // terminated.
  //
  // If the decompressor still holds decompressed bytes from a previous block,
  // they are consumed first and nothing is read from the stream. Otherwise,
  // the signature of the next block is read, then the whole compressed block,
  // which is decompressed and copied (as much as it fits) to r->read_buffer.
  //
  // Returns true when the caller should try to extract lines again, false
  // when reading or decompressing failed.
  static bool receiver_read_compressed(struct receiver_state *r) {
  #ifdef NETDATA_INTERNAL_CHECKS
      if(r->read_buffer[r->read_len] != '\0')
          fatal("%s: read_buffer does not start with zero #2", __FUNCTION__ );
  #endif

      // first use any available uncompressed data
      if(r->decompressor->decompressed_bytes_in_buffer(r->decompressor)) {
          size_t room = sizeof(r->read_buffer) - r->read_len - 1;

          if(!room) {
              internal_error(true, "The line to read is too big! Already have %d bytes in read_buffer.", r->read_len);
              return true;
          }

          size_t copied = r->decompressor->get(r->decompressor, r->read_buffer + r->read_len, room);
          if(!copied) {
              internal_error(true, "decompressor returned zero length #1");
              return false;
          }

          r->read_len += (int)copied;
          r->read_buffer[r->read_len] = '\0';
          return true;
      }

      // no decompressed data available
      // read the compression signature of the next block

      const size_t signature_size = r->decompressor->signature_size;

      if(unlikely(r->read_len + signature_size > sizeof(r->read_buffer) - 1)) {
          internal_error(true, "The last incomplete line does not leave enough room for the next compression header! Already have %d bytes in read_buffer.", r->read_len);
          return false;
      }

      // the signature is temporarily stored at the free end of the read buffer;
      // we have to loop, because read_stream() may return less than the data we need
      char *signature = r->read_buffer + r->read_len;
      int signature_bytes = 0;
      do {
          int got = read_stream(r, signature + signature_bytes, signature_size - signature_bytes);
          if(unlikely(got <= 0))
              return false;

          signature_bytes += got;
      } while(unlikely(signature_bytes < (int)signature_size));

      worker_set_metric(WORKER_RECEIVER_JOB_BYTES_READ, (NETDATA_DOUBLE)signature_bytes);

      if(unlikely(signature_bytes != (int)signature_size))
          fatal("read %d bytes, but expected compression signature of size %zu", signature_bytes, signature_size);

      size_t compressed_message_size = r->decompressor->start(r->decompressor, signature, signature_bytes);

      if(unlikely(!compressed_message_size)) {
          // not a compression header: keep the bytes in the read buffer as plain data
          internal_error(true, "multiplexed uncompressed data in compressed stream!");
          r->read_len += signature_bytes;
          r->read_buffer[r->read_len] = '\0';
          return true;
      }

      if(unlikely(compressed_message_size > COMPRESSION_MAX_MSG_SIZE)) {
          error("received a compressed message of %zu bytes, which is bigger than the max compressed message size supported of %zu. Ignoring message.",
                compressed_message_size, (size_t)COMPRESSION_MAX_MSG_SIZE);
          return false;
      }

      // delete compression header from our read buffer
      r->read_buffer[r->read_len] = '\0';

      // Read the entire compressed block of compressed data
      // (its size is at least 1 and at most COMPRESSION_MAX_MSG_SIZE here)
      char compressed[compressed_message_size];
      size_t compressed_bytes_read = 0;
      while(compressed_bytes_read < compressed_message_size) {
          int got = read_stream(r, &compressed[compressed_bytes_read], compressed_message_size - compressed_bytes_read);
          if(unlikely(got <= 0)) {
              internal_error(true, "read_stream() failed #2, with code %d", got);
              return false;
          }

          compressed_bytes_read += got;
      }

      worker_set_metric(WORKER_RECEIVER_JOB_BYTES_READ, (NETDATA_DOUBLE)compressed_bytes_read);

      // decompress the compressed block
      size_t bytes_to_parse = r->decompressor->decompress(r->decompressor, compressed, compressed_bytes_read);
      if(!bytes_to_parse) {
          internal_error(true, "no bytes to parse.");
          return false;
      }

      worker_set_metric(WORKER_RECEIVER_JOB_BYTES_UNCOMPRESSED, (NETDATA_DOUBLE)bytes_to_parse);

      // fill read buffer with decompressed data
      size_t room = sizeof(r->read_buffer) - r->read_len - 1;
      size_t copied = (int)r->decompressor->get(r->decompressor, r->read_buffer + r->read_len, room);
      if(!copied) {
          internal_error(true, "decompressor returned zero length #2");
          return false;
      }

      r->read_len += (int)copied;
      r->read_buffer[r->read_len] = '\0';

      return true;
  }
  #else // !ENABLE_COMPRESSION
  // Without compression support, a "compressed" read is a plain read.
  static bool receiver_read_compressed(struct receiver_state *r) {
      return receiver_read_uncompressed(r);
  }
  #endif // ENABLE_COMPRESSION
  218. /* Produce a full line if one exists, statefully return where we start next time.
  219. * When we hit the end of the buffer with a partial line move it to the beginning for the next fill.
  220. */
  221. static char *receiver_next_line(struct receiver_state *r, char *buffer, size_t buffer_length, size_t *pos) {
  222. size_t start = *pos;
  223. char *ss = &r->read_buffer[start];
  224. char *se = &r->read_buffer[r->read_len];
  225. char *ds = buffer;
  226. char *de = &buffer[buffer_length - 2];
  227. if(ss >= se) {
  228. *ds = '\0';
  229. *pos = 0;
  230. r->read_len = 0;
  231. r->read_buffer[r->read_len] = '\0';
  232. return NULL;
  233. }
  234. // copy all bytes to buffer
  235. while(ss < se && ds < de && *ss != '\n')
  236. *ds++ = *ss++;
  237. // if we have a newline, return the buffer
  238. if(ss < se && ds < de && *ss == '\n') {
  239. // newline found in the r->read_buffer
  240. *ds++ = *ss++; // copy the newline too
  241. *ds = '\0';
  242. *pos = ss - r->read_buffer;
  243. return buffer;
  244. }
  245. // if the destination is full, oops!
  246. if(ds == de) {
  247. error("STREAM: received line exceeds %d bytes. Truncating it.", PLUGINSD_LINE_MAX);
  248. *ds = '\0';
  249. *pos = ss - r->read_buffer;
  250. return buffer;
  251. }
  252. // no newline found in the r->read_buffer
  253. // move everything to the beginning
  254. memmove(r->read_buffer, &r->read_buffer[start], r->read_len - start);
  255. r->read_len -= (int)start;
  256. r->read_buffer[r->read_len] = '\0';
  257. *ds = '\0';
  258. *pos = 0;
  259. return NULL;
  260. }
  261. static void streaming_parser_thread_cleanup(void *ptr) {
  262. PARSER *parser = (PARSER *)ptr;
  263. rrd_collector_finished();
  264. parser_destroy(parser);
  265. }
  266. bool plugin_is_enabled(struct plugind *cd);
  267. static size_t streaming_parser(struct receiver_state *rpt, struct plugind *cd, int fd, void *ssl) {
  268. size_t result;
  269. PARSER_USER_OBJECT user = {
  270. .enabled = plugin_is_enabled(cd),
  271. .host = rpt->host,
  272. .opaque = rpt,
  273. .cd = cd,
  274. .trust_durations = 1,
  275. .capabilities = rpt->capabilities,
  276. };
  277. PARSER *parser = parser_init(&user, NULL, NULL, fd,
  278. PARSER_INPUT_SPLIT, ssl);
  279. pluginsd_keywords_init(parser, PARSER_INIT_STREAMING);
  280. rrd_collector_started();
  281. // this keeps the parser with its current value
  282. // so, parser needs to be allocated before pushing it
  283. netdata_thread_cleanup_push(streaming_parser_thread_cleanup, parser);
  284. parser_add_keyword(parser, "CLAIMED_ID", streaming_claimed_id);
  285. user.parser = parser;
  286. bool compressed_connection = false;
  287. #ifdef ENABLE_COMPRESSION
  288. if(stream_has_capability(rpt, STREAM_CAP_COMPRESSION)) {
  289. compressed_connection = true;
  290. if (!rpt->decompressor)
  291. rpt->decompressor = create_decompressor();
  292. else
  293. rpt->decompressor->reset(rpt->decompressor);
  294. }
  295. #endif
  296. rpt->read_buffer[0] = '\0';
  297. rpt->read_len = 0;
  298. size_t read_buffer_start = 0;
  299. char buffer[PLUGINSD_LINE_MAX + 2] = "";
  300. while(service_running(SERVICE_STREAMING)) {
  301. netdata_thread_testcancel();
  302. if(!receiver_next_line(rpt, buffer, PLUGINSD_LINE_MAX + 2, &read_buffer_start)) {
  303. bool have_new_data;
  304. if(likely(compressed_connection))
  305. have_new_data = receiver_read_compressed(rpt);
  306. else
  307. have_new_data = receiver_read_uncompressed(rpt);
  308. if(unlikely(!have_new_data)) {
  309. if(!rpt->exit.reason)
  310. rpt->exit.reason = "SOCKET READ ERROR";
  311. break;
  312. }
  313. rpt->last_msg_t = now_realtime_sec();
  314. continue;
  315. }
  316. if(unlikely(!service_running(SERVICE_STREAMING))) {
  317. if(!rpt->exit.reason)
  318. rpt->exit.reason = "NETDATA EXIT";
  319. goto done;
  320. }
  321. if(unlikely(rpt->exit.shutdown)) {
  322. if(!rpt->exit.reason)
  323. rpt->exit.reason = "SHUTDOWN REQUESTED";
  324. goto done;
  325. }
  326. if (unlikely(parser_action(parser, buffer))) {
  327. internal_error(true, "parser_action() failed on keyword '%s'.", buffer);
  328. if(!rpt->exit.reason)
  329. rpt->exit.reason = "PARSER FAILED";
  330. break;
  331. }
  332. }
  333. done:
  334. result = user.data_collections_count;
  335. // free parser with the pop function
  336. netdata_thread_cleanup_pop(1);
  337. return result;
  338. }
  339. static void rrdpush_receiver_replication_reset(RRDHOST *host) {
  340. RRDSET *st;
  341. rrdset_foreach_read(st, host) {
  342. rrdset_flag_clear(st, RRDSET_FLAG_RECEIVER_REPLICATION_IN_PROGRESS);
  343. rrdset_flag_set(st, RRDSET_FLAG_RECEIVER_REPLICATION_FINISHED);
  344. }
  345. rrdset_foreach_done(st);
  346. rrdhost_receiver_replicating_charts_zero(host);
  347. }
  348. static bool rrdhost_set_receiver(RRDHOST *host, struct receiver_state *rpt) {
  349. bool signal_rrdcontext = false;
  350. bool set_this = false;
  351. netdata_mutex_lock(&host->receiver_lock);
  352. if (!host->receiver || host->receiver == rpt) {
  353. rrdhost_flag_clear(host, RRDHOST_FLAG_ORPHAN);
  354. host->receiver = rpt;
  355. rpt->host = host;
  356. host->child_connect_time = now_realtime_sec();
  357. host->child_disconnected_time = 0;
  358. host->child_last_chart_command = 0;
  359. host->trigger_chart_obsoletion_check = 1;
  360. if (rpt->config.health_enabled != CONFIG_BOOLEAN_NO) {
  361. if (rpt->config.alarms_delay > 0) {
  362. host->health.health_delay_up_to = now_realtime_sec() + rpt->config.alarms_delay;
  363. log_health(
  364. "[%s]: Postponing health checks for %" PRId64 " seconds, because it was just connected.",
  365. rrdhost_hostname(host),
  366. (int64_t) rpt->config.alarms_delay);
  367. }
  368. }
  369. // this is a test
  370. // if(rpt->hops <= host->sender->hops)
  371. // rrdpush_sender_thread_stop(host, "HOPS MISMATCH", false);
  372. signal_rrdcontext = true;
  373. rrdpush_receiver_replication_reset(host);
  374. rrdhost_flag_clear(rpt->host, RRDHOST_FLAG_RRDPUSH_RECEIVER_DISCONNECTED);
  375. aclk_queue_node_info(rpt->host, true);
  376. set_this = true;
  377. }
  378. netdata_mutex_unlock(&host->receiver_lock);
  379. if(signal_rrdcontext)
  380. rrdcontext_host_child_connected(host);
  381. return set_this;
  382. }
  383. static void rrdhost_clear_receiver(struct receiver_state *rpt) {
  384. bool signal_rrdcontext = false;
  385. RRDHOST *host = rpt->host;
  386. if(host) {
  387. netdata_mutex_lock(&host->receiver_lock);
  388. // Make sure that we detach this thread and don't kill a freshly arriving receiver
  389. if(host->receiver == rpt) {
  390. host->trigger_chart_obsoletion_check = 0;
  391. host->child_connect_time = 0;
  392. host->child_disconnected_time = now_realtime_sec();
  393. if (rpt->config.health_enabled == CONFIG_BOOLEAN_AUTO)
  394. host->health.health_enabled = 0;
  395. rrdpush_sender_thread_stop(host, "RECEIVER LEFT", false);
  396. signal_rrdcontext = true;
  397. rrdpush_receiver_replication_reset(host);
  398. if (host->receiver == rpt)
  399. host->receiver = NULL;
  400. rrdhost_flag_set(host, RRDHOST_FLAG_ORPHAN);
  401. }
  402. netdata_mutex_unlock(&host->receiver_lock);
  403. if(signal_rrdcontext)
  404. rrdcontext_host_child_disconnected(host);
  405. }
  406. }
  407. bool stop_streaming_receiver(RRDHOST *host, const char *reason) {
  408. bool ret = false;
  409. netdata_mutex_lock(&host->receiver_lock);
  410. if(host->receiver) {
  411. if(!host->receiver->exit.shutdown) {
  412. host->receiver->exit.shutdown = true;
  413. host->receiver->exit.reason = reason;
  414. shutdown(host->receiver->fd, SHUT_RDWR);
  415. }
  416. netdata_thread_cancel(host->receiver->thread);
  417. }
  418. int count = 2000;
  419. while (host->receiver && count-- > 0) {
  420. netdata_mutex_unlock(&host->receiver_lock);
  421. // let the lock for the receiver thread to exit
  422. sleep_usec(1 * USEC_PER_MS);
  423. netdata_mutex_lock(&host->receiver_lock);
  424. }
  425. if(host->receiver)
  426. error("STREAM '%s' [receive from [%s]:%s]: "
  427. "thread %d takes too long to stop, giving up..."
  428. , rrdhost_hostname(host)
  429. , host->receiver->client_ip, host->receiver->client_port
  430. , gettid());
  431. else
  432. ret = true;
  433. netdata_mutex_unlock(&host->receiver_lock);
  434. return ret;
  435. }
  436. void rrdpush_receive_log_status(struct receiver_state *rpt, const char *msg, const char *status) {
  437. log_stream_connection(rpt->client_ip, rpt->client_port,
  438. (rpt->key && *rpt->key)? rpt->key : "-",
  439. (rpt->machine_guid && *rpt->machine_guid) ? rpt->machine_guid : "-",
  440. (rpt->hostname && *rpt->hostname) ? rpt->hostname : "-",
  441. status);
  442. info("STREAM '%s' [receive from [%s]:%s]: "
  443. "%s. "
  444. "STATUS: %s%s%s%s"
  445. , rpt->hostname
  446. , rpt->client_ip, rpt->client_port
  447. , msg
  448. , status
  449. , rpt->exit.reason?" (":""
  450. , rpt->exit.reason?rpt->exit.reason:""
  451. , rpt->exit.reason?")":""
  452. );
  453. }
  454. static void rrdhost_reset_destinations(RRDHOST *host) {
  455. for (struct rrdpush_destinations *d = host->destinations; d; d = d->next)
  456. d->postpone_reconnection_until = 0;
  457. }
  458. static int rrdpush_receive(struct receiver_state *rpt)
  459. {
  460. rpt->config.mode = default_rrd_memory_mode;
  461. rpt->config.history = default_rrd_history_entries;
  462. rpt->config.health_enabled = (int)default_health_enabled;
  463. rpt->config.alarms_delay = 60;
  464. rpt->config.rrdpush_enabled = (int)default_rrdpush_enabled;
  465. rpt->config.rrdpush_destination = default_rrdpush_destination;
  466. rpt->config.rrdpush_api_key = default_rrdpush_api_key;
  467. rpt->config.rrdpush_send_charts_matching = default_rrdpush_send_charts_matching;
  468. rpt->config.rrdpush_enable_replication = default_rrdpush_enable_replication;
  469. rpt->config.rrdpush_seconds_to_replicate = default_rrdpush_seconds_to_replicate;
  470. rpt->config.rrdpush_replication_step = default_rrdpush_replication_step;
  471. rpt->config.update_every = (int)appconfig_get_number(&stream_config, rpt->machine_guid, "update every", rpt->config.update_every);
  472. if(rpt->config.update_every < 0) rpt->config.update_every = 1;
  473. rpt->config.history = (int)appconfig_get_number(&stream_config, rpt->key, "default history", rpt->config.history);
  474. rpt->config.history = (int)appconfig_get_number(&stream_config, rpt->machine_guid, "history", rpt->config.history);
  475. if(rpt->config.history < 5) rpt->config.history = 5;
  476. rpt->config.mode = rrd_memory_mode_id(appconfig_get(&stream_config, rpt->key, "default memory mode", rrd_memory_mode_name(rpt->config.mode)));
  477. rpt->config.mode = rrd_memory_mode_id(appconfig_get(&stream_config, rpt->machine_guid, "memory mode", rrd_memory_mode_name(rpt->config.mode)));
  478. if (unlikely(rpt->config.mode == RRD_MEMORY_MODE_DBENGINE && !dbengine_enabled)) {
  479. error("STREAM '%s' [receive from %s:%s]: "
  480. "dbengine is not enabled, falling back to default."
  481. , rpt->hostname
  482. , rpt->client_ip, rpt->client_port
  483. );
  484. rpt->config.mode = default_rrd_memory_mode;
  485. }
  486. rpt->config.health_enabled = appconfig_get_boolean_ondemand(&stream_config, rpt->key, "health enabled by default", rpt->config.health_enabled);
  487. rpt->config.health_enabled = appconfig_get_boolean_ondemand(&stream_config, rpt->machine_guid, "health enabled", rpt->config.health_enabled);
  488. rpt->config.alarms_delay = appconfig_get_number(&stream_config, rpt->key, "default postpone alarms on connect seconds", rpt->config.alarms_delay);
  489. rpt->config.alarms_delay = appconfig_get_number(&stream_config, rpt->machine_guid, "postpone alarms on connect seconds", rpt->config.alarms_delay);
  490. rpt->config.rrdpush_enabled = appconfig_get_boolean(&stream_config, rpt->key, "default proxy enabled", rpt->config.rrdpush_enabled);
  491. rpt->config.rrdpush_enabled = appconfig_get_boolean(&stream_config, rpt->machine_guid, "proxy enabled", rpt->config.rrdpush_enabled);
  492. rpt->config.rrdpush_destination = appconfig_get(&stream_config, rpt->key, "default proxy destination", rpt->config.rrdpush_destination);
  493. rpt->config.rrdpush_destination = appconfig_get(&stream_config, rpt->machine_guid, "proxy destination", rpt->config.rrdpush_destination);
  494. rpt->config.rrdpush_api_key = appconfig_get(&stream_config, rpt->key, "default proxy api key", rpt->config.rrdpush_api_key);
  495. rpt->config.rrdpush_api_key = appconfig_get(&stream_config, rpt->machine_guid, "proxy api key", rpt->config.rrdpush_api_key);
  496. rpt->config.rrdpush_send_charts_matching = appconfig_get(&stream_config, rpt->key, "default proxy send charts matching", rpt->config.rrdpush_send_charts_matching);
  497. rpt->config.rrdpush_send_charts_matching = appconfig_get(&stream_config, rpt->machine_guid, "proxy send charts matching", rpt->config.rrdpush_send_charts_matching);
  498. rpt->config.rrdpush_enable_replication = appconfig_get_boolean(&stream_config, rpt->key, "enable replication", rpt->config.rrdpush_enable_replication);
  499. rpt->config.rrdpush_enable_replication = appconfig_get_boolean(&stream_config, rpt->machine_guid, "enable replication", rpt->config.rrdpush_enable_replication);
  500. rpt->config.rrdpush_seconds_to_replicate = appconfig_get_number(&stream_config, rpt->key, "seconds to replicate", rpt->config.rrdpush_seconds_to_replicate);
  501. rpt->config.rrdpush_seconds_to_replicate = appconfig_get_number(&stream_config, rpt->machine_guid, "seconds to replicate", rpt->config.rrdpush_seconds_to_replicate);
  502. rpt->config.rrdpush_replication_step = appconfig_get_number(&stream_config, rpt->key, "seconds per replication step", rpt->config.rrdpush_replication_step);
  503. rpt->config.rrdpush_replication_step = appconfig_get_number(&stream_config, rpt->machine_guid, "seconds per replication step", rpt->config.rrdpush_replication_step);
  504. #ifdef ENABLE_COMPRESSION
  505. rpt->config.rrdpush_compression = default_compression_enabled;
  506. rpt->config.rrdpush_compression = appconfig_get_boolean(&stream_config, rpt->key, "enable compression", rpt->config.rrdpush_compression);
  507. rpt->config.rrdpush_compression = appconfig_get_boolean(&stream_config, rpt->machine_guid, "enable compression", rpt->config.rrdpush_compression);
  508. rpt->rrdpush_compression = (rpt->config.rrdpush_compression && default_compression_enabled);
  509. #endif //ENABLE_COMPRESSION
  510. (void)appconfig_set_default(&stream_config, rpt->machine_guid, "host tags", (rpt->tags)?rpt->tags:"");
  511. // find the host for this receiver
  512. {
  513. // this will also update the host with our system_info
  514. RRDHOST *host = rrdhost_find_or_create(
  515. rpt->hostname
  516. , rpt->registry_hostname
  517. , rpt->machine_guid
  518. , rpt->os
  519. , rpt->timezone
  520. , rpt->abbrev_timezone
  521. , rpt->utc_offset
  522. , rpt->tags
  523. , rpt->program_name
  524. , rpt->program_version
  525. , rpt->config.update_every
  526. , rpt->config.history
  527. , rpt->config.mode
  528. , (unsigned int)(rpt->config.health_enabled != CONFIG_BOOLEAN_NO)
  529. , (unsigned int)(rpt->config.rrdpush_enabled && rpt->config.rrdpush_destination && *rpt->config.rrdpush_destination && rpt->config.rrdpush_api_key && *rpt->config.rrdpush_api_key)
  530. , rpt->config.rrdpush_destination
  531. , rpt->config.rrdpush_api_key
  532. , rpt->config.rrdpush_send_charts_matching
  533. , rpt->config.rrdpush_enable_replication
  534. , rpt->config.rrdpush_seconds_to_replicate
  535. , rpt->config.rrdpush_replication_step
  536. , rpt->system_info
  537. , 0
  538. );
  539. if(!host) {
  540. rrdpush_receive_log_status(rpt, "failed to find/create host structure", "INTERNAL ERROR DROPPING CONNECTION");
  541. close(rpt->fd);
  542. return 1;
  543. }
  544. if (unlikely(rrdhost_flag_check(host, RRDHOST_FLAG_PENDING_CONTEXT_LOAD))) {
  545. rrdpush_receive_log_status(rpt, "host is initializing", "INITIALIZATION IN PROGRESS RETRY LATER");
  546. close(rpt->fd);
  547. return 1;
  548. }
  549. // system_info has been consumed by the host structure
  550. rpt->system_info = NULL;
  551. if(!rrdhost_set_receiver(host, rpt)) {
  552. rrdpush_receive_log_status(rpt, "host is already served by another receiver", "DUPLICATE RECEIVER DROPPING CONNECTION");
  553. close(rpt->fd);
  554. return 1;
  555. }
  556. }
  557. #ifdef NETDATA_INTERNAL_CHECKS
  558. info("STREAM '%s' [receive from [%s]:%s]: "
  559. "client willing to stream metrics for host '%s' with machine_guid '%s': "
  560. "update every = %d, history = %ld, memory mode = %s, health %s,%s tags '%s'"
  561. , rpt->hostname
  562. , rpt->client_ip
  563. , rpt->client_port
  564. , rrdhost_hostname(rpt->host)
  565. , rpt->host->machine_guid
  566. , rpt->host->rrd_update_every
  567. , rpt->host->rrd_history_entries
  568. , rrd_memory_mode_name(rpt->host->rrd_memory_mode)
  569. , (rpt->config.health_enabled == CONFIG_BOOLEAN_NO)?"disabled":((rpt->config.health_enabled == CONFIG_BOOLEAN_YES)?"enabled":"auto")
  570. #ifdef ENABLE_HTTPS
  571. , (rpt->ssl.conn != NULL) ? " SSL," : ""
  572. #else
  573. , ""
  574. #endif
  575. , rrdhost_tags(rpt->host)
  576. );
  577. #endif // NETDATA_INTERNAL_CHECKS
  578. struct plugind cd = {
  579. .update_every = default_rrd_update_every,
  580. .unsafe = {
  581. .spinlock = NETDATA_SPINLOCK_INITIALIZER,
  582. .running = true,
  583. .enabled = true,
  584. },
  585. .started_t = now_realtime_sec(),
  586. };
  587. // put the client IP and port into the buffers used by plugins.d
  588. snprintfz(cd.id, CONFIG_MAX_NAME, "%s:%s", rpt->client_ip, rpt->client_port);
  589. snprintfz(cd.filename, FILENAME_MAX, "%s:%s", rpt->client_ip, rpt->client_port);
  590. snprintfz(cd.fullfilename, FILENAME_MAX, "%s:%s", rpt->client_ip, rpt->client_port);
  591. snprintfz(cd.cmd, PLUGINSD_CMD_MAX, "%s:%s", rpt->client_ip, rpt->client_port);
  592. #ifdef ENABLE_COMPRESSION
  593. if (stream_has_capability(rpt, STREAM_CAP_COMPRESSION)) {
  594. if (!rpt->rrdpush_compression)
  595. rpt->capabilities &= ~STREAM_CAP_COMPRESSION;
  596. }
  597. #endif
  598. {
  599. // info("STREAM %s [receive from [%s]:%s]: initializing communication...", rrdhost_hostname(rpt->host), rpt->client_ip, rpt->client_port);
  600. char initial_response[HTTP_HEADER_SIZE];
  601. if (stream_has_capability(rpt, STREAM_CAP_VCAPS)) {
  602. log_receiver_capabilities(rpt);
  603. sprintf(initial_response, "%s%u", START_STREAMING_PROMPT_VN, rpt->capabilities);
  604. }
  605. else if (stream_has_capability(rpt, STREAM_CAP_VN)) {
  606. log_receiver_capabilities(rpt);
  607. sprintf(initial_response, "%s%d", START_STREAMING_PROMPT_VN, stream_capabilities_to_vn(rpt->capabilities));
  608. }
  609. else if (stream_has_capability(rpt, STREAM_CAP_V2)) {
  610. log_receiver_capabilities(rpt);
  611. sprintf(initial_response, "%s", START_STREAMING_PROMPT_V2);
  612. }
  613. else { // stream_has_capability(rpt, STREAM_CAP_V1)
  614. log_receiver_capabilities(rpt);
  615. sprintf(initial_response, "%s", START_STREAMING_PROMPT_V1);
  616. }
  617. debug(D_STREAM, "Initial response to %s: %s", rpt->client_ip, initial_response);
  618. if(send_timeout(
  619. #ifdef ENABLE_HTTPS
  620. &rpt->ssl,
  621. #endif
  622. rpt->fd, initial_response, strlen(initial_response), 0, 60) != (ssize_t)strlen(initial_response)) {
  623. rrdpush_receive_log_status(rpt, "cannot reply back", "CANT REPLY DROPPING CONNECTION");
  624. close(rpt->fd);
  625. return 0;
  626. }
  627. }
  628. {
  629. // remove the non-blocking flag from the socket
  630. if(sock_delnonblock(rpt->fd) < 0)
  631. error("STREAM '%s' [receive from [%s]:%s]: "
  632. "cannot remove the non-blocking flag from socket %d"
  633. , rrdhost_hostname(rpt->host)
  634. , rpt->client_ip, rpt->client_port
  635. , rpt->fd);
  636. struct timeval timeout;
  637. timeout.tv_sec = 600;
  638. timeout.tv_usec = 0;
  639. if (unlikely(setsockopt(rpt->fd, SOL_SOCKET, SO_RCVTIMEO, &timeout, sizeof timeout) != 0))
  640. error("STREAM '%s' [receive from [%s]:%s]: "
  641. "cannot set timeout for socket %d"
  642. , rrdhost_hostname(rpt->host)
  643. , rpt->client_ip, rpt->client_port
  644. , rpt->fd);
  645. }
  646. rrdpush_receive_log_status(rpt, "ready to receive data", "CONNECTED");
  647. #ifdef ENABLE_ACLK
  648. // in case we have cloud connection we inform cloud
  649. // new child connected
  650. if (netdata_cloud_setting)
  651. aclk_host_state_update(rpt->host, 1);
  652. #endif
  653. rrdhost_set_is_parent_label(++localhost->connected_children_count);
  654. // let it reconnect to parent immediately
  655. rrdhost_reset_destinations(rpt->host);
  656. size_t count = streaming_parser(rpt, &cd, rpt->fd,
  657. #ifdef ENABLE_HTTPS
  658. (rpt->ssl.conn) ? &rpt->ssl : NULL
  659. #else
  660. NULL
  661. #endif
  662. );
  663. rrdhost_flag_set(rpt->host, RRDHOST_FLAG_RRDPUSH_RECEIVER_DISCONNECTED);
  664. if(!rpt->exit.reason)
  665. rpt->exit.reason = "PARSER EXIT";
  666. {
  667. char msg[100 + 1];
  668. snprintfz(msg, 100, "disconnected (completed %zu updates)", count);
  669. rrdpush_receive_log_status(rpt, msg, "DISCONNECTED");
  670. }
  671. #ifdef ENABLE_ACLK
  672. // in case we have cloud connection we inform cloud
  673. // a child disconnected
  674. if (netdata_cloud_setting)
  675. aclk_host_state_update(rpt->host, 0);
  676. #endif
  677. rrdhost_set_is_parent_label(--localhost->connected_children_count);
  678. // cleanup
  679. close(rpt->fd);
  680. return (int)count;
  681. }
  682. static void rrdpush_receiver_thread_cleanup(void *ptr) {
  683. struct receiver_state *rpt = (struct receiver_state *) ptr;
  684. worker_unregister();
  685. rrdhost_clear_receiver(rpt);
  686. info("STREAM '%s' [receive from [%s]:%s]: "
  687. "receive thread ended (task id %d)"
  688. , rpt->hostname ? rpt->hostname : "-"
  689. , rpt->client_ip ? rpt->client_ip : "-", rpt->client_port ? rpt->client_port : "-"
  690. , gettid());
  691. receiver_state_free(rpt);
  692. }
  693. void *rrdpush_receiver_thread(void *ptr) {
  694. netdata_thread_cleanup_push(rrdpush_receiver_thread_cleanup, ptr);
  695. worker_register("STREAMRCV");
  696. worker_register_job_custom_metric(WORKER_RECEIVER_JOB_BYTES_READ, "received bytes", "bytes/s", WORKER_METRIC_INCREMENT);
  697. worker_register_job_custom_metric(WORKER_RECEIVER_JOB_BYTES_UNCOMPRESSED, "uncompressed bytes", "bytes/s", WORKER_METRIC_INCREMENT);
  698. worker_register_job_custom_metric(WORKER_RECEIVER_JOB_REPLICATION_COMPLETION, "replication completion", "%", WORKER_METRIC_ABSOLUTE);
  699. struct receiver_state *rpt = (struct receiver_state *)ptr;
  700. info("STREAM %s [%s]:%s: receive thread created (task id %d)", rpt->hostname, rpt->client_ip, rpt->client_port, gettid());
  701. rrdpush_receive(rpt);
  702. netdata_thread_cleanup_pop(1);
  703. return NULL;
  704. }