receiver.c 31 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766
  1. // SPDX-License-Identifier: GPL-3.0-or-later
  2. #include "rrdpush.h"
  3. extern struct config stream_config;
  4. void destroy_receiver_state(struct receiver_state *rpt) {
  5. freez(rpt->key);
  6. freez(rpt->hostname);
  7. freez(rpt->registry_hostname);
  8. freez(rpt->machine_guid);
  9. freez(rpt->os);
  10. freez(rpt->timezone);
  11. freez(rpt->abbrev_timezone);
  12. freez(rpt->tags);
  13. freez(rpt->client_ip);
  14. freez(rpt->client_port);
  15. freez(rpt->program_name);
  16. freez(rpt->program_version);
  17. #ifdef ENABLE_HTTPS
  18. if(rpt->ssl.conn){
  19. SSL_free(rpt->ssl.conn);
  20. }
  21. #endif
  22. #ifdef ENABLE_COMPRESSION
  23. if (rpt->decompressor)
  24. rpt->decompressor->destroy(&rpt->decompressor);
  25. #endif
  26. freez(rpt);
  27. }
  28. static void rrdpush_receiver_thread_cleanup(void *ptr) {
  29. worker_unregister();
  30. static __thread int executed = 0;
  31. if(!executed) {
  32. executed = 1;
  33. struct receiver_state *rpt = (struct receiver_state *) ptr;
  34. // If the shutdown sequence has started, and this receiver is still attached to the host then we cannot touch
  35. // the host pointer as it is unpredictable when the RRDHOST is deleted. Do the cleanup from rrdhost_free().
  36. if (netdata_exit && rpt->host) {
  37. rpt->exited = 1;
  38. return;
  39. }
  40. // Make sure that we detach this thread and don't kill a freshly arriving receiver
  41. if (!netdata_exit && rpt->host) {
  42. netdata_mutex_lock(&rpt->host->receiver_lock);
  43. if (rpt->host->receiver == rpt)
  44. rpt->host->receiver = NULL;
  45. netdata_mutex_unlock(&rpt->host->receiver_lock);
  46. }
  47. info("STREAM %s [receive from [%s]:%s]: receive thread ended (task id %d)", rpt->hostname, rpt->client_ip, rpt->client_port, gettid());
  48. destroy_receiver_state(rpt);
  49. }
  50. }
  51. #include "collectors/plugins.d/pluginsd_parser.h"
  52. PARSER_RC streaming_timestamp(char **words, void *user, PLUGINSD_ACTION *plugins_action)
  53. {
  54. UNUSED(plugins_action);
  55. char *remote_time_txt = words[1];
  56. time_t remote_time = 0;
  57. RRDHOST *host = ((PARSER_USER_OBJECT *)user)->host;
  58. struct plugind *cd = ((PARSER_USER_OBJECT *)user)->cd;
  59. if (cd->version < VERSION_GAP_FILLING ) {
  60. error("STREAM %s from %s: Child negotiated version %u but sent TIMESTAMP!", host->hostname, cd->cmd,
  61. cd->version);
  62. return PARSER_RC_OK; // Ignore error and continue stream
  63. }
  64. if (remote_time_txt && *remote_time_txt) {
  65. remote_time = str2ull(remote_time_txt);
  66. time_t now = now_realtime_sec(), prev = rrdhost_last_entry_t(host);
  67. time_t gap = 0;
  68. if (prev == 0)
  69. info(
  70. "STREAM %s from %s: Initial connection (no gap to check), "
  71. "remote=%"PRId64" local=%"PRId64" slew=%"PRId64"",
  72. host->hostname,
  73. cd->cmd,
  74. (int64_t)remote_time,
  75. (int64_t)now,
  76. (int64_t)now - remote_time);
  77. else {
  78. gap = now - prev;
  79. info(
  80. "STREAM %s from %s: Checking for gaps... "
  81. "remote=%"PRId64" local=%"PRId64"..%"PRId64" slew=%"PRId64" %"PRId64"-sec gap",
  82. host->hostname,
  83. cd->cmd,
  84. (int64_t)remote_time,
  85. (int64_t)prev,
  86. (int64_t)now,
  87. (int64_t)(remote_time - now),
  88. (int64_t)gap);
  89. }
  90. char message[128];
  91. sprintf(
  92. message,
  93. "REPLICATE %"PRId64" %"PRId64"\n",
  94. (int64_t)(remote_time - gap),
  95. (int64_t)remote_time);
  96. int ret;
  97. #ifdef ENABLE_HTTPS
  98. SSL *conn = host->stream_ssl.conn ;
  99. if(conn && !host->stream_ssl.flags) {
  100. ret = SSL_write(conn, message, strlen(message));
  101. } else {
  102. ret = send(host->receiver->fd, message, strlen(message), MSG_DONTWAIT);
  103. }
  104. #else
  105. ret = send(host->receiver->fd, message, strlen(message), MSG_DONTWAIT);
  106. #endif
  107. if (ret != (int)strlen(message))
  108. error("Failed to send initial timestamp - gaps may appear in charts");
  109. return PARSER_RC_OK;
  110. }
  111. return PARSER_RC_ERROR;
  112. }
  113. #define CLAIMED_ID_MIN_WORDS 3
  114. PARSER_RC streaming_claimed_id(char **words, void *user, PLUGINSD_ACTION *plugins_action)
  115. {
  116. UNUSED(plugins_action);
  117. int i;
  118. uuid_t uuid;
  119. RRDHOST *host = ((PARSER_USER_OBJECT *)user)->host;
  120. for (i = 0; words[i]; i++) ;
  121. if (i != CLAIMED_ID_MIN_WORDS) {
  122. error("Command CLAIMED_ID came malformed %d parameters are expected but %d received", CLAIMED_ID_MIN_WORDS - 1, i - 1);
  123. return PARSER_RC_ERROR;
  124. }
  125. // We don't need the parsed UUID
  126. // just do it to check the format
  127. if(uuid_parse(words[1], uuid)) {
  128. error("1st parameter (host GUID) to CLAIMED_ID command is not valid GUID. Received: \"%s\".", words[1]);
  129. return PARSER_RC_ERROR;
  130. }
  131. if(uuid_parse(words[2], uuid) && strcmp(words[2], "NULL")) {
  132. error("2nd parameter (Claim ID) to CLAIMED_ID command is not valid GUID. Received: \"%s\".", words[2]);
  133. return PARSER_RC_ERROR;
  134. }
  135. if(strcmp(words[1], host->machine_guid)) {
  136. error("Claim ID is for host \"%s\" but it came over connection for \"%s\"", words[1], host->machine_guid);
  137. return PARSER_RC_OK; //the message is OK problem must be somewhere else
  138. }
  139. rrdhost_aclk_state_lock(host);
  140. if (host->aclk_state.claimed_id)
  141. freez(host->aclk_state.claimed_id);
  142. host->aclk_state.claimed_id = strcmp(words[2], "NULL") ? strdupz(words[2]) : NULL;
  143. store_claim_id(&host->host_uuid, host->aclk_state.claimed_id ? &uuid : NULL);
  144. rrdhost_aclk_state_unlock(host);
  145. rrdpush_claimed_id(host);
  146. return PARSER_RC_OK;
  147. }
  148. #ifndef ENABLE_COMPRESSION
  149. /* The receiver socket is blocking, perform a single read into a buffer so that we can reassemble lines for parsing.
  150. */
  151. static int receiver_read(struct receiver_state *r, FILE *fp) {
  152. #ifdef ENABLE_HTTPS
  153. if (r->ssl.conn && !r->ssl.flags) {
  154. ERR_clear_error();
  155. int desired = sizeof(r->read_buffer) - r->read_len - 1;
  156. int ret = SSL_read(r->ssl.conn, r->read_buffer + r->read_len, desired);
  157. if (ret > 0 ) {
  158. r->read_len += ret;
  159. return 0;
  160. }
  161. // Don't treat SSL_ERROR_WANT_READ or SSL_ERROR_WANT_WRITE differently on blocking socket
  162. u_long err;
  163. char buf[256];
  164. while ((err = ERR_get_error()) != 0) {
  165. ERR_error_string_n(err, buf, sizeof(buf));
  166. error("STREAM %s [receive from %s] ssl error: %s", r->hostname, r->client_ip, buf);
  167. }
  168. return 1;
  169. }
  170. #endif
  171. if (!fgets(r->read_buffer, sizeof(r->read_buffer), fp))
  172. return 1;
  173. r->read_len = strlen(r->read_buffer);
  174. return 0;
  175. }
  176. #else
  177. /*
  178. * The receiver socket is blocking, perform a single read into a buffer so that we can reassemble lines for parsing.
  179. * if SSL encryption is on, then use SSL API for reading stream data.
  180. * Use line oriented fgets() in buffer from receiver_state is provided.
  181. * In other cases use fread to read binary data from socket.
  182. * Return zero on success and the number of bytes were read using pointer in the last argument.
  183. */
  184. static int read_stream(struct receiver_state *r, FILE *fp, char* buffer, size_t size, int* ret) {
  185. if (!ret)
  186. return 1;
  187. *ret = 0;
  188. #ifdef ENABLE_HTTPS
  189. if (r->ssl.conn && !r->ssl.flags) {
  190. ERR_clear_error();
  191. if (buffer != r->read_buffer + r->read_len) {
  192. *ret = SSL_read(r->ssl.conn, buffer, size);
  193. if (*ret > 0 )
  194. return 0;
  195. } else {
  196. // we need to receive data with LF to parse compression header
  197. size_t ofs = 0;
  198. int res = 0;
  199. errno = 0;
  200. while (ofs < size) {
  201. do {
  202. res = SSL_read(r->ssl.conn, buffer + ofs, 1);
  203. // When either SSL_ERROR_SYSCALL (OpenSSL < 3.0) or SSL_ERROR_SSL(OpenSSL > 3.0) happens,
  204. // the connection was lost https://www.openssl.org/docs/man3.0/man3/SSL_get_error.html,
  205. // without the test we will have an infinite loop https://github.com/netdata/netdata/issues/13092
  206. int local_ssl_err = SSL_get_error(r->ssl.conn, res);
  207. if (local_ssl_err == SSL_ERROR_SYSCALL || local_ssl_err == SSL_ERROR_SSL) {
  208. error("The SSL connection has error SSL_ERROR_SYSCALL(%d) and system is registering errno = %d",
  209. local_ssl_err, errno);
  210. return 1;
  211. }
  212. } while (res == 0);
  213. if (res < 0)
  214. break;
  215. if (buffer[ofs] == '\n')
  216. break;
  217. ofs += res;
  218. }
  219. if (res > 0) {
  220. ofs += res;
  221. *ret = ofs;
  222. buffer[ofs] = 0;
  223. return 0;
  224. }
  225. }
  226. // Don't treat SSL_ERROR_WANT_READ or SSL_ERROR_WANT_WRITE differently on blocking socket
  227. u_long err;
  228. char buf[256];
  229. while ((err = ERR_get_error()) != 0) {
  230. ERR_error_string_n(err, buf, sizeof(buf));
  231. error("STREAM %s [receive from %s] ssl error: %s", r->hostname, r->client_ip, buf);
  232. }
  233. return 1;
  234. }
  235. #endif
  236. if (buffer != r->read_buffer + r->read_len) {
  237. // read to external buffer
  238. *ret = fread(buffer, 1, size, fp);
  239. if (!*ret)
  240. return 1;
  241. } else {
  242. if (!fgets(r->read_buffer, sizeof(r->read_buffer), fp))
  243. return 1;
  244. *ret = strlen(r->read_buffer);
  245. }
  246. return 0;
  247. }
  248. /*
  249. * Get the next line of data for parsing.
  250. * Return data from the decompressor buffer if available.
  251. * Otherwise read next line from the socket and check for compression header.
  252. * Return the line was read If no compression header was found.
  253. * Otherwise read the entire block of compressed data, decompress it
  254. * and return it in receiver_state buffer.
  255. * Return zero on success.
  256. */
  257. static int receiver_read(struct receiver_state *r, FILE *fp) {
  258. // check any decompressed data present
  259. if (r->decompressor &&
  260. r->decompressor->decompressed_bytes_in_buffer(r->decompressor)) {
  261. size_t available = sizeof(r->read_buffer) - r->read_len;
  262. if (available) {
  263. size_t len = r->decompressor->get(r->decompressor,
  264. r->read_buffer + r->read_len, available);
  265. if (!len)
  266. return 1;
  267. r->read_len += len;
  268. }
  269. return 0;
  270. }
  271. int ret = 0;
  272. if (read_stream(r, fp, r->read_buffer + r->read_len, sizeof(r->read_buffer) - r->read_len - 1, &ret))
  273. return 1;
  274. if (!is_compressed_data(r->read_buffer, ret)) {
  275. r->read_len += ret;
  276. return 0;
  277. }
  278. if (unlikely(!r->decompressor))
  279. r->decompressor = create_decompressor();
  280. size_t bytes_to_read = r->decompressor->start(r->decompressor,
  281. r->read_buffer, ret);
  282. // Read the entire block of compressed data because
  283. // we're unable to decompress incomplete block
  284. char compressed[bytes_to_read];
  285. do {
  286. if (read_stream(r, fp, compressed, bytes_to_read, &ret))
  287. return 1;
  288. // Send input data to decompressor
  289. if (ret)
  290. r->decompressor->put(r->decompressor, compressed, ret);
  291. bytes_to_read -= ret;
  292. } while (bytes_to_read > 0);
  293. // Decompress
  294. size_t bytes_to_parse = r->decompressor->decompress(r->decompressor);
  295. if (!bytes_to_parse)
  296. return 1;
  297. // Fill read buffer with decompressed data
  298. r->read_len = r->decompressor->get(r->decompressor,
  299. r->read_buffer, sizeof(r->read_buffer));
  300. return 0;
  301. }
  302. #endif
  303. /* Produce a full line if one exists, statefully return where we start next time.
  304. * When we hit the end of the buffer with a partial line move it to the beginning for the next fill.
  305. */
  306. static char *receiver_next_line(struct receiver_state *r, int *pos) {
  307. int start = *pos, scan = *pos;
  308. if (scan >= r->read_len) {
  309. r->read_len = 0;
  310. return NULL;
  311. }
  312. while (scan < r->read_len && r->read_buffer[scan] != '\n')
  313. scan++;
  314. if (scan < r->read_len && r->read_buffer[scan] == '\n') {
  315. *pos = scan+1;
  316. r->read_buffer[scan] = 0;
  317. return &r->read_buffer[start];
  318. }
  319. memmove(r->read_buffer, &r->read_buffer[start], r->read_len - start);
  320. r->read_len -= start;
  321. return NULL;
  322. }
  323. static void streaming_parser_thread_cleanup(void *ptr) {
  324. PARSER *parser = (PARSER *)ptr;
  325. parser_destroy(parser);
  326. }
  327. size_t streaming_parser(struct receiver_state *rpt, struct plugind *cd, FILE *fp) {
  328. size_t result;
  329. PARSER_USER_OBJECT user = {
  330. .enabled = cd->enabled,
  331. .host = rpt->host,
  332. .opaque = rpt,
  333. .cd = cd,
  334. .trust_durations = 1
  335. };
  336. PARSER *parser = parser_init(rpt->host, &user, fp, PARSER_INPUT_SPLIT);
  337. // this keeps the parser with its current value
  338. // so, parser needs to be allocated before pushing it
  339. netdata_thread_cleanup_push(streaming_parser_thread_cleanup, parser);
  340. parser_add_keyword(parser, "TIMESTAMP", streaming_timestamp);
  341. parser_add_keyword(parser, "CLAIMED_ID", streaming_claimed_id);
  342. parser->plugins_action->begin_action = &pluginsd_begin_action;
  343. parser->plugins_action->flush_action = &pluginsd_flush_action;
  344. parser->plugins_action->end_action = &pluginsd_end_action;
  345. parser->plugins_action->disable_action = &pluginsd_disable_action;
  346. parser->plugins_action->variable_action = &pluginsd_variable_action;
  347. parser->plugins_action->dimension_action = &pluginsd_dimension_action;
  348. parser->plugins_action->label_action = &pluginsd_label_action;
  349. parser->plugins_action->overwrite_action = &pluginsd_overwrite_action;
  350. parser->plugins_action->chart_action = &pluginsd_chart_action;
  351. parser->plugins_action->set_action = &pluginsd_set_action;
  352. parser->plugins_action->clabel_commit_action = &pluginsd_clabel_commit_action;
  353. parser->plugins_action->clabel_action = &pluginsd_clabel_action;
  354. user.parser = parser;
  355. #ifdef ENABLE_COMPRESSION
  356. if (rpt->decompressor)
  357. rpt->decompressor->reset(rpt->decompressor);
  358. #endif
  359. do{
  360. if (receiver_read(rpt, fp))
  361. break;
  362. int pos = 0;
  363. char *line;
  364. while ((line = receiver_next_line(rpt, &pos))) {
  365. if (unlikely(netdata_exit || rpt->shutdown || parser_action(parser, line)))
  366. goto done;
  367. }
  368. rpt->last_msg_t = now_realtime_sec();
  369. }
  370. while(!netdata_exit);
  371. done:
  372. result = user.count;
  373. // free parser with the pop function
  374. netdata_thread_cleanup_pop(1);
  375. return result;
  376. }
  377. static int rrdpush_receive(struct receiver_state *rpt)
  378. {
  379. int history = default_rrd_history_entries;
  380. RRD_MEMORY_MODE mode = default_rrd_memory_mode;
  381. int health_enabled = default_health_enabled;
  382. int rrdpush_enabled = default_rrdpush_enabled;
  383. char *rrdpush_destination = default_rrdpush_destination;
  384. char *rrdpush_api_key = default_rrdpush_api_key;
  385. char *rrdpush_send_charts_matching = default_rrdpush_send_charts_matching;
  386. time_t alarms_delay = 60;
  387. rpt->update_every = (int)appconfig_get_number(&stream_config, rpt->machine_guid, "update every", rpt->update_every);
  388. if(rpt->update_every < 0) rpt->update_every = 1;
  389. history = (int)appconfig_get_number(&stream_config, rpt->key, "default history", history);
  390. history = (int)appconfig_get_number(&stream_config, rpt->machine_guid, "history", history);
  391. if(history < 5) history = 5;
  392. mode = rrd_memory_mode_id(appconfig_get(&stream_config, rpt->key, "default memory mode", rrd_memory_mode_name(mode)));
  393. mode = rrd_memory_mode_id(appconfig_get(&stream_config, rpt->machine_guid, "memory mode", rrd_memory_mode_name(mode)));
  394. #ifndef ENABLE_DBENGINE
  395. if (unlikely(mode == RRD_MEMORY_MODE_DBENGINE)) {
  396. close(rpt->fd);
  397. log_stream_connection(rpt->client_ip, rpt->client_port, rpt->key, rpt->machine_guid, rpt->hostname, "REJECTED -- DBENGINE MEMORY MODE NOT SUPPORTED");
  398. return 1;
  399. }
  400. #endif
  401. health_enabled = appconfig_get_boolean_ondemand(&stream_config, rpt->key, "health enabled by default", health_enabled);
  402. health_enabled = appconfig_get_boolean_ondemand(&stream_config, rpt->machine_guid, "health enabled", health_enabled);
  403. alarms_delay = appconfig_get_number(&stream_config, rpt->key, "default postpone alarms on connect seconds", alarms_delay);
  404. alarms_delay = appconfig_get_number(&stream_config, rpt->machine_guid, "postpone alarms on connect seconds", alarms_delay);
  405. rrdpush_enabled = appconfig_get_boolean(&stream_config, rpt->key, "default proxy enabled", rrdpush_enabled);
  406. rrdpush_enabled = appconfig_get_boolean(&stream_config, rpt->machine_guid, "proxy enabled", rrdpush_enabled);
  407. rrdpush_destination = appconfig_get(&stream_config, rpt->key, "default proxy destination", rrdpush_destination);
  408. rrdpush_destination = appconfig_get(&stream_config, rpt->machine_guid, "proxy destination", rrdpush_destination);
  409. rrdpush_api_key = appconfig_get(&stream_config, rpt->key, "default proxy api key", rrdpush_api_key);
  410. rrdpush_api_key = appconfig_get(&stream_config, rpt->machine_guid, "proxy api key", rrdpush_api_key);
  411. rrdpush_send_charts_matching = appconfig_get(&stream_config, rpt->key, "default proxy send charts matching", rrdpush_send_charts_matching);
  412. rrdpush_send_charts_matching = appconfig_get(&stream_config, rpt->machine_guid, "proxy send charts matching", rrdpush_send_charts_matching);
  413. #ifdef ENABLE_COMPRESSION
  414. unsigned int rrdpush_compression = default_compression_enabled;
  415. rrdpush_compression = appconfig_get_boolean(&stream_config, rpt->key, "enable compression", rrdpush_compression);
  416. rrdpush_compression = appconfig_get_boolean(&stream_config, rpt->machine_guid, "enable compression", rrdpush_compression);
  417. rpt->rrdpush_compression = (rrdpush_compression && default_compression_enabled);
  418. #endif //ENABLE_COMPRESSION
  419. (void)appconfig_set_default(&stream_config, rpt->machine_guid, "host tags", (rpt->tags)?rpt->tags:"");
  420. if (strcmp(rpt->machine_guid, localhost->machine_guid) == 0) {
  421. log_stream_connection(rpt->client_ip, rpt->client_port, rpt->key, rpt->machine_guid, rpt->hostname, "DENIED - ATTEMPT TO RECEIVE METRICS FROM MACHINE_GUID IDENTICAL TO PARENT");
  422. error("STREAM %s [receive from %s:%s]: denied to receive metrics, machine GUID [%s] is my own. Did you copy the parent/proxy machine GUID to a child, or is this an inter-agent loop?", rpt->hostname, rpt->client_ip, rpt->client_port, rpt->machine_guid);
  423. char initial_response[HTTP_HEADER_SIZE + 1];
  424. snprintfz(initial_response, HTTP_HEADER_SIZE, "%s", START_STREAMING_ERROR_SAME_LOCALHOST);
  425. #ifdef ENABLE_HTTPS
  426. rpt->host->stream_ssl.conn = rpt->ssl.conn;
  427. rpt->host->stream_ssl.flags = rpt->ssl.flags;
  428. if(send_timeout(&rpt->ssl, rpt->fd, initial_response, strlen(initial_response), 0, 60) != (ssize_t)strlen(initial_response)) {
  429. #else
  430. if(send_timeout(rpt->fd, initial_response, strlen(initial_response), 0, 60) != strlen(initial_response)) {
  431. #endif
  432. log_stream_connection(rpt->client_ip, rpt->client_port, rpt->key, rpt->host->machine_guid, rpt->host->hostname, "FAILED - CANNOT REPLY");
  433. error("STREAM %s [receive from [%s]:%s]: cannot send command.", rpt->host->hostname, rpt->client_ip, rpt->client_port);
  434. close(rpt->fd);
  435. return 0;
  436. }
  437. close(rpt->fd);
  438. return 0;
  439. }
  440. if (rpt->host==NULL) {
  441. rpt->host = rrdhost_find_or_create(
  442. rpt->hostname
  443. , rpt->registry_hostname
  444. , rpt->machine_guid
  445. , rpt->os
  446. , rpt->timezone
  447. , rpt->abbrev_timezone
  448. , rpt->utc_offset
  449. , rpt->tags
  450. , rpt->program_name
  451. , rpt->program_version
  452. , rpt->update_every
  453. , history
  454. , mode
  455. , (unsigned int)(health_enabled != CONFIG_BOOLEAN_NO)
  456. , (unsigned int)(rrdpush_enabled && rrdpush_destination && *rrdpush_destination && rrdpush_api_key && *rrdpush_api_key)
  457. , rrdpush_destination
  458. , rrdpush_api_key
  459. , rrdpush_send_charts_matching
  460. , rpt->system_info
  461. );
  462. if(!rpt->host) {
  463. close(rpt->fd);
  464. log_stream_connection(rpt->client_ip, rpt->client_port, rpt->key, rpt->machine_guid, rpt->hostname, "FAILED - CANNOT ACQUIRE HOST");
  465. error("STREAM %s [receive from [%s]:%s]: failed to find/create host structure.", rpt->hostname, rpt->client_ip, rpt->client_port);
  466. return 1;
  467. }
  468. netdata_mutex_lock(&rpt->host->receiver_lock);
  469. if (rpt->host->receiver == NULL)
  470. rpt->host->receiver = rpt;
  471. else {
  472. error("Multiple receivers connected for %s concurrently, cancelling this one...", rpt->machine_guid);
  473. netdata_mutex_unlock(&rpt->host->receiver_lock);
  474. close(rpt->fd);
  475. log_stream_connection(rpt->client_ip, rpt->client_port, rpt->key, rpt->machine_guid, rpt->hostname, "FAILED - BEATEN TO HOST CREATION");
  476. return 1;
  477. }
  478. netdata_mutex_unlock(&rpt->host->receiver_lock);
  479. }
  480. else {
  481. rrd_wrlock();
  482. rrdhost_update(
  483. rpt->host,
  484. rpt->hostname,
  485. rpt->registry_hostname,
  486. rpt->machine_guid,
  487. rpt->os,
  488. rpt->timezone,
  489. rpt->abbrev_timezone,
  490. rpt->utc_offset,
  491. rpt->tags,
  492. rpt->program_name,
  493. rpt->program_version,
  494. rpt->update_every,
  495. history,
  496. mode,
  497. (unsigned int)(health_enabled != CONFIG_BOOLEAN_NO),
  498. (unsigned int)(rrdpush_enabled && rrdpush_destination && *rrdpush_destination && rrdpush_api_key && *rrdpush_api_key),
  499. rrdpush_destination,
  500. rrdpush_api_key,
  501. rrdpush_send_charts_matching,
  502. rpt->system_info);
  503. rrd_unlock();
  504. }
  505. #ifdef NETDATA_INTERNAL_CHECKS
  506. int ssl = 0;
  507. #ifdef ENABLE_HTTPS
  508. if (rpt->ssl.conn != NULL)
  509. ssl = 1;
  510. #endif
  511. info("STREAM %s [receive from [%s]:%s]: client willing to stream metrics for host '%s' with machine_guid '%s': update every = %d, history = %ld, memory mode = %s, health %s,%s tags '%s'"
  512. , rpt->hostname
  513. , rpt->client_ip
  514. , rpt->client_port
  515. , rpt->host->hostname
  516. , rpt->host->machine_guid
  517. , rpt->host->rrd_update_every
  518. , rpt->host->rrd_history_entries
  519. , rrd_memory_mode_name(rpt->host->rrd_memory_mode)
  520. , (health_enabled == CONFIG_BOOLEAN_NO)?"disabled":((health_enabled == CONFIG_BOOLEAN_YES)?"enabled":"auto")
  521. , ssl ? " SSL," : ""
  522. , rpt->host->tags?rpt->host->tags:""
  523. );
  524. #endif // NETDATA_INTERNAL_CHECKS
  525. struct plugind cd = {
  526. .enabled = 1,
  527. .update_every = default_rrd_update_every,
  528. .pid = 0,
  529. .serial_failures = 0,
  530. .successful_collections = 0,
  531. .obsolete = 0,
  532. .started_t = now_realtime_sec(),
  533. .next = NULL,
  534. .version = 0,
  535. };
  536. // put the client IP and port into the buffers used by plugins.d
  537. snprintfz(cd.id, CONFIG_MAX_NAME, "%s:%s", rpt->client_ip, rpt->client_port);
  538. snprintfz(cd.filename, FILENAME_MAX, "%s:%s", rpt->client_ip, rpt->client_port);
  539. snprintfz(cd.fullfilename, FILENAME_MAX, "%s:%s", rpt->client_ip, rpt->client_port);
  540. snprintfz(cd.cmd, PLUGINSD_CMD_MAX, "%s:%s", rpt->client_ip, rpt->client_port);
  541. info("STREAM %s [receive from [%s]:%s]: initializing communication...", rpt->host->hostname, rpt->client_ip, rpt->client_port);
  542. char initial_response[HTTP_HEADER_SIZE];
  543. if (rpt->stream_version > 1) {
  544. if(rpt->stream_version >= STREAM_VERSION_COMPRESSION){
  545. #ifdef ENABLE_COMPRESSION
  546. if(!rpt->rrdpush_compression)
  547. rpt->stream_version = STREAM_VERSION_CLABELS;
  548. #else
  549. if(STREAMING_PROTOCOL_CURRENT_VERSION < rpt->stream_version) {
  550. rpt->stream_version = STREAMING_PROTOCOL_CURRENT_VERSION;
  551. }
  552. #endif
  553. }
  554. info("STREAM %s [receive from [%s]:%s]: Netdata is using the stream version %u.", rpt->host->hostname, rpt->client_ip, rpt->client_port, rpt->stream_version);
  555. sprintf(initial_response, "%s%u", START_STREAMING_PROMPT_VN, rpt->stream_version);
  556. } else if (rpt->stream_version == 1) {
  557. info("STREAM %s [receive from [%s]:%s]: Netdata is using the stream version %u.", rpt->host->hostname, rpt->client_ip, rpt->client_port, rpt->stream_version);
  558. sprintf(initial_response, "%s", START_STREAMING_PROMPT_V2);
  559. } else {
  560. info("STREAM %s [receive from [%s]:%s]: Netdata is using first stream protocol.", rpt->host->hostname, rpt->client_ip, rpt->client_port);
  561. sprintf(initial_response, "%s", START_STREAMING_PROMPT);
  562. }
  563. debug(D_STREAM, "Initial response to %s: %s", rpt->client_ip, initial_response);
  564. #ifdef ENABLE_HTTPS
  565. rpt->host->stream_ssl.conn = rpt->ssl.conn;
  566. rpt->host->stream_ssl.flags = rpt->ssl.flags;
  567. if(send_timeout(&rpt->ssl, rpt->fd, initial_response, strlen(initial_response), 0, 60) != (ssize_t)strlen(initial_response)) {
  568. #else
  569. if(send_timeout(rpt->fd, initial_response, strlen(initial_response), 0, 60) != strlen(initial_response)) {
  570. #endif
  571. log_stream_connection(rpt->client_ip, rpt->client_port, rpt->key, rpt->host->machine_guid, rpt->host->hostname, "FAILED - CANNOT REPLY");
  572. error("STREAM %s [receive from [%s]:%s]: cannot send ready command.", rpt->host->hostname, rpt->client_ip, rpt->client_port);
  573. close(rpt->fd);
  574. return 0;
  575. }
  576. // remove the non-blocking flag from the socket
  577. if(sock_delnonblock(rpt->fd) < 0)
  578. error("STREAM %s [receive from [%s]:%s]: cannot remove the non-blocking flag from socket %d", rpt->host->hostname, rpt->client_ip, rpt->client_port, rpt->fd);
  579. struct timeval timeout;
  580. timeout.tv_sec = 120;
  581. timeout.tv_usec = 0;
  582. if (unlikely(setsockopt(rpt->fd, SOL_SOCKET, SO_RCVTIMEO, &timeout, sizeof timeout) != 0))
  583. error("STREAM %s [receive from [%s]:%s]: cannot set timeout for socket %d", rpt->host->hostname, rpt->client_ip, rpt->client_port, rpt->fd);
  584. // convert the socket to a FILE *
  585. FILE *fp = fdopen(rpt->fd, "r");
  586. if(!fp) {
  587. log_stream_connection(rpt->client_ip, rpt->client_port, rpt->key, rpt->host->machine_guid, rpt->host->hostname, "FAILED - SOCKET ERROR");
  588. error("STREAM %s [receive from [%s]:%s]: failed to get a FILE for FD %d.", rpt->host->hostname, rpt->client_ip, rpt->client_port, rpt->fd);
  589. close(rpt->fd);
  590. return 0;
  591. }
  592. rrdhost_wrlock(rpt->host);
  593. /* if(rpt->host->connected_senders > 0) {
  594. rrdhost_unlock(rpt->host);
  595. log_stream_connection(rpt->client_ip, rpt->client_port, rpt->key, rpt->host->machine_guid, rpt->host->hostname, "REJECTED - ALREADY CONNECTED");
  596. info("STREAM %s [receive from [%s]:%s]: multiple streaming connections for the same host detected. Rejecting new connection.", rpt->host->hostname, rpt->client_ip, rpt->client_port);
  597. fclose(fp);
  598. return 0;
  599. }
  600. */
  601. // rpt->host->connected_senders++;
  602. if(rpt->stream_version > 0) {
  603. rrdhost_flag_set(rpt->host, RRDHOST_FLAG_STREAM_LABELS_UPDATE);
  604. rrdhost_flag_clear(rpt->host, RRDHOST_FLAG_STREAM_LABELS_STOP);
  605. }
  606. else {
  607. rrdhost_flag_set(rpt->host, RRDHOST_FLAG_STREAM_LABELS_STOP);
  608. rrdhost_flag_clear(rpt->host, RRDHOST_FLAG_STREAM_LABELS_UPDATE);
  609. }
  610. if(health_enabled != CONFIG_BOOLEAN_NO) {
  611. if(alarms_delay > 0) {
  612. rpt->host->health_delay_up_to = now_realtime_sec() + alarms_delay;
  613. info(
  614. "Postponing health checks for %" PRId64 " seconds, on host '%s', because it was just connected.",
  615. (int64_t)alarms_delay,
  616. rpt->host->hostname);
  617. }
  618. }
  619. rpt->host->senders_connect_time = now_realtime_sec();
  620. rpt->host->senders_last_chart_command = 0;
  621. rpt->host->trigger_chart_obsoletion_check = 1;
  622. rrdhost_unlock(rpt->host);
  623. // call the plugins.d processor to receive the metrics
  624. info("STREAM %s [receive from [%s]:%s]: receiving metrics...", rpt->host->hostname, rpt->client_ip, rpt->client_port);
  625. log_stream_connection(rpt->client_ip, rpt->client_port, rpt->key, rpt->host->machine_guid, rpt->host->hostname, "CONNECTED");
  626. cd.version = rpt->stream_version;
  627. #ifdef ENABLE_ACLK
  628. // in case we have cloud connection we inform cloud
  629. // new child connected
  630. if (netdata_cloud_setting)
  631. aclk_host_state_update(rpt->host, 1);
  632. #endif
  633. rrdcontext_host_child_connected(rpt->host);
  634. size_t count = streaming_parser(rpt, &cd, fp);
  635. log_stream_connection(rpt->client_ip, rpt->client_port, rpt->key, rpt->host->machine_guid, rpt->hostname,
  636. "DISCONNECTED");
  637. error("STREAM %s [receive from [%s]:%s]: disconnected (completed %zu updates).", rpt->hostname, rpt->client_ip,
  638. rpt->client_port, count);
  639. rrdcontext_host_child_disconnected(rpt->host);
  640. #ifdef ENABLE_ACLK
  641. // in case we have cloud connection we inform cloud
  642. // new child connected
  643. if (netdata_cloud_setting)
  644. aclk_host_state_update(rpt->host, 0);
  645. #endif
  646. // During a shutdown there is cleanup code in rrdhost that will cancel the sender thread
  647. if (!netdata_exit && rpt->host) {
  648. rrd_rdlock();
  649. rrdhost_wrlock(rpt->host);
  650. netdata_mutex_lock(&rpt->host->receiver_lock);
  651. if (rpt->host->receiver == rpt) {
  652. rpt->host->senders_connect_time = 0;
  653. rpt->host->trigger_chart_obsoletion_check = 0;
  654. rpt->host->senders_disconnected_time = now_realtime_sec();
  655. rrdhost_flag_set(rpt->host, RRDHOST_FLAG_ORPHAN);
  656. if(health_enabled == CONFIG_BOOLEAN_AUTO)
  657. rpt->host->health_enabled = 0;
  658. }
  659. rrdhost_unlock(rpt->host);
  660. if (rpt->host->receiver == rpt) {
  661. rrdpush_sender_thread_stop(rpt->host);
  662. }
  663. netdata_mutex_unlock(&rpt->host->receiver_lock);
  664. rrd_unlock();
  665. }
  666. // cleanup
  667. fclose(fp);
  668. return (int)count;
  669. }
  670. void *rrdpush_receiver_thread(void *ptr) {
  671. netdata_thread_cleanup_push(rrdpush_receiver_thread_cleanup, ptr);
  672. struct receiver_state *rpt = (struct receiver_state *)ptr;
  673. info("STREAM %s [%s]:%s: receive thread created (task id %d)", rpt->hostname, rpt->client_ip, rpt->client_port, gettid());
  674. worker_register("STREAMRCV");
  675. rrdpush_receive(rpt);
  676. worker_unregister();
  677. netdata_thread_cleanup_pop(1);
  678. return NULL;
  679. }