receiver.c 31 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767
  1. // SPDX-License-Identifier: GPL-3.0-or-later
  2. #include "rrdpush.h"
  3. extern struct config stream_config;
  4. void destroy_receiver_state(struct receiver_state *rpt) {
  5. freez(rpt->key);
  6. freez(rpt->hostname);
  7. freez(rpt->registry_hostname);
  8. freez(rpt->machine_guid);
  9. freez(rpt->os);
  10. freez(rpt->timezone);
  11. freez(rpt->abbrev_timezone);
  12. freez(rpt->tags);
  13. freez(rpt->client_ip);
  14. freez(rpt->client_port);
  15. freez(rpt->program_name);
  16. freez(rpt->program_version);
  17. #ifdef ENABLE_HTTPS
  18. if(rpt->ssl.conn){
  19. SSL_free(rpt->ssl.conn);
  20. }
  21. #endif
  22. #ifdef ENABLE_COMPRESSION
  23. if (rpt->decompressor)
  24. rpt->decompressor->destroy(&rpt->decompressor);
  25. #endif
  26. freez(rpt);
  27. }
  28. static void rrdpush_receiver_thread_cleanup(void *ptr) {
  29. worker_unregister();
  30. static __thread int executed = 0;
  31. if(!executed) {
  32. executed = 1;
  33. struct receiver_state *rpt = (struct receiver_state *) ptr;
  34. // If the shutdown sequence has started, and this receiver is still attached to the host then we cannot touch
  35. // the host pointer as it is unpredictable when the RRDHOST is deleted. Do the cleanup from rrdhost_free().
  36. if (netdata_exit && rpt->host) {
  37. rpt->exited = 1;
  38. return;
  39. }
  40. // Make sure that we detach this thread and don't kill a freshly arriving receiver
  41. if (!netdata_exit && rpt->host) {
  42. netdata_mutex_lock(&rpt->host->receiver_lock);
  43. if (rpt->host->receiver == rpt)
  44. rpt->host->receiver = NULL;
  45. netdata_mutex_unlock(&rpt->host->receiver_lock);
  46. }
  47. info("STREAM %s [receive from [%s]:%s]: receive thread ended (task id %d)", rpt->hostname, rpt->client_ip, rpt->client_port, gettid());
  48. destroy_receiver_state(rpt);
  49. }
  50. }
  51. #include "collectors/plugins.d/pluginsd_parser.h"
  52. PARSER_RC streaming_timestamp(char **words, void *user, PLUGINSD_ACTION *plugins_action)
  53. {
  54. UNUSED(plugins_action);
  55. char *remote_time_txt = words[1];
  56. time_t remote_time = 0;
  57. RRDHOST *host = ((PARSER_USER_OBJECT *)user)->host;
  58. struct plugind *cd = ((PARSER_USER_OBJECT *)user)->cd;
  59. if (cd->version < VERSION_GAP_FILLING ) {
  60. error("STREAM %s from %s: Child negotiated version %u but sent TIMESTAMP!", host->hostname, cd->cmd,
  61. cd->version);
  62. return PARSER_RC_OK; // Ignore error and continue stream
  63. }
  64. if (remote_time_txt && *remote_time_txt) {
  65. remote_time = str2ull(remote_time_txt);
  66. time_t now = now_realtime_sec(), prev = rrdhost_last_entry_t(host);
  67. time_t gap = 0;
  68. if (prev == 0)
  69. info(
  70. "STREAM %s from %s: Initial connection (no gap to check), "
  71. "remote=%"PRId64" local=%"PRId64" slew=%"PRId64"",
  72. host->hostname,
  73. cd->cmd,
  74. (int64_t)remote_time,
  75. (int64_t)now,
  76. (int64_t)now - remote_time);
  77. else {
  78. gap = now - prev;
  79. info(
  80. "STREAM %s from %s: Checking for gaps... "
  81. "remote=%"PRId64" local=%"PRId64"..%"PRId64" slew=%"PRId64" %"PRId64"-sec gap",
  82. host->hostname,
  83. cd->cmd,
  84. (int64_t)remote_time,
  85. (int64_t)prev,
  86. (int64_t)now,
  87. (int64_t)(remote_time - now),
  88. (int64_t)gap);
  89. }
  90. char message[128];
  91. sprintf(
  92. message,
  93. "REPLICATE %"PRId64" %"PRId64"\n",
  94. (int64_t)(remote_time - gap),
  95. (int64_t)remote_time);
  96. int ret;
  97. #ifdef ENABLE_HTTPS
  98. SSL *conn = host->stream_ssl.conn ;
  99. if(conn && !host->stream_ssl.flags) {
  100. ret = SSL_write(conn, message, strlen(message));
  101. } else {
  102. ret = send(host->receiver->fd, message, strlen(message), MSG_DONTWAIT);
  103. }
  104. #else
  105. ret = send(host->receiver->fd, message, strlen(message), MSG_DONTWAIT);
  106. #endif
  107. if (ret != (int)strlen(message))
  108. error("Failed to send initial timestamp - gaps may appear in charts");
  109. return PARSER_RC_OK;
  110. }
  111. return PARSER_RC_ERROR;
  112. }
  113. #define CLAIMED_ID_MIN_WORDS 3
  114. PARSER_RC streaming_claimed_id(char **words, void *user, PLUGINSD_ACTION *plugins_action)
  115. {
  116. UNUSED(plugins_action);
  117. int i;
  118. uuid_t uuid;
  119. RRDHOST *host = ((PARSER_USER_OBJECT *)user)->host;
  120. for (i = 0; words[i]; i++) ;
  121. if (i != CLAIMED_ID_MIN_WORDS) {
  122. error("Command CLAIMED_ID came malformed %d parameters are expected but %d received", CLAIMED_ID_MIN_WORDS - 1, i - 1);
  123. return PARSER_RC_ERROR;
  124. }
  125. // We don't need the parsed UUID
  126. // just do it to check the format
  127. if(uuid_parse(words[1], uuid)) {
  128. error("1st parameter (host GUID) to CLAIMED_ID command is not valid GUID. Received: \"%s\".", words[1]);
  129. return PARSER_RC_ERROR;
  130. }
  131. if(uuid_parse(words[2], uuid) && strcmp(words[2], "NULL")) {
  132. error("2nd parameter (Claim ID) to CLAIMED_ID command is not valid GUID. Received: \"%s\".", words[2]);
  133. return PARSER_RC_ERROR;
  134. }
  135. if(strcmp(words[1], host->machine_guid)) {
  136. error("Claim ID is for host \"%s\" but it came over connection for \"%s\"", words[1], host->machine_guid);
  137. return PARSER_RC_OK; //the message is OK problem must be somewhere else
  138. }
  139. rrdhost_aclk_state_lock(host);
  140. if (host->aclk_state.claimed_id)
  141. freez(host->aclk_state.claimed_id);
  142. host->aclk_state.claimed_id = strcmp(words[2], "NULL") ? strdupz(words[2]) : NULL;
  143. store_claim_id(&host->host_uuid, host->aclk_state.claimed_id ? &uuid : NULL);
  144. rrdhost_aclk_state_unlock(host);
  145. rrdpush_claimed_id(host);
  146. return PARSER_RC_OK;
  147. }
  148. #ifndef ENABLE_COMPRESSION
  149. /* The receiver socket is blocking, perform a single read into a buffer so that we can reassemble lines for parsing.
  150. */
  151. static int receiver_read(struct receiver_state *r, FILE *fp) {
  152. #ifdef ENABLE_HTTPS
  153. if (r->ssl.conn && !r->ssl.flags) {
  154. ERR_clear_error();
  155. int desired = sizeof(r->read_buffer) - r->read_len - 1;
  156. int ret = SSL_read(r->ssl.conn, r->read_buffer + r->read_len, desired);
  157. if (ret > 0 ) {
  158. r->read_len += ret;
  159. return 0;
  160. }
  161. // Don't treat SSL_ERROR_WANT_READ or SSL_ERROR_WANT_WRITE differently on blocking socket
  162. u_long err;
  163. char buf[256];
  164. while ((err = ERR_get_error()) != 0) {
  165. ERR_error_string_n(err, buf, sizeof(buf));
  166. error("STREAM %s [receive from %s] ssl error: %s", r->hostname, r->client_ip, buf);
  167. }
  168. return 1;
  169. }
  170. #endif
  171. if (!fgets(r->read_buffer, sizeof(r->read_buffer), fp))
  172. return 1;
  173. r->read_len = strlen(r->read_buffer);
  174. return 0;
  175. }
  176. #else
  177. /*
  178. * The receiver socket is blocking, perform a single read into a buffer so that we can reassemble lines for parsing.
  179. * if SSL encryption is on, then use SSL API for reading stream data.
  180. * Use line oriented fgets() in buffer from receiver_state is provided.
  181. * In other cases use fread to read binary data from socket.
  182. * Return zero on success and the number of bytes were read using pointer in the last argument.
  183. */
  184. static int read_stream(struct receiver_state *r, FILE *fp, char* buffer, size_t size, int* ret) {
  185. if (!ret)
  186. return 1;
  187. *ret = 0;
  188. #ifdef ENABLE_HTTPS
  189. if (r->ssl.conn && !r->ssl.flags) {
  190. ERR_clear_error();
  191. if (buffer != r->read_buffer + r->read_len) {
  192. *ret = SSL_read(r->ssl.conn, buffer, size);
  193. if (*ret > 0 )
  194. return 0;
  195. } else {
  196. // we need to receive data with LF to parse compression header
  197. size_t ofs = 0;
  198. int res = 0;
  199. errno = 0;
  200. while (ofs < size) {
  201. do {
  202. res = SSL_read(r->ssl.conn, buffer + ofs, 1);
  203. // When either SSL_ERROR_SYSCALL (OpenSSL < 3.0) or SSL_ERROR_SSL(OpenSSL > 3.0) happens,
  204. // the connection was lost https://www.openssl.org/docs/man3.0/man3/SSL_get_error.html,
  205. // without the test we will have an infinite loop https://github.com/netdata/netdata/issues/13092
  206. int local_ssl_err = SSL_get_error(r->ssl.conn, res);
  207. if (local_ssl_err == SSL_ERROR_SYSCALL || local_ssl_err == SSL_ERROR_SSL) {
  208. error("The SSL connection has error SSL_ERROR_SYSCALL(%d) and system is registering errno = %d",
  209. local_ssl_err, errno);
  210. return 1;
  211. }
  212. } while (res == 0);
  213. if (res < 0)
  214. break;
  215. if (buffer[ofs] == '\n')
  216. break;
  217. ofs += res;
  218. }
  219. if (res > 0) {
  220. ofs += res;
  221. *ret = ofs;
  222. buffer[ofs] = 0;
  223. return 0;
  224. }
  225. }
  226. // Don't treat SSL_ERROR_WANT_READ or SSL_ERROR_WANT_WRITE differently on blocking socket
  227. u_long err;
  228. char buf[256];
  229. while ((err = ERR_get_error()) != 0) {
  230. ERR_error_string_n(err, buf, sizeof(buf));
  231. error("STREAM %s [receive from %s] ssl error: %s", r->hostname, r->client_ip, buf);
  232. }
  233. return 1;
  234. }
  235. #endif
  236. if (buffer != r->read_buffer + r->read_len) {
  237. // read to external buffer
  238. *ret = fread(buffer, 1, size, fp);
  239. if (!*ret)
  240. return 1;
  241. } else {
  242. if (!fgets(r->read_buffer, sizeof(r->read_buffer), fp))
  243. return 1;
  244. *ret = strlen(r->read_buffer);
  245. }
  246. return 0;
  247. }
  248. /*
  249. * Get the next line of data for parsing.
  250. * Return data from the decompressor buffer if available.
  251. * Otherwise read next line from the socket and check for compression header.
  252. * Return the line was read If no compression header was found.
  253. * Otherwise read the entire block of compressed data, decompress it
  254. * and return it in receiver_state buffer.
  255. * Return zero on success.
  256. */
  257. static int receiver_read(struct receiver_state *r, FILE *fp) {
  258. // check any decompressed data present
  259. if (r->decompressor &&
  260. r->decompressor->decompressed_bytes_in_buffer(r->decompressor)) {
  261. size_t available = sizeof(r->read_buffer) - r->read_len;
  262. if (available) {
  263. size_t len = r->decompressor->get(r->decompressor,
  264. r->read_buffer + r->read_len, available);
  265. if (!len)
  266. return 1;
  267. r->read_len += len;
  268. }
  269. return 0;
  270. }
  271. int ret = 0;
  272. if (read_stream(r, fp, r->read_buffer + r->read_len, sizeof(r->read_buffer) - r->read_len - 1, &ret))
  273. return 1;
  274. if (!is_compressed_data(r->read_buffer, ret)) {
  275. r->read_len += ret;
  276. return 0;
  277. }
  278. if (unlikely(!r->decompressor))
  279. r->decompressor = create_decompressor();
  280. size_t bytes_to_read = r->decompressor->start(r->decompressor,
  281. r->read_buffer, ret);
  282. // Read the entire block of compressed data because
  283. // we're unable to decompress incomplete block
  284. char compressed[bytes_to_read];
  285. do {
  286. if (read_stream(r, fp, compressed, bytes_to_read, &ret))
  287. return 1;
  288. // Send input data to decompressor
  289. if (ret)
  290. r->decompressor->put(r->decompressor, compressed, ret);
  291. bytes_to_read -= ret;
  292. } while (bytes_to_read > 0);
  293. // Decompress
  294. size_t bytes_to_parse = r->decompressor->decompress(r->decompressor);
  295. if (!bytes_to_parse)
  296. return 1;
  297. // Fill read buffer with decompressed data
  298. r->read_len = r->decompressor->get(r->decompressor,
  299. r->read_buffer, sizeof(r->read_buffer));
  300. return 0;
  301. }
  302. #endif
  303. /* Produce a full line if one exists, statefully return where we start next time.
  304. * When we hit the end of the buffer with a partial line move it to the beginning for the next fill.
  305. */
  306. static char *receiver_next_line(struct receiver_state *r, int *pos) {
  307. int start = *pos, scan = *pos;
  308. if (scan >= r->read_len) {
  309. r->read_len = 0;
  310. return NULL;
  311. }
  312. while (scan < r->read_len && r->read_buffer[scan] != '\n')
  313. scan++;
  314. if (scan < r->read_len && r->read_buffer[scan] == '\n') {
  315. *pos = scan+1;
  316. r->read_buffer[scan] = 0;
  317. return &r->read_buffer[start];
  318. }
  319. memmove(r->read_buffer, &r->read_buffer[start], r->read_len - start);
  320. r->read_len -= start;
  321. return NULL;
  322. }
  323. static void streaming_parser_thread_cleanup(void *ptr) {
  324. PARSER *parser = (PARSER *)ptr;
  325. parser_destroy(parser);
  326. }
  327. size_t streaming_parser(struct receiver_state *rpt, struct plugind *cd, FILE *fp) {
  328. size_t result;
  329. PARSER_USER_OBJECT user = {
  330. .enabled = cd->enabled,
  331. .host = rpt->host,
  332. .opaque = rpt,
  333. .cd = cd,
  334. .trust_durations = 1
  335. };
  336. PARSER *parser = parser_init(rpt->host, &user, fp, PARSER_INPUT_SPLIT);
  337. // this keeps the parser with its current value
  338. // so, parser needs to be allocated before pushing it
  339. netdata_thread_cleanup_push(streaming_parser_thread_cleanup, parser);
  340. parser_add_keyword(parser, "TIMESTAMP", streaming_timestamp);
  341. parser_add_keyword(parser, "CLAIMED_ID", streaming_claimed_id);
  342. parser->plugins_action->begin_action = &pluginsd_begin_action;
  343. parser->plugins_action->flush_action = &pluginsd_flush_action;
  344. parser->plugins_action->end_action = &pluginsd_end_action;
  345. parser->plugins_action->disable_action = &pluginsd_disable_action;
  346. parser->plugins_action->variable_action = &pluginsd_variable_action;
  347. parser->plugins_action->dimension_action = &pluginsd_dimension_action;
  348. parser->plugins_action->label_action = &pluginsd_label_action;
  349. parser->plugins_action->overwrite_action = &pluginsd_overwrite_action;
  350. parser->plugins_action->chart_action = &pluginsd_chart_action;
  351. parser->plugins_action->set_action = &pluginsd_set_action;
  352. parser->plugins_action->clabel_commit_action = &pluginsd_clabel_commit_action;
  353. parser->plugins_action->clabel_action = &pluginsd_clabel_action;
  354. user.parser = parser;
  355. #ifdef ENABLE_COMPRESSION
  356. if (rpt->decompressor)
  357. rpt->decompressor->reset(rpt->decompressor);
  358. #endif
  359. do{
  360. if (receiver_read(rpt, fp))
  361. break;
  362. int pos = 0;
  363. char *line;
  364. while ((line = receiver_next_line(rpt, &pos))) {
  365. if (unlikely(netdata_exit || rpt->shutdown || parser_action(parser, line)))
  366. goto done;
  367. }
  368. rpt->last_msg_t = now_realtime_sec();
  369. }
  370. while(!netdata_exit);
  371. done:
  372. result = user.count;
  373. // free parser with the pop function
  374. netdata_thread_cleanup_pop(1);
  375. return result;
  376. }
  377. static int rrdpush_receive(struct receiver_state *rpt)
  378. {
  379. int history = default_rrd_history_entries;
  380. RRD_MEMORY_MODE mode = default_rrd_memory_mode;
  381. int health_enabled = default_health_enabled;
  382. int rrdpush_enabled = default_rrdpush_enabled;
  383. char *rrdpush_destination = default_rrdpush_destination;
  384. char *rrdpush_api_key = default_rrdpush_api_key;
  385. char *rrdpush_send_charts_matching = default_rrdpush_send_charts_matching;
  386. time_t alarms_delay = 60;
  387. rpt->update_every = (int)appconfig_get_number(&stream_config, rpt->machine_guid, "update every", rpt->update_every);
  388. if(rpt->update_every < 0) rpt->update_every = 1;
  389. history = (int)appconfig_get_number(&stream_config, rpt->key, "default history", history);
  390. history = (int)appconfig_get_number(&stream_config, rpt->machine_guid, "history", history);
  391. if(history < 5) history = 5;
  392. mode = rrd_memory_mode_id(appconfig_get(&stream_config, rpt->key, "default memory mode", rrd_memory_mode_name(mode)));
  393. mode = rrd_memory_mode_id(appconfig_get(&stream_config, rpt->machine_guid, "memory mode", rrd_memory_mode_name(mode)));
  394. #ifndef ENABLE_DBENGINE
  395. if (unlikely(mode == RRD_MEMORY_MODE_DBENGINE)) {
  396. close(rpt->fd);
  397. log_stream_connection(rpt->client_ip, rpt->client_port, rpt->key, rpt->machine_guid, rpt->hostname, "REJECTED -- DBENGINE MEMORY MODE NOT SUPPORTED");
  398. return 1;
  399. }
  400. #endif
  401. health_enabled = appconfig_get_boolean_ondemand(&stream_config, rpt->key, "health enabled by default", health_enabled);
  402. health_enabled = appconfig_get_boolean_ondemand(&stream_config, rpt->machine_guid, "health enabled", health_enabled);
  403. alarms_delay = appconfig_get_number(&stream_config, rpt->key, "default postpone alarms on connect seconds", alarms_delay);
  404. alarms_delay = appconfig_get_number(&stream_config, rpt->machine_guid, "postpone alarms on connect seconds", alarms_delay);
  405. rrdpush_enabled = appconfig_get_boolean(&stream_config, rpt->key, "default proxy enabled", rrdpush_enabled);
  406. rrdpush_enabled = appconfig_get_boolean(&stream_config, rpt->machine_guid, "proxy enabled", rrdpush_enabled);
  407. rrdpush_destination = appconfig_get(&stream_config, rpt->key, "default proxy destination", rrdpush_destination);
  408. rrdpush_destination = appconfig_get(&stream_config, rpt->machine_guid, "proxy destination", rrdpush_destination);
  409. rrdpush_api_key = appconfig_get(&stream_config, rpt->key, "default proxy api key", rrdpush_api_key);
  410. rrdpush_api_key = appconfig_get(&stream_config, rpt->machine_guid, "proxy api key", rrdpush_api_key);
  411. rrdpush_send_charts_matching = appconfig_get(&stream_config, rpt->key, "default proxy send charts matching", rrdpush_send_charts_matching);
  412. rrdpush_send_charts_matching = appconfig_get(&stream_config, rpt->machine_guid, "proxy send charts matching", rrdpush_send_charts_matching);
  413. #ifdef ENABLE_COMPRESSION
  414. unsigned int rrdpush_compression = default_compression_enabled;
  415. rrdpush_compression = appconfig_get_boolean(&stream_config, rpt->key, "enable compression", rrdpush_compression);
  416. rrdpush_compression = appconfig_get_boolean(&stream_config, rpt->machine_guid, "enable compression", rrdpush_compression);
  417. rpt->rrdpush_compression = (rrdpush_compression && default_compression_enabled);
  418. #endif //ENABLE_COMPRESSION
  419. (void)appconfig_set_default(&stream_config, rpt->machine_guid, "host tags", (rpt->tags)?rpt->tags:"");
  420. if (strcmp(rpt->machine_guid, localhost->machine_guid) == 0) {
  421. log_stream_connection(rpt->client_ip, rpt->client_port, rpt->key, rpt->machine_guid, rpt->hostname, "DENIED - ATTEMPT TO RECEIVE METRICS FROM MACHINE_GUID IDENTICAL TO PARENT");
  422. error("STREAM %s [receive from %s:%s]: denied to receive metrics, machine GUID [%s] is my own. Did you copy the parent/proxy machine GUID to a child, or is this an inter-agent loop?", rpt->hostname, rpt->client_ip, rpt->client_port, rpt->machine_guid);
  423. char initial_response[HTTP_HEADER_SIZE + 1];
  424. snprintfz(initial_response, HTTP_HEADER_SIZE, "%s", START_STREAMING_ERROR_SAME_LOCALHOST);
  425. #ifdef ENABLE_HTTPS
  426. rpt->host->stream_ssl.conn = rpt->ssl.conn;
  427. rpt->host->stream_ssl.flags = rpt->ssl.flags;
  428. if(send_timeout(&rpt->ssl, rpt->fd, initial_response, strlen(initial_response), 0, 60) != (ssize_t)strlen(initial_response)) {
  429. #else
  430. if(send_timeout(rpt->fd, initial_response, strlen(initial_response), 0, 60) != strlen(initial_response)) {
  431. #endif
  432. log_stream_connection(rpt->client_ip, rpt->client_port, rpt->key, rpt->host->machine_guid, rpt->host->hostname, "FAILED - CANNOT REPLY");
  433. error("STREAM %s [receive from [%s]:%s]: cannot send command.", rpt->host->hostname, rpt->client_ip, rpt->client_port);
  434. close(rpt->fd);
  435. return 0;
  436. }
  437. close(rpt->fd);
  438. return 0;
  439. }
  440. if (rpt->host==NULL) {
  441. rpt->host = rrdhost_find_or_create(
  442. rpt->hostname
  443. , rpt->registry_hostname
  444. , rpt->machine_guid
  445. , rpt->os
  446. , rpt->timezone
  447. , rpt->abbrev_timezone
  448. , rpt->utc_offset
  449. , rpt->tags
  450. , rpt->program_name
  451. , rpt->program_version
  452. , rpt->update_every
  453. , history
  454. , mode
  455. , (unsigned int)(health_enabled != CONFIG_BOOLEAN_NO)
  456. , (unsigned int)(rrdpush_enabled && rrdpush_destination && *rrdpush_destination && rrdpush_api_key && *rrdpush_api_key)
  457. , rrdpush_destination
  458. , rrdpush_api_key
  459. , rrdpush_send_charts_matching
  460. , rpt->system_info
  461. , 0
  462. );
  463. if(!rpt->host) {
  464. close(rpt->fd);
  465. log_stream_connection(rpt->client_ip, rpt->client_port, rpt->key, rpt->machine_guid, rpt->hostname, "FAILED - CANNOT ACQUIRE HOST");
  466. error("STREAM %s [receive from [%s]:%s]: failed to find/create host structure.", rpt->hostname, rpt->client_ip, rpt->client_port);
  467. return 1;
  468. }
  469. netdata_mutex_lock(&rpt->host->receiver_lock);
  470. if (rpt->host->receiver == NULL)
  471. rpt->host->receiver = rpt;
  472. else {
  473. error("Multiple receivers connected for %s concurrently, cancelling this one...", rpt->machine_guid);
  474. netdata_mutex_unlock(&rpt->host->receiver_lock);
  475. close(rpt->fd);
  476. log_stream_connection(rpt->client_ip, rpt->client_port, rpt->key, rpt->machine_guid, rpt->hostname, "FAILED - BEATEN TO HOST CREATION");
  477. return 1;
  478. }
  479. netdata_mutex_unlock(&rpt->host->receiver_lock);
  480. }
  481. else {
  482. rrd_wrlock();
  483. rrdhost_update(
  484. rpt->host,
  485. rpt->hostname,
  486. rpt->registry_hostname,
  487. rpt->machine_guid,
  488. rpt->os,
  489. rpt->timezone,
  490. rpt->abbrev_timezone,
  491. rpt->utc_offset,
  492. rpt->tags,
  493. rpt->program_name,
  494. rpt->program_version,
  495. rpt->update_every,
  496. history,
  497. mode,
  498. (unsigned int)(health_enabled != CONFIG_BOOLEAN_NO),
  499. (unsigned int)(rrdpush_enabled && rrdpush_destination && *rrdpush_destination && rrdpush_api_key && *rrdpush_api_key),
  500. rrdpush_destination,
  501. rrdpush_api_key,
  502. rrdpush_send_charts_matching,
  503. rpt->system_info);
  504. rrd_unlock();
  505. }
  506. #ifdef NETDATA_INTERNAL_CHECKS
  507. int ssl = 0;
  508. #ifdef ENABLE_HTTPS
  509. if (rpt->ssl.conn != NULL)
  510. ssl = 1;
  511. #endif
  512. info("STREAM %s [receive from [%s]:%s]: client willing to stream metrics for host '%s' with machine_guid '%s': update every = %d, history = %ld, memory mode = %s, health %s,%s tags '%s'"
  513. , rpt->hostname
  514. , rpt->client_ip
  515. , rpt->client_port
  516. , rpt->host->hostname
  517. , rpt->host->machine_guid
  518. , rpt->host->rrd_update_every
  519. , rpt->host->rrd_history_entries
  520. , rrd_memory_mode_name(rpt->host->rrd_memory_mode)
  521. , (health_enabled == CONFIG_BOOLEAN_NO)?"disabled":((health_enabled == CONFIG_BOOLEAN_YES)?"enabled":"auto")
  522. , ssl ? " SSL," : ""
  523. , rpt->host->tags?rpt->host->tags:""
  524. );
  525. #endif // NETDATA_INTERNAL_CHECKS
  526. struct plugind cd = {
  527. .enabled = 1,
  528. .update_every = default_rrd_update_every,
  529. .pid = 0,
  530. .serial_failures = 0,
  531. .successful_collections = 0,
  532. .obsolete = 0,
  533. .started_t = now_realtime_sec(),
  534. .next = NULL,
  535. .version = 0,
  536. };
  537. // put the client IP and port into the buffers used by plugins.d
  538. snprintfz(cd.id, CONFIG_MAX_NAME, "%s:%s", rpt->client_ip, rpt->client_port);
  539. snprintfz(cd.filename, FILENAME_MAX, "%s:%s", rpt->client_ip, rpt->client_port);
  540. snprintfz(cd.fullfilename, FILENAME_MAX, "%s:%s", rpt->client_ip, rpt->client_port);
  541. snprintfz(cd.cmd, PLUGINSD_CMD_MAX, "%s:%s", rpt->client_ip, rpt->client_port);
  542. info("STREAM %s [receive from [%s]:%s]: initializing communication...", rpt->host->hostname, rpt->client_ip, rpt->client_port);
  543. char initial_response[HTTP_HEADER_SIZE];
  544. if (rpt->stream_version > 1) {
  545. if(rpt->stream_version >= STREAM_VERSION_COMPRESSION){
  546. #ifdef ENABLE_COMPRESSION
  547. if(!rpt->rrdpush_compression)
  548. rpt->stream_version = STREAM_VERSION_CLABELS;
  549. #else
  550. if(STREAMING_PROTOCOL_CURRENT_VERSION < rpt->stream_version) {
  551. rpt->stream_version = STREAMING_PROTOCOL_CURRENT_VERSION;
  552. }
  553. #endif
  554. }
  555. info("STREAM %s [receive from [%s]:%s]: Netdata is using the stream version %u.", rpt->host->hostname, rpt->client_ip, rpt->client_port, rpt->stream_version);
  556. sprintf(initial_response, "%s%u", START_STREAMING_PROMPT_VN, rpt->stream_version);
  557. } else if (rpt->stream_version == 1) {
  558. info("STREAM %s [receive from [%s]:%s]: Netdata is using the stream version %u.", rpt->host->hostname, rpt->client_ip, rpt->client_port, rpt->stream_version);
  559. sprintf(initial_response, "%s", START_STREAMING_PROMPT_V2);
  560. } else {
  561. info("STREAM %s [receive from [%s]:%s]: Netdata is using first stream protocol.", rpt->host->hostname, rpt->client_ip, rpt->client_port);
  562. sprintf(initial_response, "%s", START_STREAMING_PROMPT);
  563. }
  564. debug(D_STREAM, "Initial response to %s: %s", rpt->client_ip, initial_response);
  565. #ifdef ENABLE_HTTPS
  566. rpt->host->stream_ssl.conn = rpt->ssl.conn;
  567. rpt->host->stream_ssl.flags = rpt->ssl.flags;
  568. if(send_timeout(&rpt->ssl, rpt->fd, initial_response, strlen(initial_response), 0, 60) != (ssize_t)strlen(initial_response)) {
  569. #else
  570. if(send_timeout(rpt->fd, initial_response, strlen(initial_response), 0, 60) != strlen(initial_response)) {
  571. #endif
  572. log_stream_connection(rpt->client_ip, rpt->client_port, rpt->key, rpt->host->machine_guid, rpt->host->hostname, "FAILED - CANNOT REPLY");
  573. error("STREAM %s [receive from [%s]:%s]: cannot send ready command.", rpt->host->hostname, rpt->client_ip, rpt->client_port);
  574. close(rpt->fd);
  575. return 0;
  576. }
  577. // remove the non-blocking flag from the socket
  578. if(sock_delnonblock(rpt->fd) < 0)
  579. error("STREAM %s [receive from [%s]:%s]: cannot remove the non-blocking flag from socket %d", rpt->host->hostname, rpt->client_ip, rpt->client_port, rpt->fd);
  580. struct timeval timeout;
  581. timeout.tv_sec = 120;
  582. timeout.tv_usec = 0;
  583. if (unlikely(setsockopt(rpt->fd, SOL_SOCKET, SO_RCVTIMEO, &timeout, sizeof timeout) != 0))
  584. error("STREAM %s [receive from [%s]:%s]: cannot set timeout for socket %d", rpt->host->hostname, rpt->client_ip, rpt->client_port, rpt->fd);
  585. // convert the socket to a FILE *
  586. FILE *fp = fdopen(rpt->fd, "r");
  587. if(!fp) {
  588. log_stream_connection(rpt->client_ip, rpt->client_port, rpt->key, rpt->host->machine_guid, rpt->host->hostname, "FAILED - SOCKET ERROR");
  589. error("STREAM %s [receive from [%s]:%s]: failed to get a FILE for FD %d.", rpt->host->hostname, rpt->client_ip, rpt->client_port, rpt->fd);
  590. close(rpt->fd);
  591. return 0;
  592. }
  593. rrdhost_wrlock(rpt->host);
  594. /* if(rpt->host->connected_senders > 0) {
  595. rrdhost_unlock(rpt->host);
  596. log_stream_connection(rpt->client_ip, rpt->client_port, rpt->key, rpt->host->machine_guid, rpt->host->hostname, "REJECTED - ALREADY CONNECTED");
  597. info("STREAM %s [receive from [%s]:%s]: multiple streaming connections for the same host detected. Rejecting new connection.", rpt->host->hostname, rpt->client_ip, rpt->client_port);
  598. fclose(fp);
  599. return 0;
  600. }
  601. */
  602. // rpt->host->connected_senders++;
  603. if(rpt->stream_version > 0) {
  604. rrdhost_flag_set(rpt->host, RRDHOST_FLAG_STREAM_LABELS_UPDATE);
  605. rrdhost_flag_clear(rpt->host, RRDHOST_FLAG_STREAM_LABELS_STOP);
  606. }
  607. else {
  608. rrdhost_flag_set(rpt->host, RRDHOST_FLAG_STREAM_LABELS_STOP);
  609. rrdhost_flag_clear(rpt->host, RRDHOST_FLAG_STREAM_LABELS_UPDATE);
  610. }
  611. if(health_enabled != CONFIG_BOOLEAN_NO) {
  612. if(alarms_delay > 0) {
  613. rpt->host->health_delay_up_to = now_realtime_sec() + alarms_delay;
  614. info(
  615. "Postponing health checks for %" PRId64 " seconds, on host '%s', because it was just connected.",
  616. (int64_t)alarms_delay,
  617. rpt->host->hostname);
  618. }
  619. }
  620. rpt->host->senders_connect_time = now_realtime_sec();
  621. rpt->host->senders_last_chart_command = 0;
  622. rpt->host->trigger_chart_obsoletion_check = 1;
  623. rrdhost_unlock(rpt->host);
  624. // call the plugins.d processor to receive the metrics
  625. info("STREAM %s [receive from [%s]:%s]: receiving metrics...", rpt->host->hostname, rpt->client_ip, rpt->client_port);
  626. log_stream_connection(rpt->client_ip, rpt->client_port, rpt->key, rpt->host->machine_guid, rpt->host->hostname, "CONNECTED");
  627. cd.version = rpt->stream_version;
  628. #ifdef ENABLE_ACLK
  629. // in case we have cloud connection we inform cloud
  630. // new child connected
  631. if (netdata_cloud_setting)
  632. aclk_host_state_update(rpt->host, 1);
  633. #endif
  634. rrdcontext_host_child_connected(rpt->host);
  635. size_t count = streaming_parser(rpt, &cd, fp);
  636. log_stream_connection(rpt->client_ip, rpt->client_port, rpt->key, rpt->host->machine_guid, rpt->hostname,
  637. "DISCONNECTED");
  638. error("STREAM %s [receive from [%s]:%s]: disconnected (completed %zu updates).", rpt->hostname, rpt->client_ip,
  639. rpt->client_port, count);
  640. rrdcontext_host_child_disconnected(rpt->host);
  641. #ifdef ENABLE_ACLK
  642. // in case we have cloud connection we inform cloud
  643. // new child connected
  644. if (netdata_cloud_setting)
  645. aclk_host_state_update(rpt->host, 0);
  646. #endif
  647. // During a shutdown there is cleanup code in rrdhost that will cancel the sender thread
  648. if (!netdata_exit && rpt->host) {
  649. rrd_rdlock();
  650. rrdhost_wrlock(rpt->host);
  651. netdata_mutex_lock(&rpt->host->receiver_lock);
  652. if (rpt->host->receiver == rpt) {
  653. rpt->host->senders_connect_time = 0;
  654. rpt->host->trigger_chart_obsoletion_check = 0;
  655. rpt->host->senders_disconnected_time = now_realtime_sec();
  656. rrdhost_flag_set(rpt->host, RRDHOST_FLAG_ORPHAN);
  657. if(health_enabled == CONFIG_BOOLEAN_AUTO)
  658. rpt->host->health_enabled = 0;
  659. }
  660. rrdhost_unlock(rpt->host);
  661. if (rpt->host->receiver == rpt) {
  662. rrdpush_sender_thread_stop(rpt->host);
  663. }
  664. netdata_mutex_unlock(&rpt->host->receiver_lock);
  665. rrd_unlock();
  666. }
  667. // cleanup
  668. fclose(fp);
  669. return (int)count;
  670. }
  671. void *rrdpush_receiver_thread(void *ptr) {
  672. netdata_thread_cleanup_push(rrdpush_receiver_thread_cleanup, ptr);
  673. struct receiver_state *rpt = (struct receiver_state *)ptr;
  674. info("STREAM %s [%s]:%s: receive thread created (task id %d)", rpt->hostname, rpt->client_ip, rpt->client_port, gettid());
  675. worker_register("STREAMRCV");
  676. rrdpush_receive(rpt);
  677. worker_unregister();
  678. netdata_thread_cleanup_pop(1);
  679. return NULL;
  680. }