rrdpush.c 53 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412
  1. // SPDX-License-Identifier: GPL-3.0-or-later
  2. #include "rrdpush.h"
  3. /*
  4. * rrdpush
  5. *
  6. * 3 threads are involved for all stream operations
  7. *
  8. * 1. a random data collection thread, calling rrdset_done_push()
  9. * this is called for each chart.
  10. *
  11. * the output of this work is kept in a thread BUFFER
  12. * the sender thread is signalled via a pipe (in RRDHOST)
  13. *
  14. * 2. a sender thread running at the sending netdata
  15. * this is spawned automatically on the first chart to be pushed
  16. *
  17. * It tries to push the metrics to the remote netdata, as fast
  18. * as possible (i.e. immediately after they are collected).
  19. *
  20. * 3. a receiver thread, running at the receiving netdata
  21. * this is spawned automatically when the sender connects to
  22. * the receiver.
  23. *
  24. */
  25. struct config stream_config = {
  26. .first_section = NULL,
  27. .last_section = NULL,
  28. .mutex = NETDATA_MUTEX_INITIALIZER,
  29. .index = {
  30. .avl_tree = {
  31. .root = NULL,
  32. .compar = appconfig_section_compare
  33. },
  34. .rwlock = AVL_LOCK_INITIALIZER
  35. }
  36. };
  37. unsigned int default_rrdpush_enabled = 0;
  38. STREAM_CAPABILITIES globally_disabled_capabilities = STREAM_CAP_NONE;
  39. unsigned int default_rrdpush_compression_enabled = 1;
  40. char *default_rrdpush_destination = NULL;
  41. char *default_rrdpush_api_key = NULL;
  42. char *default_rrdpush_send_charts_matching = NULL;
  43. bool default_rrdpush_enable_replication = true;
  44. time_t default_rrdpush_seconds_to_replicate = 86400;
  45. time_t default_rrdpush_replication_step = 600;
  46. #ifdef ENABLE_HTTPS
  47. char *netdata_ssl_ca_path = NULL;
  48. char *netdata_ssl_ca_file = NULL;
  49. #endif
  50. static void load_stream_conf() {
  51. errno = 0;
  52. char *filename = strdupz_path_subpath(netdata_configured_user_config_dir, "stream.conf");
  53. if(!appconfig_load(&stream_config, filename, 0, NULL)) {
  54. nd_log_daemon(NDLP_NOTICE, "CONFIG: cannot load user config '%s'. Will try stock config.", filename);
  55. freez(filename);
  56. filename = strdupz_path_subpath(netdata_configured_stock_config_dir, "stream.conf");
  57. if(!appconfig_load(&stream_config, filename, 0, NULL))
  58. nd_log_daemon(NDLP_NOTICE, "CONFIG: cannot load stock config '%s'. Running with internal defaults.", filename);
  59. }
  60. freez(filename);
  61. }
  62. bool rrdpush_receiver_needs_dbengine() {
  63. struct section *co;
  64. for(co = stream_config.first_section; co; co = co->next) {
  65. if(strcmp(co->name, "stream") == 0)
  66. continue; // the first section is not relevant
  67. char *s;
  68. s = appconfig_get_by_section(co, "enabled", NULL);
  69. if(!s || !appconfig_test_boolean_value(s))
  70. continue;
  71. s = appconfig_get_by_section(co, "default memory mode", NULL);
  72. if(s && strcmp(s, "dbengine") == 0)
  73. return true;
  74. s = appconfig_get_by_section(co, "memory mode", NULL);
  75. if(s && strcmp(s, "dbengine") == 0)
  76. return true;
  77. }
  78. return false;
  79. }
  80. int rrdpush_init() {
  81. // --------------------------------------------------------------------
  82. // load stream.conf
  83. load_stream_conf();
  84. default_rrdpush_enabled = (unsigned int)appconfig_get_boolean(&stream_config, CONFIG_SECTION_STREAM, "enabled", default_rrdpush_enabled);
  85. default_rrdpush_destination = appconfig_get(&stream_config, CONFIG_SECTION_STREAM, "destination", "");
  86. default_rrdpush_api_key = appconfig_get(&stream_config, CONFIG_SECTION_STREAM, "api key", "");
  87. default_rrdpush_send_charts_matching = appconfig_get(&stream_config, CONFIG_SECTION_STREAM, "send charts matching", "*");
  88. default_rrdpush_enable_replication = config_get_boolean(CONFIG_SECTION_DB, "enable replication", default_rrdpush_enable_replication);
  89. default_rrdpush_seconds_to_replicate = config_get_number(CONFIG_SECTION_DB, "seconds to replicate", default_rrdpush_seconds_to_replicate);
  90. default_rrdpush_replication_step = config_get_number(CONFIG_SECTION_DB, "seconds per replication step", default_rrdpush_replication_step);
  91. rrdhost_free_orphan_time_s = config_get_number(CONFIG_SECTION_DB, "cleanup orphan hosts after secs", rrdhost_free_orphan_time_s);
  92. default_rrdpush_compression_enabled = (unsigned int)appconfig_get_boolean(&stream_config, CONFIG_SECTION_STREAM,
  93. "enable compression", default_rrdpush_compression_enabled);
  94. rrdpush_compression_levels[COMPRESSION_ALGORITHM_BROTLI] = (int)appconfig_get_number(
  95. &stream_config, CONFIG_SECTION_STREAM, "brotli compression level",
  96. rrdpush_compression_levels[COMPRESSION_ALGORITHM_BROTLI]);
  97. rrdpush_compression_levels[COMPRESSION_ALGORITHM_ZSTD] = (int)appconfig_get_number(
  98. &stream_config, CONFIG_SECTION_STREAM, "zstd compression level",
  99. rrdpush_compression_levels[COMPRESSION_ALGORITHM_ZSTD]);
  100. rrdpush_compression_levels[COMPRESSION_ALGORITHM_LZ4] = (int)appconfig_get_number(
  101. &stream_config, CONFIG_SECTION_STREAM, "lz4 compression acceleration",
  102. rrdpush_compression_levels[COMPRESSION_ALGORITHM_LZ4]);
  103. rrdpush_compression_levels[COMPRESSION_ALGORITHM_GZIP] = (int)appconfig_get_number(
  104. &stream_config, CONFIG_SECTION_STREAM, "gzip compression level",
  105. rrdpush_compression_levels[COMPRESSION_ALGORITHM_GZIP]);
  106. if(default_rrdpush_enabled && (!default_rrdpush_destination || !*default_rrdpush_destination || !default_rrdpush_api_key || !*default_rrdpush_api_key)) {
  107. nd_log_daemon(NDLP_WARNING, "STREAM [send]: cannot enable sending thread - information is missing.");
  108. default_rrdpush_enabled = 0;
  109. }
  110. #ifdef ENABLE_HTTPS
  111. netdata_ssl_validate_certificate_sender = !appconfig_get_boolean(&stream_config, CONFIG_SECTION_STREAM, "ssl skip certificate verification", !netdata_ssl_validate_certificate);
  112. if(!netdata_ssl_validate_certificate_sender)
  113. nd_log_daemon(NDLP_NOTICE, "SSL: streaming senders will skip SSL certificates verification.");
  114. netdata_ssl_ca_path = appconfig_get(&stream_config, CONFIG_SECTION_STREAM, "CApath", NULL);
  115. netdata_ssl_ca_file = appconfig_get(&stream_config, CONFIG_SECTION_STREAM, "CAfile", NULL);
  116. #endif
  117. return default_rrdpush_enabled;
  118. }
  // Data collection runs in multiple threads. Each of them calls
  // rrdset_done(), which calls rrdset_done_push(), which in turn uses this
  // pipe to tell the streaming thread that more data are ready to be sent.
  #define PIPE_READ 0
  #define PIPE_WRITE 1

  // For the first iterations of each chart we send a BEGIN line without
  // microseconds, so that the remote netdata re-syncs the chart to its own
  // clock. This is the number of such iterations.
  unsigned int remote_clock_resync_iterations = 60;
  131. static inline bool should_send_chart_matching(RRDSET *st, RRDSET_FLAGS flags) {
  132. if(!(flags & RRDSET_FLAG_RECEIVER_REPLICATION_FINISHED))
  133. return false;
  134. if(unlikely(!(flags & (RRDSET_FLAG_UPSTREAM_SEND | RRDSET_FLAG_UPSTREAM_IGNORE)))) {
  135. RRDHOST *host = st->rrdhost;
  136. if (flags & RRDSET_FLAG_ANOMALY_DETECTION) {
  137. if(ml_streaming_enabled())
  138. rrdset_flag_set(st, RRDSET_FLAG_UPSTREAM_SEND);
  139. else
  140. rrdset_flag_set(st, RRDSET_FLAG_UPSTREAM_IGNORE);
  141. }
  142. else if(simple_pattern_matches_string(host->rrdpush_send_charts_matching, st->id) ||
  143. simple_pattern_matches_string(host->rrdpush_send_charts_matching, st->name))
  144. rrdset_flag_set(st, RRDSET_FLAG_UPSTREAM_SEND);
  145. else
  146. rrdset_flag_set(st, RRDSET_FLAG_UPSTREAM_IGNORE);
  147. // get the flags again, to know how to respond
  148. flags = rrdset_flag_check(st, RRDSET_FLAG_UPSTREAM_SEND|RRDSET_FLAG_UPSTREAM_IGNORE);
  149. }
  150. return flags & RRDSET_FLAG_UPSTREAM_SEND;
  151. }
  152. int configured_as_parent() {
  153. struct section *section = NULL;
  154. int is_parent = 0;
  155. appconfig_wrlock(&stream_config);
  156. for (section = stream_config.first_section; section; section = section->next) {
  157. uuid_t uuid;
  158. if (uuid_parse(section->name, uuid) != -1 &&
  159. appconfig_get_boolean_by_section(section, "enabled", 0)) {
  160. is_parent = 1;
  161. break;
  162. }
  163. }
  164. appconfig_unlock(&stream_config);
  165. return is_parent;
  166. }
  167. // chart labels
  168. static int send_clabels_callback(const char *name, const char *value, RRDLABEL_SRC ls, void *data) {
  169. BUFFER *wb = (BUFFER *)data;
  170. buffer_sprintf(wb, PLUGINSD_KEYWORD_CLABEL " \"%s\" \"%s\" %d\n", name, value, ls & ~(RRDLABEL_FLAG_INTERNAL));
  171. return 1;
  172. }
  173. static void rrdpush_send_clabels(BUFFER *wb, RRDSET *st) {
  174. if (st->rrdlabels) {
  175. if(rrdlabels_walkthrough_read(st->rrdlabels, send_clabels_callback, wb) > 0)
  176. buffer_sprintf(wb, PLUGINSD_KEYWORD_CLABEL_COMMIT "\n");
  177. }
  178. }
  179. // Send the current chart definition.
  180. // Assumes that collector thread has already called sender_start for mutex / buffer state.
  181. static inline bool rrdpush_send_chart_definition(BUFFER *wb, RRDSET *st) {
  182. uint32_t version = rrdset_metadata_version(st);
  183. RRDHOST *host = st->rrdhost;
  184. NUMBER_ENCODING integer_encoding = stream_has_capability(host->sender, STREAM_CAP_IEEE754) ? NUMBER_ENCODING_BASE64 : NUMBER_ENCODING_HEX;
  185. bool with_slots = stream_has_capability(host->sender, STREAM_CAP_SLOTS) ? true : false;
  186. bool replication_progress = false;
  187. // properly set the name for the remote end to parse it
  188. char *name = "";
  189. if(likely(st->name)) {
  190. if(unlikely(st->id != st->name)) {
  191. // they differ
  192. name = strchr(rrdset_name(st), '.');
  193. if(name)
  194. name++;
  195. else
  196. name = "";
  197. }
  198. }
  199. buffer_fast_strcat(wb, PLUGINSD_KEYWORD_CHART, sizeof(PLUGINSD_KEYWORD_CHART) - 1);
  200. if(with_slots) {
  201. buffer_fast_strcat(wb, " "PLUGINSD_KEYWORD_SLOT":", sizeof(PLUGINSD_KEYWORD_SLOT) - 1 + 2);
  202. buffer_print_uint64_encoded(wb, integer_encoding, st->rrdpush.sender.chart_slot);
  203. }
  204. // send the chart
  205. buffer_sprintf(
  206. wb
  207. , " \"%s\" \"%s\" \"%s\" \"%s\" \"%s\" \"%s\" \"%s\" %d %d \"%s %s %s %s\" \"%s\" \"%s\"\n"
  208. , rrdset_id(st)
  209. , name
  210. , rrdset_title(st)
  211. , rrdset_units(st)
  212. , rrdset_family(st)
  213. , rrdset_context(st)
  214. , rrdset_type_name(st->chart_type)
  215. , st->priority
  216. , st->update_every
  217. , rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE)?"obsolete":""
  218. , rrdset_flag_check(st, RRDSET_FLAG_DETAIL)?"detail":""
  219. , rrdset_flag_check(st, RRDSET_FLAG_STORE_FIRST)?"store_first":""
  220. , rrdset_flag_check(st, RRDSET_FLAG_HIDDEN)?"hidden":""
  221. , rrdset_plugin_name(st)
  222. , rrdset_module_name(st)
  223. );
  224. // send the chart labels
  225. if (stream_has_capability(host->sender, STREAM_CAP_CLABELS))
  226. rrdpush_send_clabels(wb, st);
  227. // send the dimensions
  228. RRDDIM *rd;
  229. rrddim_foreach_read(rd, st) {
  230. buffer_fast_strcat(wb, PLUGINSD_KEYWORD_DIMENSION, sizeof(PLUGINSD_KEYWORD_DIMENSION) - 1);
  231. if(with_slots) {
  232. buffer_fast_strcat(wb, " "PLUGINSD_KEYWORD_SLOT":", sizeof(PLUGINSD_KEYWORD_SLOT) - 1 + 2);
  233. buffer_print_uint64_encoded(wb, integer_encoding, rd->rrdpush.sender.dim_slot);
  234. }
  235. buffer_sprintf(
  236. wb
  237. , " \"%s\" \"%s\" \"%s\" %d %d \"%s %s %s\"\n"
  238. , rrddim_id(rd)
  239. , rrddim_name(rd)
  240. , rrd_algorithm_name(rd->algorithm)
  241. , rd->multiplier
  242. , rd->divisor
  243. , rrddim_flag_check(rd, RRDDIM_FLAG_OBSOLETE)?"obsolete":""
  244. , rrddim_option_check(rd, RRDDIM_OPTION_HIDDEN)?"hidden":""
  245. , rrddim_option_check(rd, RRDDIM_OPTION_DONT_DETECT_RESETS_OR_OVERFLOWS)?"noreset":""
  246. );
  247. }
  248. rrddim_foreach_done(rd);
  249. // send the chart functions
  250. if(stream_has_capability(host->sender, STREAM_CAP_FUNCTIONS))
  251. rrd_chart_functions_expose_rrdpush(st, wb);
  252. // send the chart local custom variables
  253. rrdvar_print_to_streaming_custom_chart_variables(st, wb);
  254. if (stream_has_capability(host->sender, STREAM_CAP_REPLICATION)) {
  255. time_t db_first_time_t, db_last_time_t;
  256. time_t now = now_realtime_sec();
  257. rrdset_get_retention_of_tier_for_collected_chart(st, &db_first_time_t, &db_last_time_t, now, 0);
  258. buffer_sprintf(wb, PLUGINSD_KEYWORD_CHART_DEFINITION_END " %llu %llu %llu\n",
  259. (unsigned long long)db_first_time_t,
  260. (unsigned long long)db_last_time_t,
  261. (unsigned long long)now);
  262. if(!rrdset_flag_check(st, RRDSET_FLAG_SENDER_REPLICATION_IN_PROGRESS)) {
  263. rrdset_flag_set(st, RRDSET_FLAG_SENDER_REPLICATION_IN_PROGRESS);
  264. rrdset_flag_clear(st, RRDSET_FLAG_SENDER_REPLICATION_FINISHED);
  265. rrdhost_sender_replicating_charts_plus_one(st->rrdhost);
  266. }
  267. replication_progress = true;
  268. #ifdef NETDATA_LOG_REPLICATION_REQUESTS
  269. internal_error(true, "REPLAY: 'host:%s/chart:%s' replication starts",
  270. rrdhost_hostname(st->rrdhost), rrdset_id(st));
  271. #endif
  272. }
  273. sender_commit(host->sender, wb, STREAM_TRAFFIC_TYPE_METADATA);
  274. // we can set the exposed flag, after we commit the buffer
  275. // because replication may pick it up prematurely
  276. rrddim_foreach_read(rd, st) {
  277. rrddim_metadata_exposed_upstream(rd, version);
  278. }
  279. rrddim_foreach_done(rd);
  280. rrdset_metadata_exposed_upstream(st, version);
  281. st->rrdpush.sender.resync_time_s = st->last_collected_time.tv_sec + (remote_clock_resync_iterations * st->update_every);
  282. return replication_progress;
  283. }
  284. // sends the current chart dimensions
  285. static void rrdpush_send_chart_metrics(BUFFER *wb, RRDSET *st, struct sender_state *s __maybe_unused, RRDSET_FLAGS flags) {
  286. buffer_fast_strcat(wb, "BEGIN \"", 7);
  287. buffer_fast_strcat(wb, rrdset_id(st), string_strlen(st->id));
  288. buffer_fast_strcat(wb, "\" ", 2);
  289. if(st->last_collected_time.tv_sec > st->rrdpush.sender.resync_time_s)
  290. buffer_print_uint64(wb, st->usec_since_last_update);
  291. else
  292. buffer_fast_strcat(wb, "0", 1);
  293. buffer_fast_strcat(wb, "\n", 1);
  294. RRDDIM *rd;
  295. rrddim_foreach_read(rd, st) {
  296. if(unlikely(!rrddim_check_updated(rd)))
  297. continue;
  298. if(likely(rrddim_check_upstream_exposed_collector(rd))) {
  299. buffer_fast_strcat(wb, "SET \"", 5);
  300. buffer_fast_strcat(wb, rrddim_id(rd), string_strlen(rd->id));
  301. buffer_fast_strcat(wb, "\" = ", 4);
  302. buffer_print_int64(wb, rd->collector.collected_value);
  303. buffer_fast_strcat(wb, "\n", 1);
  304. }
  305. else {
  306. internal_error(true, "STREAM: 'host:%s/chart:%s/dim:%s' flag 'exposed' is updated but not exposed",
  307. rrdhost_hostname(st->rrdhost), rrdset_id(st), rrddim_id(rd));
  308. // we will include it in the next iteration
  309. rrddim_metadata_updated(rd);
  310. }
  311. }
  312. rrddim_foreach_done(rd);
  313. if(unlikely(flags & RRDSET_FLAG_UPSTREAM_SEND_VARIABLES))
  314. rrdvar_print_to_streaming_custom_chart_variables(st, wb);
  315. buffer_fast_strcat(wb, "END\n", 4);
  316. }
  317. static void rrdpush_sender_thread_spawn(RRDHOST *host);
  318. // Called from the internal collectors to mark a chart obsolete.
  319. bool rrdset_push_chart_definition_now(RRDSET *st) {
  320. RRDHOST *host = st->rrdhost;
  321. if(unlikely(!rrdhost_can_send_definitions_to_parent(host)
  322. || !should_send_chart_matching(st, rrdset_flag_get(st)))) {
  323. return false;
  324. }
  325. BUFFER *wb = sender_start(host->sender);
  326. rrdpush_send_chart_definition(wb, st);
  327. sender_thread_buffer_free();
  328. return true;
  329. }
  330. void rrdset_push_metrics_v1(RRDSET_STREAM_BUFFER *rsb, RRDSET *st) {
  331. RRDHOST *host = st->rrdhost;
  332. rrdpush_send_chart_metrics(rsb->wb, st, host->sender, rsb->rrdset_flags);
  333. }
  334. void rrddim_push_metrics_v2(RRDSET_STREAM_BUFFER *rsb, RRDDIM *rd, usec_t point_end_time_ut, NETDATA_DOUBLE n, SN_FLAGS flags) {
  335. if(!rsb->wb || !rsb->v2 || !netdata_double_isnumber(n) || !does_storage_number_exist(flags))
  336. return;
  337. bool with_slots = stream_has_capability(rsb, STREAM_CAP_SLOTS) ? true : false;
  338. NUMBER_ENCODING integer_encoding = stream_has_capability(rsb, STREAM_CAP_IEEE754) ? NUMBER_ENCODING_BASE64 : NUMBER_ENCODING_HEX;
  339. NUMBER_ENCODING doubles_encoding = stream_has_capability(rsb, STREAM_CAP_IEEE754) ? NUMBER_ENCODING_BASE64 : NUMBER_ENCODING_DECIMAL;
  340. BUFFER *wb = rsb->wb;
  341. time_t point_end_time_s = (time_t)(point_end_time_ut / USEC_PER_SEC);
  342. if(unlikely(rsb->last_point_end_time_s != point_end_time_s)) {
  343. if(unlikely(rsb->begin_v2_added))
  344. buffer_fast_strcat(wb, PLUGINSD_KEYWORD_END_V2 "\n", sizeof(PLUGINSD_KEYWORD_END_V2) - 1 + 1);
  345. buffer_fast_strcat(wb, PLUGINSD_KEYWORD_BEGIN_V2, sizeof(PLUGINSD_KEYWORD_BEGIN_V2) - 1);
  346. if(with_slots) {
  347. buffer_fast_strcat(wb, " "PLUGINSD_KEYWORD_SLOT":", sizeof(PLUGINSD_KEYWORD_SLOT) - 1 + 2);
  348. buffer_print_uint64_encoded(wb, integer_encoding, rd->rrdset->rrdpush.sender.chart_slot);
  349. }
  350. buffer_fast_strcat(wb, " '", 2);
  351. buffer_fast_strcat(wb, rrdset_id(rd->rrdset), string_strlen(rd->rrdset->id));
  352. buffer_fast_strcat(wb, "' ", 2);
  353. buffer_print_uint64_encoded(wb, integer_encoding, rd->rrdset->update_every);
  354. buffer_fast_strcat(wb, " ", 1);
  355. buffer_print_uint64_encoded(wb, integer_encoding, point_end_time_s);
  356. buffer_fast_strcat(wb, " ", 1);
  357. if(point_end_time_s == rsb->wall_clock_time)
  358. buffer_fast_strcat(wb, "#", 1);
  359. else
  360. buffer_print_uint64_encoded(wb, integer_encoding, rsb->wall_clock_time);
  361. buffer_fast_strcat(wb, "\n", 1);
  362. rsb->last_point_end_time_s = point_end_time_s;
  363. rsb->begin_v2_added = true;
  364. }
  365. buffer_fast_strcat(wb, PLUGINSD_KEYWORD_SET_V2, sizeof(PLUGINSD_KEYWORD_SET_V2) - 1);
  366. if(with_slots) {
  367. buffer_fast_strcat(wb, " "PLUGINSD_KEYWORD_SLOT":", sizeof(PLUGINSD_KEYWORD_SLOT) - 1 + 2);
  368. buffer_print_uint64_encoded(wb, integer_encoding, rd->rrdpush.sender.dim_slot);
  369. }
  370. buffer_fast_strcat(wb, " '", 2);
  371. buffer_fast_strcat(wb, rrddim_id(rd), string_strlen(rd->id));
  372. buffer_fast_strcat(wb, "' ", 2);
  373. buffer_print_int64_encoded(wb, integer_encoding, rd->collector.last_collected_value);
  374. buffer_fast_strcat(wb, " ", 1);
  375. if((NETDATA_DOUBLE)rd->collector.last_collected_value == n)
  376. buffer_fast_strcat(wb, "#", 1);
  377. else
  378. buffer_print_netdata_double_encoded(wb, doubles_encoding, n);
  379. buffer_fast_strcat(wb, " ", 1);
  380. buffer_print_sn_flags(wb, flags, true);
  381. buffer_fast_strcat(wb, "\n", 1);
  382. }
  383. void rrdset_push_metrics_finished(RRDSET_STREAM_BUFFER *rsb, RRDSET *st) {
  384. if(!rsb->wb)
  385. return;
  386. if(rsb->v2 && rsb->begin_v2_added) {
  387. if(unlikely(rsb->rrdset_flags & RRDSET_FLAG_UPSTREAM_SEND_VARIABLES))
  388. rrdvar_print_to_streaming_custom_chart_variables(st, rsb->wb);
  389. buffer_fast_strcat(rsb->wb, PLUGINSD_KEYWORD_END_V2 "\n", sizeof(PLUGINSD_KEYWORD_END_V2) - 1 + 1);
  390. }
  391. sender_commit(st->rrdhost->sender, rsb->wb, STREAM_TRAFFIC_TYPE_DATA);
  392. *rsb = (RRDSET_STREAM_BUFFER){ .wb = NULL, };
  393. }
  394. RRDSET_STREAM_BUFFER rrdset_push_metric_initialize(RRDSET *st, time_t wall_clock_time) {
  395. RRDHOST *host = st->rrdhost;
  396. // fetch the flags we need to check with one atomic operation
  397. RRDHOST_FLAGS host_flags = __atomic_load_n(&host->flags, __ATOMIC_SEQ_CST);
  398. // check if we are not connected
  399. if(unlikely(!(host_flags & RRDHOST_FLAG_RRDPUSH_SENDER_READY_4_METRICS))) {
  400. if(unlikely(!(host_flags & (RRDHOST_FLAG_RRDPUSH_SENDER_SPAWN | RRDHOST_FLAG_RRDPUSH_RECEIVER_DISCONNECTED))))
  401. rrdpush_sender_thread_spawn(host);
  402. if(unlikely(!(host_flags & RRDHOST_FLAG_RRDPUSH_SENDER_LOGGED_STATUS))) {
  403. rrdhost_flag_set(host, RRDHOST_FLAG_RRDPUSH_SENDER_LOGGED_STATUS);
  404. nd_log_daemon(NDLP_NOTICE, "STREAM %s [send]: not ready - collected metrics are not sent to parent.", rrdhost_hostname(host));
  405. }
  406. return (RRDSET_STREAM_BUFFER) { .wb = NULL, };
  407. }
  408. else if(unlikely(host_flags & RRDHOST_FLAG_RRDPUSH_SENDER_LOGGED_STATUS)) {
  409. nd_log_daemon(NDLP_INFO, "STREAM %s [send]: sending metrics to parent...", rrdhost_hostname(host));
  410. rrdhost_flag_clear(host, RRDHOST_FLAG_RRDPUSH_SENDER_LOGGED_STATUS);
  411. }
  412. if(unlikely(host_flags & RRDHOST_FLAG_GLOBAL_FUNCTIONS_UPDATED)) {
  413. BUFFER *wb = sender_start(host->sender);
  414. rrd_global_functions_expose_rrdpush(host, wb, stream_has_capability(host->sender, STREAM_CAP_DYNCFG));
  415. sender_commit(host->sender, wb, STREAM_TRAFFIC_TYPE_FUNCTIONS);
  416. }
  417. bool exposed_upstream = rrdset_check_upstream_exposed(st);
  418. RRDSET_FLAGS rrdset_flags = rrdset_flag_get(st);
  419. bool replication_in_progress = !(rrdset_flags & RRDSET_FLAG_SENDER_REPLICATION_FINISHED);
  420. if(unlikely((exposed_upstream && replication_in_progress) ||
  421. !should_send_chart_matching(st, rrdset_flags)))
  422. return (RRDSET_STREAM_BUFFER) { .wb = NULL, };
  423. if(unlikely(!exposed_upstream)) {
  424. BUFFER *wb = sender_start(host->sender);
  425. replication_in_progress = rrdpush_send_chart_definition(wb, st);
  426. }
  427. if(replication_in_progress)
  428. return (RRDSET_STREAM_BUFFER) { .wb = NULL, };
  429. return (RRDSET_STREAM_BUFFER) {
  430. .capabilities = host->sender->capabilities,
  431. .v2 = stream_has_capability(host->sender, STREAM_CAP_INTERPOLATED),
  432. .rrdset_flags = rrdset_flags,
  433. .wb = sender_start(host->sender),
  434. .wall_clock_time = wall_clock_time,
  435. };
  436. }
  437. // labels
  438. static int send_labels_callback(const char *name, const char *value, RRDLABEL_SRC ls, void *data) {
  439. BUFFER *wb = (BUFFER *)data;
  440. buffer_sprintf(wb, "LABEL \"%s\" = %d \"%s\"\n", name, ls, value);
  441. return 1;
  442. }
  443. void rrdpush_send_host_labels(RRDHOST *host) {
  444. if(unlikely(!rrdhost_can_send_definitions_to_parent(host)
  445. || !stream_has_capability(host->sender, STREAM_CAP_HLABELS)))
  446. return;
  447. BUFFER *wb = sender_start(host->sender);
  448. rrdlabels_walkthrough_read(host->rrdlabels, send_labels_callback, wb);
  449. buffer_sprintf(wb, "OVERWRITE %s\n", "labels");
  450. sender_commit(host->sender, wb, STREAM_TRAFFIC_TYPE_METADATA);
  451. sender_thread_buffer_free();
  452. }
  453. void rrdpush_send_global_functions(RRDHOST *host) {
  454. if(!stream_has_capability(host->sender, STREAM_CAP_FUNCTIONS))
  455. return;
  456. if(unlikely(!rrdhost_can_send_definitions_to_parent(host)))
  457. return;
  458. BUFFER *wb = sender_start(host->sender);
  459. rrd_global_functions_expose_rrdpush(host, wb, stream_has_capability(host->sender, STREAM_CAP_DYNCFG));
  460. sender_commit(host->sender, wb, STREAM_TRAFFIC_TYPE_FUNCTIONS);
  461. sender_thread_buffer_free();
  462. }
  463. void rrdpush_send_claimed_id(RRDHOST *host) {
  464. if(!stream_has_capability(host->sender, STREAM_CAP_CLAIM))
  465. return;
  466. if(unlikely(!rrdhost_can_send_definitions_to_parent(host)))
  467. return;
  468. BUFFER *wb = sender_start(host->sender);
  469. rrdhost_aclk_state_lock(host);
  470. buffer_sprintf(wb, "CLAIMED_ID %s %s\n", host->machine_guid, (host->aclk_state.claimed_id ? host->aclk_state.claimed_id : "NULL") );
  471. rrdhost_aclk_state_unlock(host);
  472. sender_commit(host->sender, wb, STREAM_TRAFFIC_TYPE_METADATA);
  473. sender_thread_buffer_free();
  474. }
  475. int connect_to_one_of_destinations(
  476. RRDHOST *host,
  477. int default_port,
  478. struct timeval *timeout,
  479. size_t *reconnects_counter,
  480. char *connected_to,
  481. size_t connected_to_size,
  482. struct rrdpush_destinations **destination)
  483. {
  484. int sock = -1;
  485. for (struct rrdpush_destinations *d = host->destinations; d; d = d->next) {
  486. time_t now = now_realtime_sec();
  487. if(d->postpone_reconnection_until > now)
  488. continue;
  489. nd_log(NDLS_DAEMON, NDLP_DEBUG,
  490. "STREAM %s: connecting to '%s' (default port: %d)...",
  491. rrdhost_hostname(host), string2str(d->destination), default_port);
  492. if (reconnects_counter)
  493. *reconnects_counter += 1;
  494. d->since = now;
  495. d->attempts++;
  496. sock = connect_to_this(string2str(d->destination), default_port, timeout);
  497. if (sock != -1) {
  498. if (connected_to && connected_to_size)
  499. strncpyz(connected_to, string2str(d->destination), connected_to_size);
  500. *destination = d;
  501. // move the current item to the end of the list
  502. // without this, this destination will break the loop again and again
  503. // not advancing the destinations to find one that may work
  504. DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(host->destinations, d, prev, next);
  505. DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(host->destinations, d, prev, next);
  506. break;
  507. }
  508. }
  509. return sock;
  510. }
  511. struct destinations_init_tmp {
  512. RRDHOST *host;
  513. struct rrdpush_destinations *list;
  514. int count;
  515. };
  516. bool destinations_init_add_one(char *entry, void *data) {
  517. struct destinations_init_tmp *t = data;
  518. struct rrdpush_destinations *d = callocz(1, sizeof(struct rrdpush_destinations));
  519. char *colon_ssl = strstr(entry, ":SSL");
  520. if(colon_ssl) {
  521. *colon_ssl = '\0';
  522. d->ssl = true;
  523. }
  524. else
  525. d->ssl = false;
  526. d->destination = string_strdupz(entry);
  527. __atomic_add_fetch(&netdata_buffers_statistics.rrdhost_senders, sizeof(struct rrdpush_destinations), __ATOMIC_RELAXED);
  528. DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(t->list, d, prev, next);
  529. t->count++;
  530. nd_log_daemon(NDLP_INFO, "STREAM: added streaming destination No %d: '%s' to host '%s'", t->count, string2str(d->destination), rrdhost_hostname(t->host));
  531. return false; // we return false, so that we will get all defined destinations
  532. }
  533. void rrdpush_destinations_init(RRDHOST *host) {
  534. if(!host->rrdpush_send_destination) return;
  535. rrdpush_destinations_free(host);
  536. struct destinations_init_tmp t = {
  537. .host = host,
  538. .list = NULL,
  539. .count = 0,
  540. };
  541. foreach_entry_in_connection_string(host->rrdpush_send_destination, destinations_init_add_one, &t);
  542. host->destinations = t.list;
  543. }
  544. void rrdpush_destinations_free(RRDHOST *host) {
  545. while (host->destinations) {
  546. struct rrdpush_destinations *tmp = host->destinations;
  547. DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(host->destinations, tmp, prev, next);
  548. string_freez(tmp->destination);
  549. freez(tmp);
  550. __atomic_sub_fetch(&netdata_buffers_statistics.rrdhost_senders, sizeof(struct rrdpush_destinations), __ATOMIC_RELAXED);
  551. }
  552. host->destinations = NULL;
  553. }
  554. // ----------------------------------------------------------------------------
  555. // rrdpush sender thread
  556. // Either the receiver lost the connection or the host is being destroyed.
  557. // The sender mutex guards thread creation, any spurious data is wiped on reconnection.
  558. void rrdpush_sender_thread_stop(RRDHOST *host, STREAM_HANDSHAKE reason, bool wait) {
  559. if (!host->sender)
  560. return;
  561. sender_lock(host->sender);
  562. if(rrdhost_flag_check(host, RRDHOST_FLAG_RRDPUSH_SENDER_SPAWN)) {
  563. host->sender->exit.shutdown = true;
  564. host->sender->exit.reason = reason;
  565. // signal it to cancel
  566. netdata_thread_cancel(host->rrdpush_sender_thread);
  567. }
  568. sender_unlock(host->sender);
  569. if(wait) {
  570. sender_lock(host->sender);
  571. while(host->sender->tid) {
  572. sender_unlock(host->sender);
  573. sleep_usec(10 * USEC_PER_MS);
  574. sender_lock(host->sender);
  575. }
  576. sender_unlock(host->sender);
  577. }
  578. }
  579. // ----------------------------------------------------------------------------
  580. // rrdpush receiver thread
  581. static void rrdpush_sender_thread_spawn(RRDHOST *host) {
  582. sender_lock(host->sender);
  583. if(!rrdhost_flag_check(host, RRDHOST_FLAG_RRDPUSH_SENDER_SPAWN)) {
  584. char tag[NETDATA_THREAD_TAG_MAX + 1];
  585. snprintfz(tag, NETDATA_THREAD_TAG_MAX, THREAD_TAG_STREAM_SENDER "[%s]", rrdhost_hostname(host));
  586. if(netdata_thread_create(&host->rrdpush_sender_thread, tag, NETDATA_THREAD_OPTION_DEFAULT, rrdpush_sender_thread, (void *) host->sender))
  587. nd_log_daemon(NDLP_ERR, "STREAM %s [send]: failed to create new thread for client.", rrdhost_hostname(host));
  588. else
  589. rrdhost_flag_set(host, RRDHOST_FLAG_RRDPUSH_SENDER_SPAWN);
  590. }
  591. sender_unlock(host->sender);
  592. }
  593. int rrdpush_receiver_permission_denied(struct web_client *w) {
  594. // we always respond with the same message and error code
  595. // to prevent an attacker from gaining info about the error
  596. buffer_flush(w->response.data);
  597. buffer_strcat(w->response.data, START_STREAMING_ERROR_NOT_PERMITTED);
  598. return HTTP_RESP_UNAUTHORIZED;
  599. }
  600. int rrdpush_receiver_too_busy_now(struct web_client *w) {
  601. // we always respond with the same message and error code
  602. // to prevent an attacker from gaining info about the error
  603. buffer_flush(w->response.data);
  604. buffer_strcat(w->response.data, START_STREAMING_ERROR_BUSY_TRY_LATER);
  605. return HTTP_RESP_SERVICE_UNAVAILABLE;
  606. }
  607. static void rrdpush_receiver_takeover_web_connection(struct web_client *w, struct receiver_state *rpt) {
  608. rpt->fd = w->ifd;
  609. #ifdef ENABLE_HTTPS
  610. rpt->ssl.conn = w->ssl.conn;
  611. rpt->ssl.state = w->ssl.state;
  612. w->ssl = NETDATA_SSL_UNSET_CONNECTION;
  613. #endif
  614. WEB_CLIENT_IS_DEAD(w);
  615. if(web_server_mode == WEB_SERVER_MODE_STATIC_THREADED) {
  616. web_client_flag_set(w, WEB_CLIENT_FLAG_DONT_CLOSE_SOCKET);
  617. }
  618. else {
  619. if(w->ifd == w->ofd)
  620. w->ifd = w->ofd = -1;
  621. else
  622. w->ifd = -1;
  623. }
  624. buffer_flush(w->response.data);
  625. }
  626. void *rrdpush_receiver_thread(void *ptr);
  627. int rrdpush_receiver_thread_spawn(struct web_client *w, char *decoded_query_string, void *h2o_ctx) {
  628. if(!service_running(ABILITY_STREAMING_CONNECTIONS))
  629. return rrdpush_receiver_too_busy_now(w);
  630. struct receiver_state *rpt = callocz(1, sizeof(*rpt));
  631. rpt->last_msg_t = now_monotonic_sec();
  632. rpt->hops = 1;
  633. rpt->capabilities = STREAM_CAP_INVALID;
  634. #ifdef ENABLE_H2O
  635. rpt->h2o_ctx = h2o_ctx;
  636. #endif
  637. __atomic_add_fetch(&netdata_buffers_statistics.rrdhost_receivers, sizeof(*rpt), __ATOMIC_RELAXED);
  638. __atomic_add_fetch(&netdata_buffers_statistics.rrdhost_allocations_size, sizeof(struct rrdhost_system_info), __ATOMIC_RELAXED);
  639. rpt->system_info = callocz(1, sizeof(struct rrdhost_system_info));
  640. rpt->system_info->hops = rpt->hops;
  641. rpt->fd = -1;
  642. rpt->client_ip = strdupz(w->client_ip);
  643. rpt->client_port = strdupz(w->client_port);
  644. #ifdef ENABLE_HTTPS
  645. rpt->ssl = NETDATA_SSL_UNSET_CONNECTION;
  646. #endif
  647. rpt->config.update_every = default_rrd_update_every;
  648. // parse the parameters and fill rpt and rpt->system_info
  649. while(decoded_query_string) {
  650. char *value = strsep_skip_consecutive_separators(&decoded_query_string, "&");
  651. if(!value || !*value) continue;
  652. char *name = strsep_skip_consecutive_separators(&value, "=");
  653. if(!name || !*name) continue;
  654. if(!value || !*value) continue;
  655. if(!strcmp(name, "key") && !rpt->key)
  656. rpt->key = strdupz(value);
  657. else if(!strcmp(name, "hostname") && !rpt->hostname)
  658. rpt->hostname = strdupz(value);
  659. else if(!strcmp(name, "registry_hostname") && !rpt->registry_hostname)
  660. rpt->registry_hostname = strdupz(value);
  661. else if(!strcmp(name, "machine_guid") && !rpt->machine_guid)
  662. rpt->machine_guid = strdupz(value);
  663. else if(!strcmp(name, "update_every"))
  664. rpt->config.update_every = (int)strtoul(value, NULL, 0);
  665. else if(!strcmp(name, "os") && !rpt->os)
  666. rpt->os = strdupz(value);
  667. else if(!strcmp(name, "timezone") && !rpt->timezone)
  668. rpt->timezone = strdupz(value);
  669. else if(!strcmp(name, "abbrev_timezone") && !rpt->abbrev_timezone)
  670. rpt->abbrev_timezone = strdupz(value);
  671. else if(!strcmp(name, "utc_offset"))
  672. rpt->utc_offset = (int32_t)strtol(value, NULL, 0);
  673. else if(!strcmp(name, "hops"))
  674. rpt->hops = rpt->system_info->hops = (uint16_t) strtoul(value, NULL, 0);
  675. else if(!strcmp(name, "ml_capable"))
  676. rpt->system_info->ml_capable = strtoul(value, NULL, 0);
  677. else if(!strcmp(name, "ml_enabled"))
  678. rpt->system_info->ml_enabled = strtoul(value, NULL, 0);
  679. else if(!strcmp(name, "mc_version"))
  680. rpt->system_info->mc_version = strtoul(value, NULL, 0);
  681. else if(!strcmp(name, "ver") && (rpt->capabilities & STREAM_CAP_INVALID))
  682. rpt->capabilities = convert_stream_version_to_capabilities(strtoul(value, NULL, 0), NULL, false);
  683. else {
  684. // An old Netdata child does not have a compatible streaming protocol, map to something sane.
  685. if (!strcmp(name, "NETDATA_SYSTEM_OS_NAME"))
  686. name = "NETDATA_HOST_OS_NAME";
  687. else if (!strcmp(name, "NETDATA_SYSTEM_OS_ID"))
  688. name = "NETDATA_HOST_OS_ID";
  689. else if (!strcmp(name, "NETDATA_SYSTEM_OS_ID_LIKE"))
  690. name = "NETDATA_HOST_OS_ID_LIKE";
  691. else if (!strcmp(name, "NETDATA_SYSTEM_OS_VERSION"))
  692. name = "NETDATA_HOST_OS_VERSION";
  693. else if (!strcmp(name, "NETDATA_SYSTEM_OS_VERSION_ID"))
  694. name = "NETDATA_HOST_OS_VERSION_ID";
  695. else if (!strcmp(name, "NETDATA_SYSTEM_OS_DETECTION"))
  696. name = "NETDATA_HOST_OS_DETECTION";
  697. else if(!strcmp(name, "NETDATA_PROTOCOL_VERSION") && (rpt->capabilities & STREAM_CAP_INVALID))
  698. rpt->capabilities = convert_stream_version_to_capabilities(1, NULL, false);
  699. if (unlikely(rrdhost_set_system_info_variable(rpt->system_info, name, value))) {
  700. nd_log_daemon(NDLP_NOTICE, "STREAM '%s' [receive from [%s]:%s]: "
  701. "request has parameter '%s' = '%s', which is not used."
  702. , (rpt->hostname && *rpt->hostname) ? rpt->hostname : "-"
  703. , rpt->client_ip, rpt->client_port
  704. , name, value);
  705. }
  706. }
  707. }
  708. if (rpt->capabilities & STREAM_CAP_INVALID)
  709. // no version is supplied, assume version 0;
  710. rpt->capabilities = convert_stream_version_to_capabilities(0, NULL, false);
  711. // find the program name and version
  712. if(w->user_agent && w->user_agent[0]) {
  713. char *t = strchr(w->user_agent, '/');
  714. if(t && *t) {
  715. *t = '\0';
  716. t++;
  717. }
  718. rpt->program_name = strdupz(w->user_agent);
  719. if(t && *t) rpt->program_version = strdupz(t);
  720. }
  721. // check if we should accept this connection
  722. if(!rpt->key || !*rpt->key) {
  723. rrdpush_receive_log_status(
  724. rpt, "request without an API key, rejecting connection",
  725. RRDPUSH_STATUS_NO_API_KEY, NDLP_WARNING);
  726. receiver_state_free(rpt);
  727. return rrdpush_receiver_permission_denied(w);
  728. }
  729. if(!rpt->hostname || !*rpt->hostname) {
  730. rrdpush_receive_log_status(
  731. rpt, "request without a hostname, rejecting connection",
  732. RRDPUSH_STATUS_NO_HOSTNAME, NDLP_WARNING);
  733. receiver_state_free(rpt);
  734. return rrdpush_receiver_permission_denied(w);
  735. }
  736. if(!rpt->registry_hostname)
  737. rpt->registry_hostname = strdupz(rpt->hostname);
  738. if(!rpt->machine_guid || !*rpt->machine_guid) {
  739. rrdpush_receive_log_status(
  740. rpt, "request without a machine GUID, rejecting connection",
  741. RRDPUSH_STATUS_NO_MACHINE_GUID, NDLP_WARNING);
  742. receiver_state_free(rpt);
  743. return rrdpush_receiver_permission_denied(w);
  744. }
  745. {
  746. char buf[GUID_LEN + 1];
  747. if (regenerate_guid(rpt->key, buf) == -1) {
  748. rrdpush_receive_log_status(
  749. rpt, "API key is not a valid UUID (use the command uuidgen to generate one)",
  750. RRDPUSH_STATUS_INVALID_API_KEY, NDLP_WARNING);
  751. receiver_state_free(rpt);
  752. return rrdpush_receiver_permission_denied(w);
  753. }
  754. if (regenerate_guid(rpt->machine_guid, buf) == -1) {
  755. rrdpush_receive_log_status(
  756. rpt, "machine GUID is not a valid UUID",
  757. RRDPUSH_STATUS_INVALID_MACHINE_GUID, NDLP_WARNING);
  758. receiver_state_free(rpt);
  759. return rrdpush_receiver_permission_denied(w);
  760. }
  761. }
  762. const char *api_key_type = appconfig_get(&stream_config, rpt->key, "type", "api");
  763. if(!api_key_type || !*api_key_type) api_key_type = "unknown";
  764. if(strcmp(api_key_type, "api") != 0) {
  765. rrdpush_receive_log_status(
  766. rpt, "API key is a machine GUID",
  767. RRDPUSH_STATUS_INVALID_API_KEY, NDLP_WARNING);
  768. receiver_state_free(rpt);
  769. return rrdpush_receiver_permission_denied(w);
  770. }
  771. if(!appconfig_get_boolean(&stream_config, rpt->key, "enabled", 0)) {
  772. rrdpush_receive_log_status(
  773. rpt, "API key is not enabled",
  774. RRDPUSH_STATUS_API_KEY_DISABLED, NDLP_WARNING);
  775. receiver_state_free(rpt);
  776. return rrdpush_receiver_permission_denied(w);
  777. }
  778. {
  779. SIMPLE_PATTERN *key_allow_from = simple_pattern_create(
  780. appconfig_get(&stream_config, rpt->key, "allow from", "*"),
  781. NULL, SIMPLE_PATTERN_EXACT, true);
  782. if(key_allow_from) {
  783. if(!simple_pattern_matches(key_allow_from, w->client_ip)) {
  784. simple_pattern_free(key_allow_from);
  785. rrdpush_receive_log_status(
  786. rpt, "API key is not allowed from this IP",
  787. RRDPUSH_STATUS_NOT_ALLOWED_IP, NDLP_WARNING);
  788. receiver_state_free(rpt);
  789. return rrdpush_receiver_permission_denied(w);
  790. }
  791. simple_pattern_free(key_allow_from);
  792. }
  793. }
  794. {
  795. const char *machine_guid_type = appconfig_get(&stream_config, rpt->machine_guid, "type", "machine");
  796. if (!machine_guid_type || !*machine_guid_type) machine_guid_type = "unknown";
  797. if (strcmp(machine_guid_type, "machine") != 0) {
  798. rrdpush_receive_log_status(
  799. rpt, "machine GUID is an API key",
  800. RRDPUSH_STATUS_INVALID_MACHINE_GUID, NDLP_WARNING);
  801. receiver_state_free(rpt);
  802. return rrdpush_receiver_permission_denied(w);
  803. }
  804. }
  805. if(!appconfig_get_boolean(&stream_config, rpt->machine_guid, "enabled", 1)) {
  806. rrdpush_receive_log_status(
  807. rpt, "machine GUID is not enabled",
  808. RRDPUSH_STATUS_MACHINE_GUID_DISABLED, NDLP_WARNING);
  809. receiver_state_free(rpt);
  810. return rrdpush_receiver_permission_denied(w);
  811. }
  812. {
  813. SIMPLE_PATTERN *machine_allow_from = simple_pattern_create(
  814. appconfig_get(&stream_config, rpt->machine_guid, "allow from", "*"),
  815. NULL, SIMPLE_PATTERN_EXACT, true);
  816. if(machine_allow_from) {
  817. if(!simple_pattern_matches(machine_allow_from, w->client_ip)) {
  818. simple_pattern_free(machine_allow_from);
  819. rrdpush_receive_log_status(
  820. rpt, "machine GUID is not allowed from this IP",
  821. RRDPUSH_STATUS_NOT_ALLOWED_IP, NDLP_WARNING);
  822. receiver_state_free(rpt);
  823. return rrdpush_receiver_permission_denied(w);
  824. }
  825. simple_pattern_free(machine_allow_from);
  826. }
  827. }
  828. if (strcmp(rpt->machine_guid, localhost->machine_guid) == 0) {
  829. rrdpush_receiver_takeover_web_connection(w, rpt);
  830. rrdpush_receive_log_status(
  831. rpt, "machine GUID is my own",
  832. RRDPUSH_STATUS_LOCALHOST, NDLP_DEBUG);
  833. char initial_response[HTTP_HEADER_SIZE + 1];
  834. snprintfz(initial_response, HTTP_HEADER_SIZE, "%s", START_STREAMING_ERROR_SAME_LOCALHOST);
  835. if(send_timeout(
  836. #ifdef ENABLE_HTTPS
  837. &rpt->ssl,
  838. #endif
  839. rpt->fd, initial_response, strlen(initial_response), 0, 60) != (ssize_t)strlen(initial_response)) {
  840. nd_log_daemon(NDLP_ERR, "STREAM '%s' [receive from [%s]:%s]: "
  841. "failed to reply."
  842. , rpt->hostname
  843. , rpt->client_ip, rpt->client_port
  844. );
  845. }
  846. receiver_state_free(rpt);
  847. return HTTP_RESP_OK;
  848. }
  849. if(unlikely(web_client_streaming_rate_t > 0)) {
  850. static SPINLOCK spinlock = NETDATA_SPINLOCK_INITIALIZER;
  851. static time_t last_stream_accepted_t = 0;
  852. time_t now = now_realtime_sec();
  853. spinlock_lock(&spinlock);
  854. if(unlikely(last_stream_accepted_t == 0))
  855. last_stream_accepted_t = now;
  856. if(now - last_stream_accepted_t < web_client_streaming_rate_t) {
  857. spinlock_unlock(&spinlock);
  858. char msg[100 + 1];
  859. snprintfz(msg, sizeof(msg) - 1,
  860. "rate limit, will accept new connection in %ld secs",
  861. (long)(web_client_streaming_rate_t - (now - last_stream_accepted_t)));
  862. rrdpush_receive_log_status(
  863. rpt, msg,
  864. RRDPUSH_STATUS_RATE_LIMIT, NDLP_NOTICE);
  865. receiver_state_free(rpt);
  866. return rrdpush_receiver_too_busy_now(w);
  867. }
  868. last_stream_accepted_t = now;
  869. spinlock_unlock(&spinlock);
  870. }
  871. /*
  872. * Quick path for rejecting multiple connections. The lock taken is fine-grained - it only protects the receiver
  873. * pointer within the host (if a host exists). This protects against multiple concurrent web requests hitting
  874. * separate threads within the web-server and landing here. The lock guards the thread-shutdown sequence that
  875. * detaches the receiver from the host. If the host is being created (first time-access) then we also use the
  876. * lock to prevent race-hazard (two threads try to create the host concurrently, one wins and the other does a
  877. * lookup to the now-attached structure).
  878. */
  879. {
  880. time_t age = 0;
  881. bool receiver_stale = false;
  882. bool receiver_working = false;
  883. rrd_rdlock();
  884. RRDHOST *host = rrdhost_find_by_guid(rpt->machine_guid);
  885. if (unlikely(host && rrdhost_flag_check(host, RRDHOST_FLAG_ARCHIVED))) /* Ignore archived hosts. */
  886. host = NULL;
  887. if (host) {
  888. netdata_mutex_lock(&host->receiver_lock);
  889. if (host->receiver) {
  890. age = now_monotonic_sec() - host->receiver->last_msg_t;
  891. if (age < 30)
  892. receiver_working = true;
  893. else
  894. receiver_stale = true;
  895. }
  896. netdata_mutex_unlock(&host->receiver_lock);
  897. }
  898. rrd_unlock();
  899. if (receiver_stale && stop_streaming_receiver(host, STREAM_HANDSHAKE_DISCONNECT_STALE_RECEIVER)) {
  900. // we stopped the receiver
  901. // we can proceed with this connection
  902. receiver_stale = false;
  903. nd_log_daemon(NDLP_NOTICE, "STREAM '%s' [receive from [%s]:%s]: "
  904. "stopped previous stale receiver to accept this one."
  905. , rpt->hostname
  906. , rpt->client_ip, rpt->client_port
  907. );
  908. }
  909. if (receiver_working || receiver_stale) {
  910. // another receiver is already connected
  911. // try again later
  912. char msg[200 + 1];
  913. snprintfz(msg, sizeof(msg) - 1,
  914. "multiple connections for same host, "
  915. "old connection was last used %ld secs ago%s",
  916. age, receiver_stale ? " (signaled old receiver to stop)" : " (new connection not accepted)");
  917. rrdpush_receive_log_status(
  918. rpt, msg,
  919. RRDPUSH_STATUS_ALREADY_CONNECTED, NDLP_DEBUG);
  920. // Have not set WEB_CLIENT_FLAG_DONT_CLOSE_SOCKET - caller should clean up
  921. buffer_flush(w->response.data);
  922. buffer_strcat(w->response.data, START_STREAMING_ERROR_ALREADY_STREAMING);
  923. receiver_state_free(rpt);
  924. return HTTP_RESP_CONFLICT;
  925. }
  926. }
  927. rrdpush_receiver_takeover_web_connection(w, rpt);
  928. char tag[NETDATA_THREAD_TAG_MAX + 1];
  929. snprintfz(tag, NETDATA_THREAD_TAG_MAX, THREAD_TAG_STREAM_RECEIVER "[%s]", rpt->hostname);
  930. tag[NETDATA_THREAD_TAG_MAX] = '\0';
  931. if(netdata_thread_create(&rpt->thread, tag, NETDATA_THREAD_OPTION_DEFAULT, rrdpush_receiver_thread, (void *)rpt)) {
  932. rrdpush_receive_log_status(
  933. rpt, "can't create receiver thread",
  934. RRDPUSH_STATUS_INTERNAL_SERVER_ERROR, NDLP_ERR);
  935. buffer_flush(w->response.data);
  936. buffer_strcat(w->response.data, "Can't handle this request");
  937. receiver_state_free(rpt);
  938. return HTTP_RESP_INTERNAL_SERVER_ERROR;
  939. }
  940. // prevent the caller from closing the streaming socket
  941. return HTTP_RESP_OK;
  942. }
  943. void rrdpush_reset_destinations_postpone_time(RRDHOST *host) {
  944. uint32_t wait = (host->sender) ? host->sender->reconnect_delay : 5;
  945. time_t now = now_realtime_sec();
  946. for (struct rrdpush_destinations *d = host->destinations; d; d = d->next)
  947. d->postpone_reconnection_until = now + wait;
  948. }
  949. static struct {
  950. STREAM_HANDSHAKE err;
  951. const char *str;
  952. } handshake_errors[] = {
  953. { STREAM_HANDSHAKE_OK_V3, "CONNECTED" },
  954. { STREAM_HANDSHAKE_OK_V2, "CONNECTED" },
  955. { STREAM_HANDSHAKE_OK_V1, "CONNECTED" },
  956. { STREAM_HANDSHAKE_NEVER, "" },
  957. { STREAM_HANDSHAKE_ERROR_BAD_HANDSHAKE, "BAD HANDSHAKE" },
  958. { STREAM_HANDSHAKE_ERROR_LOCALHOST, "LOCALHOST" },
  959. { STREAM_HANDSHAKE_ERROR_ALREADY_CONNECTED, "ALREADY CONNECTED" },
  960. { STREAM_HANDSHAKE_ERROR_DENIED, "DENIED" },
  961. { STREAM_HANDSHAKE_ERROR_SEND_TIMEOUT, "SEND TIMEOUT" },
  962. { STREAM_HANDSHAKE_ERROR_RECEIVE_TIMEOUT, "RECEIVE TIMEOUT" },
  963. { STREAM_HANDSHAKE_ERROR_INVALID_CERTIFICATE, "INVALID CERTIFICATE" },
  964. { STREAM_HANDSHAKE_ERROR_SSL_ERROR, "SSL ERROR" },
  965. { STREAM_HANDSHAKE_ERROR_CANT_CONNECT, "CANT CONNECT" },
  966. { STREAM_HANDSHAKE_BUSY_TRY_LATER, "BUSY TRY LATER" },
  967. { STREAM_HANDSHAKE_INTERNAL_ERROR, "INTERNAL ERROR" },
  968. { STREAM_HANDSHAKE_INITIALIZATION, "REMOTE IS INITIALIZING" },
  969. { STREAM_HANDSHAKE_DISCONNECT_HOST_CLEANUP, "DISCONNECTED HOST CLEANUP" },
  970. { STREAM_HANDSHAKE_DISCONNECT_STALE_RECEIVER, "DISCONNECTED STALE RECEIVER" },
  971. { STREAM_HANDSHAKE_DISCONNECT_SHUTDOWN, "DISCONNECTED SHUTDOWN REQUESTED" },
  972. { STREAM_HANDSHAKE_DISCONNECT_NETDATA_EXIT, "DISCONNECTED NETDATA EXIT" },
  973. { STREAM_HANDSHAKE_DISCONNECT_PARSER_EXIT, "DISCONNECTED PARSE ENDED" },
  974. {STREAM_HANDSHAKE_DISCONNECT_UNKNOWN_SOCKET_READ_ERROR, "DISCONNECTED UNKNOWN SOCKET READ ERROR" },
  975. { STREAM_HANDSHAKE_DISCONNECT_PARSER_FAILED, "DISCONNECTED PARSE ERROR" },
  976. { STREAM_HANDSHAKE_DISCONNECT_RECEIVER_LEFT, "DISCONNECTED RECEIVER LEFT" },
  977. { STREAM_HANDSHAKE_DISCONNECT_ORPHAN_HOST, "DISCONNECTED ORPHAN HOST" },
  978. { STREAM_HANDSHAKE_NON_STREAMABLE_HOST, "NON STREAMABLE HOST" },
  979. { STREAM_HANDSHAKE_DISCONNECT_NOT_SUFFICIENT_READ_BUFFER, "DISCONNECTED NOT SUFFICIENT READ BUFFER" },
  980. {STREAM_HANDSHAKE_DISCONNECT_SOCKET_EOF, "DISCONNECTED SOCKET EOF" },
  981. {STREAM_HANDSHAKE_DISCONNECT_SOCKET_READ_FAILED, "DISCONNECTED SOCKET READ FAILED" },
  982. {STREAM_HANDSHAKE_DISCONNECT_SOCKET_READ_TIMEOUT, "DISCONNECTED SOCKET READ TIMEOUT" },
  983. { 0, NULL },
  984. };
  985. const char *stream_handshake_error_to_string(STREAM_HANDSHAKE handshake_error) {
  986. if(handshake_error >= STREAM_HANDSHAKE_OK_V1)
  987. // handshake_error is the whole version / capabilities number
  988. return "CONNECTED";
  989. for(size_t i = 0; handshake_errors[i].str ; i++) {
  990. if(handshake_error == handshake_errors[i].err)
  991. return handshake_errors[i].str;
  992. }
  993. return "UNKNOWN";
  994. }
  995. static struct {
  996. STREAM_CAPABILITIES cap;
  997. const char *str;
  998. } capability_names[] = {
  999. {STREAM_CAP_V1, "V1" },
  1000. {STREAM_CAP_V2, "V2" },
  1001. {STREAM_CAP_VN, "VN" },
  1002. {STREAM_CAP_VCAPS, "VCAPS" },
  1003. {STREAM_CAP_HLABELS, "HLABELS" },
  1004. {STREAM_CAP_CLAIM, "CLAIM" },
  1005. {STREAM_CAP_CLABELS, "CLABELS" },
  1006. {STREAM_CAP_LZ4, "LZ4" },
  1007. {STREAM_CAP_FUNCTIONS, "FUNCTIONS" },
  1008. {STREAM_CAP_REPLICATION, "REPLICATION" },
  1009. {STREAM_CAP_BINARY, "BINARY" },
  1010. {STREAM_CAP_INTERPOLATED, "INTERPOLATED" },
  1011. {STREAM_CAP_IEEE754, "IEEE754" },
  1012. {STREAM_CAP_DATA_WITH_ML, "ML" },
  1013. {STREAM_CAP_DYNCFG, "DYNCFG" },
  1014. {STREAM_CAP_SLOTS, "SLOTS" },
  1015. {STREAM_CAP_ZSTD, "ZSTD" },
  1016. {STREAM_CAP_GZIP, "GZIP" },
  1017. {STREAM_CAP_BROTLI, "BROTLI" },
  1018. {STREAM_CAP_PROGRESS, "PROGRESS" },
  1019. {0 , NULL },
  1020. };
  1021. void stream_capabilities_to_string(BUFFER *wb, STREAM_CAPABILITIES caps) {
  1022. for(size_t i = 0; capability_names[i].str ; i++) {
  1023. if(caps & capability_names[i].cap) {
  1024. buffer_strcat(wb, capability_names[i].str);
  1025. buffer_strcat(wb, " ");
  1026. }
  1027. }
  1028. }
  1029. void stream_capabilities_to_json_array(BUFFER *wb, STREAM_CAPABILITIES caps, const char *key) {
  1030. if(key)
  1031. buffer_json_member_add_array(wb, key);
  1032. else
  1033. buffer_json_add_array_item_array(wb);
  1034. for(size_t i = 0; capability_names[i].str ; i++) {
  1035. if(caps & capability_names[i].cap)
  1036. buffer_json_add_array_item_string(wb, capability_names[i].str);
  1037. }
  1038. buffer_json_array_close(wb);
  1039. }
  1040. void log_receiver_capabilities(struct receiver_state *rpt) {
  1041. BUFFER *wb = buffer_create(100, NULL);
  1042. stream_capabilities_to_string(wb, rpt->capabilities);
  1043. nd_log_daemon(NDLP_INFO, "STREAM %s [receive from [%s]:%s]: established link with negotiated capabilities: %s",
  1044. rrdhost_hostname(rpt->host), rpt->client_ip, rpt->client_port, buffer_tostring(wb));
  1045. buffer_free(wb);
  1046. }
  1047. void log_sender_capabilities(struct sender_state *s) {
  1048. BUFFER *wb = buffer_create(100, NULL);
  1049. stream_capabilities_to_string(wb, s->capabilities);
  1050. nd_log_daemon(NDLP_INFO, "STREAM %s [send to %s]: established link with negotiated capabilities: %s",
  1051. rrdhost_hostname(s->host), s->connected_to, buffer_tostring(wb));
  1052. buffer_free(wb);
  1053. }
  1054. STREAM_CAPABILITIES stream_our_capabilities(RRDHOST *host, bool sender) {
  1055. STREAM_CAPABILITIES disabled_capabilities = globally_disabled_capabilities;
  1056. if(host && sender) {
  1057. // we have DATA_WITH_ML capability
  1058. // we should remove the DATA_WITH_ML capability if our database does not have anomaly info
  1059. // this can happen under these conditions: 1. we don't run ML, and 2. we don't receive ML
  1060. netdata_mutex_lock(&host->receiver_lock);
  1061. if(!ml_host_running(host) && !stream_has_capability(host->receiver, STREAM_CAP_DATA_WITH_ML))
  1062. disabled_capabilities |= STREAM_CAP_DATA_WITH_ML;
  1063. netdata_mutex_unlock(&host->receiver_lock);
  1064. if(host->sender)
  1065. disabled_capabilities |= host->sender->disabled_capabilities;
  1066. }
  1067. return (STREAM_CAP_V1 |
  1068. STREAM_CAP_V2 |
  1069. STREAM_CAP_VN |
  1070. STREAM_CAP_VCAPS |
  1071. STREAM_CAP_HLABELS |
  1072. STREAM_CAP_CLAIM |
  1073. STREAM_CAP_CLABELS |
  1074. STREAM_CAP_FUNCTIONS |
  1075. STREAM_CAP_REPLICATION |
  1076. STREAM_CAP_BINARY |
  1077. STREAM_CAP_INTERPOLATED |
  1078. STREAM_CAP_SLOTS |
  1079. STREAM_CAP_PROGRESS |
  1080. STREAM_CAP_COMPRESSIONS_AVAILABLE |
  1081. STREAM_CAP_DYNCFG |
  1082. STREAM_CAP_IEEE754 |
  1083. STREAM_CAP_DATA_WITH_ML |
  1084. 0) & ~disabled_capabilities;
  1085. }
  1086. STREAM_CAPABILITIES convert_stream_version_to_capabilities(int32_t version, RRDHOST *host, bool sender) {
  1087. STREAM_CAPABILITIES caps = 0;
  1088. if(version <= 1) caps = STREAM_CAP_V1;
  1089. else if(version < STREAM_OLD_VERSION_CLAIM) caps = STREAM_CAP_V2 | STREAM_CAP_HLABELS;
  1090. else if(version <= STREAM_OLD_VERSION_CLAIM) caps = STREAM_CAP_VN | STREAM_CAP_HLABELS | STREAM_CAP_CLAIM;
  1091. else if(version <= STREAM_OLD_VERSION_CLABELS) caps = STREAM_CAP_VN | STREAM_CAP_HLABELS | STREAM_CAP_CLAIM | STREAM_CAP_CLABELS;
  1092. else if(version <= STREAM_OLD_VERSION_LZ4) caps = STREAM_CAP_VN | STREAM_CAP_HLABELS | STREAM_CAP_CLAIM | STREAM_CAP_CLABELS | STREAM_CAP_LZ4_AVAILABLE;
  1093. else caps = version;
  1094. if(caps & STREAM_CAP_VCAPS)
  1095. caps &= ~(STREAM_CAP_V1|STREAM_CAP_V2|STREAM_CAP_VN);
  1096. if(caps & STREAM_CAP_VN)
  1097. caps &= ~(STREAM_CAP_V1|STREAM_CAP_V2);
  1098. if(caps & STREAM_CAP_V2)
  1099. caps &= ~(STREAM_CAP_V1);
  1100. STREAM_CAPABILITIES common_caps = caps & stream_our_capabilities(host, sender);
  1101. if(!(common_caps & STREAM_CAP_INTERPOLATED))
  1102. // DATA WITH ML requires INTERPOLATED
  1103. common_caps &= ~STREAM_CAP_DATA_WITH_ML;
  1104. return common_caps;
  1105. }
  1106. int32_t stream_capabilities_to_vn(uint32_t caps) {
  1107. if(caps & STREAM_CAP_LZ4) return STREAM_OLD_VERSION_LZ4;
  1108. if(caps & STREAM_CAP_CLABELS) return STREAM_OLD_VERSION_CLABELS;
  1109. return STREAM_OLD_VERSION_CLAIM; // if(caps & STREAM_CAP_CLAIM)
  1110. }