pluginsd_replication.c 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371
  1. // SPDX-License-Identifier: GPL-3.0-or-later
  2. #include "pluginsd_replication.h"
  3. PARSER_RC pluginsd_replay_begin(char **words, size_t num_words, PARSER *parser) {
  4. int idx = 1;
  5. ssize_t slot = pluginsd_parse_rrd_slot(words, num_words);
  6. if(slot >= 0) idx++;
  7. char *id = get_word(words, num_words, idx++);
  8. char *start_time_str = get_word(words, num_words, idx++);
  9. char *end_time_str = get_word(words, num_words, idx++);
  10. char *child_now_str = get_word(words, num_words, idx++);
  11. RRDHOST *host = pluginsd_require_scope_host(parser, PLUGINSD_KEYWORD_REPLAY_BEGIN);
  12. if(!host) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL);
  13. RRDSET *st;
  14. if (likely(!id || !*id))
  15. st = pluginsd_require_scope_chart(parser, PLUGINSD_KEYWORD_REPLAY_BEGIN, PLUGINSD_KEYWORD_REPLAY_BEGIN);
  16. else
  17. st = pluginsd_rrdset_cache_get_from_slot(parser, host, id, slot, PLUGINSD_KEYWORD_REPLAY_BEGIN);
  18. if(!st) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL);
  19. if(!pluginsd_set_scope_chart(parser, st, PLUGINSD_KEYWORD_REPLAY_BEGIN))
  20. return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL);
  21. if(start_time_str && end_time_str) {
  22. time_t start_time = (time_t) str2ull_encoded(start_time_str);
  23. time_t end_time = (time_t) str2ull_encoded(end_time_str);
  24. time_t wall_clock_time = 0, tolerance;
  25. bool wall_clock_comes_from_child; (void)wall_clock_comes_from_child;
  26. if(child_now_str) {
  27. wall_clock_time = (time_t) str2ull_encoded(child_now_str);
  28. tolerance = st->update_every + 1;
  29. wall_clock_comes_from_child = true;
  30. }
  31. if(wall_clock_time <= 0) {
  32. wall_clock_time = now_realtime_sec();
  33. tolerance = st->update_every + 5;
  34. wall_clock_comes_from_child = false;
  35. }
  36. #ifdef NETDATA_LOG_REPLICATION_REQUESTS
  37. internal_error(
  38. (!st->replay.start_streaming && (end_time < st->replay.after || start_time > st->replay.before)),
  39. "REPLAY ERROR: 'host:%s/chart:%s' got a " PLUGINSD_KEYWORD_REPLAY_BEGIN " from %ld to %ld, which does not match our request (%ld to %ld).",
  40. rrdhost_hostname(st->rrdhost), rrdset_id(st), start_time, end_time, st->replay.after, st->replay.before);
  41. internal_error(
  42. true,
  43. "REPLAY: 'host:%s/chart:%s' got a " PLUGINSD_KEYWORD_REPLAY_BEGIN " from %ld to %ld, child wall clock is %ld (%s), had requested %ld to %ld",
  44. rrdhost_hostname(st->rrdhost), rrdset_id(st),
  45. start_time, end_time, wall_clock_time, wall_clock_comes_from_child ? "from child" : "parent time",
  46. st->replay.after, st->replay.before);
  47. #endif
  48. if(start_time && end_time && start_time < wall_clock_time + tolerance && end_time < wall_clock_time + tolerance && start_time < end_time) {
  49. if (unlikely(end_time - start_time != st->update_every))
  50. rrdset_set_update_every_s(st, end_time - start_time);
  51. st->last_collected_time.tv_sec = end_time;
  52. st->last_collected_time.tv_usec = 0;
  53. st->last_updated.tv_sec = end_time;
  54. st->last_updated.tv_usec = 0;
  55. st->counter++;
  56. st->counter_done++;
  57. // these are only needed for db mode RAM, ALLOC
  58. st->db.current_entry++;
  59. if(st->db.current_entry >= st->db.entries)
  60. st->db.current_entry -= st->db.entries;
  61. parser->user.replay.start_time = start_time;
  62. parser->user.replay.end_time = end_time;
  63. parser->user.replay.start_time_ut = (usec_t) start_time * USEC_PER_SEC;
  64. parser->user.replay.end_time_ut = (usec_t) end_time * USEC_PER_SEC;
  65. parser->user.replay.wall_clock_time = wall_clock_time;
  66. parser->user.replay.rset_enabled = true;
  67. return PARSER_RC_OK;
  68. }
  69. netdata_log_error("PLUGINSD REPLAY ERROR: 'host:%s/chart:%s' got a " PLUGINSD_KEYWORD_REPLAY_BEGIN
  70. " from %ld to %ld, but timestamps are invalid "
  71. "(now is %ld [%s], tolerance %ld). Ignoring " PLUGINSD_KEYWORD_REPLAY_SET,
  72. rrdhost_hostname(st->rrdhost), rrdset_id(st), start_time, end_time,
  73. wall_clock_time, wall_clock_comes_from_child ? "child wall clock" : "parent wall clock",
  74. tolerance);
  75. }
  76. // the child sends an RBEGIN without any parameters initially
  77. // setting rset_enabled to false, means the RSET should not store any metrics
  78. // to store metrics, the RBEGIN needs to have timestamps
  79. parser->user.replay.start_time = 0;
  80. parser->user.replay.end_time = 0;
  81. parser->user.replay.start_time_ut = 0;
  82. parser->user.replay.end_time_ut = 0;
  83. parser->user.replay.wall_clock_time = 0;
  84. parser->user.replay.rset_enabled = false;
  85. return PARSER_RC_OK;
  86. }
  87. PARSER_RC pluginsd_replay_set(char **words, size_t num_words, PARSER *parser) {
  88. int idx = 1;
  89. ssize_t slot = pluginsd_parse_rrd_slot(words, num_words);
  90. if(slot >= 0) idx++;
  91. char *dimension = get_word(words, num_words, idx++);
  92. char *value_str = get_word(words, num_words, idx++);
  93. char *flags_str = get_word(words, num_words, idx++);
  94. RRDHOST *host = pluginsd_require_scope_host(parser, PLUGINSD_KEYWORD_REPLAY_SET);
  95. if(!host) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL);
  96. RRDSET *st = pluginsd_require_scope_chart(parser, PLUGINSD_KEYWORD_REPLAY_SET, PLUGINSD_KEYWORD_REPLAY_BEGIN);
  97. if(!st) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL);
  98. if(!parser->user.replay.rset_enabled) {
  99. nd_log_limit_static_thread_var(erl, 1, 0);
  100. nd_log_limit(&erl, NDLS_COLLECTORS, NDLP_ERR,
  101. "PLUGINSD: 'host:%s/chart:%s' got a %s but it is disabled by %s errors",
  102. rrdhost_hostname(host), rrdset_id(st), PLUGINSD_KEYWORD_REPLAY_SET, PLUGINSD_KEYWORD_REPLAY_BEGIN);
  103. // we have to return OK here
  104. return PARSER_RC_OK;
  105. }
  106. RRDDIM *rd = pluginsd_acquire_dimension(host, st, dimension, slot, PLUGINSD_KEYWORD_REPLAY_SET);
  107. if(!rd) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL);
  108. st->pluginsd.set = true;
  109. if (unlikely(!parser->user.replay.start_time || !parser->user.replay.end_time)) {
  110. netdata_log_error("PLUGINSD: 'host:%s/chart:%s/dim:%s' got a %s with invalid timestamps %ld to %ld from a %s. Disabling it.",
  111. rrdhost_hostname(host),
  112. rrdset_id(st),
  113. dimension,
  114. PLUGINSD_KEYWORD_REPLAY_SET,
  115. parser->user.replay.start_time,
  116. parser->user.replay.end_time,
  117. PLUGINSD_KEYWORD_REPLAY_BEGIN);
  118. return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL);
  119. }
  120. if (unlikely(!value_str || !*value_str))
  121. value_str = "NAN";
  122. if(unlikely(!flags_str))
  123. flags_str = "";
  124. if (likely(value_str)) {
  125. RRDDIM_FLAGS rd_flags = rrddim_flag_check(rd, RRDDIM_FLAG_OBSOLETE | RRDDIM_FLAG_ARCHIVED);
  126. if(!(rd_flags & RRDDIM_FLAG_ARCHIVED)) {
  127. NETDATA_DOUBLE value = str2ndd_encoded(value_str, NULL);
  128. SN_FLAGS flags = pluginsd_parse_storage_number_flags(flags_str);
  129. if (!netdata_double_isnumber(value) || (flags == SN_EMPTY_SLOT)) {
  130. value = NAN;
  131. flags = SN_EMPTY_SLOT;
  132. }
  133. rrddim_store_metric(rd, parser->user.replay.end_time_ut, value, flags);
  134. rd->collector.last_collected_time.tv_sec = parser->user.replay.end_time;
  135. rd->collector.last_collected_time.tv_usec = 0;
  136. rd->collector.counter++;
  137. }
  138. else {
  139. nd_log_limit_static_global_var(erl, 1, 0);
  140. nd_log_limit(&erl, NDLS_COLLECTORS, NDLP_WARNING,
  141. "PLUGINSD: 'host:%s/chart:%s/dim:%s' has the ARCHIVED flag set, but it is replicated. "
  142. "Ignoring data.",
  143. rrdhost_hostname(st->rrdhost), rrdset_id(st), rrddim_name(rd));
  144. }
  145. }
  146. return PARSER_RC_OK;
  147. }
  148. PARSER_RC pluginsd_replay_rrddim_collection_state(char **words, size_t num_words, PARSER *parser) {
  149. if(parser->user.replay.rset_enabled == false)
  150. return PARSER_RC_OK;
  151. int idx = 1;
  152. ssize_t slot = pluginsd_parse_rrd_slot(words, num_words);
  153. if(slot >= 0) idx++;
  154. char *dimension = get_word(words, num_words, idx++);
  155. char *last_collected_ut_str = get_word(words, num_words, idx++);
  156. char *last_collected_value_str = get_word(words, num_words, idx++);
  157. char *last_calculated_value_str = get_word(words, num_words, idx++);
  158. char *last_stored_value_str = get_word(words, num_words, idx++);
  159. RRDHOST *host = pluginsd_require_scope_host(parser, PLUGINSD_KEYWORD_REPLAY_RRDDIM_STATE);
  160. if(!host) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL);
  161. RRDSET *st = pluginsd_require_scope_chart(parser, PLUGINSD_KEYWORD_REPLAY_RRDDIM_STATE, PLUGINSD_KEYWORD_REPLAY_BEGIN);
  162. if(!st) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL);
  163. if(st->pluginsd.set) {
  164. // reset pos to reuse the same RDAs
  165. st->pluginsd.pos = 0;
  166. st->pluginsd.set = false;
  167. }
  168. RRDDIM *rd = pluginsd_acquire_dimension(host, st, dimension, slot, PLUGINSD_KEYWORD_REPLAY_RRDDIM_STATE);
  169. if(!rd) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL);
  170. usec_t dim_last_collected_ut = (usec_t)rd->collector.last_collected_time.tv_sec * USEC_PER_SEC + (usec_t)rd->collector.last_collected_time.tv_usec;
  171. usec_t last_collected_ut = last_collected_ut_str ? str2ull_encoded(last_collected_ut_str) : 0;
  172. if(last_collected_ut > dim_last_collected_ut) {
  173. rd->collector.last_collected_time.tv_sec = (time_t)(last_collected_ut / USEC_PER_SEC);
  174. rd->collector.last_collected_time.tv_usec = (last_collected_ut % USEC_PER_SEC);
  175. }
  176. rd->collector.last_collected_value = last_collected_value_str ? str2ll_encoded(last_collected_value_str) : 0;
  177. rd->collector.last_calculated_value = last_calculated_value_str ? str2ndd_encoded(last_calculated_value_str, NULL) : 0;
  178. rd->collector.last_stored_value = last_stored_value_str ? str2ndd_encoded(last_stored_value_str, NULL) : 0.0;
  179. return PARSER_RC_OK;
  180. }
  181. PARSER_RC pluginsd_replay_rrdset_collection_state(char **words, size_t num_words, PARSER *parser) {
  182. if(parser->user.replay.rset_enabled == false)
  183. return PARSER_RC_OK;
  184. char *last_collected_ut_str = get_word(words, num_words, 1);
  185. char *last_updated_ut_str = get_word(words, num_words, 2);
  186. RRDHOST *host = pluginsd_require_scope_host(parser, PLUGINSD_KEYWORD_REPLAY_RRDSET_STATE);
  187. if(!host) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL);
  188. RRDSET *st = pluginsd_require_scope_chart(parser, PLUGINSD_KEYWORD_REPLAY_RRDSET_STATE,
  189. PLUGINSD_KEYWORD_REPLAY_BEGIN);
  190. if(!st) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL);
  191. usec_t chart_last_collected_ut = (usec_t)st->last_collected_time.tv_sec * USEC_PER_SEC + (usec_t)st->last_collected_time.tv_usec;
  192. usec_t last_collected_ut = last_collected_ut_str ? str2ull_encoded(last_collected_ut_str) : 0;
  193. if(last_collected_ut > chart_last_collected_ut) {
  194. st->last_collected_time.tv_sec = (time_t)(last_collected_ut / USEC_PER_SEC);
  195. st->last_collected_time.tv_usec = (last_collected_ut % USEC_PER_SEC);
  196. }
  197. usec_t chart_last_updated_ut = (usec_t)st->last_updated.tv_sec * USEC_PER_SEC + (usec_t)st->last_updated.tv_usec;
  198. usec_t last_updated_ut = last_updated_ut_str ? str2ull_encoded(last_updated_ut_str) : 0;
  199. if(last_updated_ut > chart_last_updated_ut) {
  200. st->last_updated.tv_sec = (time_t)(last_updated_ut / USEC_PER_SEC);
  201. st->last_updated.tv_usec = (last_updated_ut % USEC_PER_SEC);
  202. }
  203. st->counter++;
  204. st->counter_done++;
  205. return PARSER_RC_OK;
  206. }
  207. PARSER_RC pluginsd_replay_end(char **words, size_t num_words, PARSER *parser) {
  208. if (num_words < 7) { // accepts 7, but the 7th is optional
  209. netdata_log_error("REPLAY: malformed " PLUGINSD_KEYWORD_REPLAY_END " command");
  210. return PARSER_RC_ERROR;
  211. }
  212. const char *update_every_child_txt = get_word(words, num_words, 1);
  213. const char *first_entry_child_txt = get_word(words, num_words, 2);
  214. const char *last_entry_child_txt = get_word(words, num_words, 3);
  215. const char *start_streaming_txt = get_word(words, num_words, 4);
  216. const char *first_entry_requested_txt = get_word(words, num_words, 5);
  217. const char *last_entry_requested_txt = get_word(words, num_words, 6);
  218. const char *child_world_time_txt = get_word(words, num_words, 7); // optional
  219. time_t update_every_child = (time_t) str2ull_encoded(update_every_child_txt);
  220. time_t first_entry_child = (time_t) str2ull_encoded(first_entry_child_txt);
  221. time_t last_entry_child = (time_t) str2ull_encoded(last_entry_child_txt);
  222. bool start_streaming = (strcmp(start_streaming_txt, "true") == 0);
  223. time_t first_entry_requested = (time_t) str2ull_encoded(first_entry_requested_txt);
  224. time_t last_entry_requested = (time_t) str2ull_encoded(last_entry_requested_txt);
  225. // the optional child world time
  226. time_t child_world_time = (child_world_time_txt && *child_world_time_txt) ? (time_t) str2ull_encoded(
  227. child_world_time_txt) : now_realtime_sec();
  228. RRDHOST *host = pluginsd_require_scope_host(parser, PLUGINSD_KEYWORD_REPLAY_END);
  229. if(!host) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL);
  230. RRDSET *st = pluginsd_require_scope_chart(parser, PLUGINSD_KEYWORD_REPLAY_END, PLUGINSD_KEYWORD_REPLAY_BEGIN);
  231. if(!st) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL);
  232. #ifdef NETDATA_LOG_REPLICATION_REQUESTS
  233. internal_error(true,
  234. "PLUGINSD REPLAY: 'host:%s/chart:%s': got a " PLUGINSD_KEYWORD_REPLAY_END " child db from %llu to %llu, start_streaming %s, had requested from %llu to %llu, wall clock %llu",
  235. rrdhost_hostname(host), rrdset_id(st),
  236. (unsigned long long)first_entry_child, (unsigned long long)last_entry_child,
  237. start_streaming?"true":"false",
  238. (unsigned long long)first_entry_requested, (unsigned long long)last_entry_requested,
  239. (unsigned long long)child_world_time
  240. );
  241. #endif
  242. parser->user.data_collections_count++;
  243. if(parser->user.replay.rset_enabled && st->rrdhost->receiver) {
  244. time_t now = now_realtime_sec();
  245. time_t started = st->rrdhost->receiver->replication_first_time_t;
  246. time_t current = parser->user.replay.end_time;
  247. if(started && current > started) {
  248. host->rrdpush_receiver_replication_percent = (NETDATA_DOUBLE) (current - started) * 100.0 / (NETDATA_DOUBLE) (now - started);
  249. worker_set_metric(WORKER_RECEIVER_JOB_REPLICATION_COMPLETION,
  250. host->rrdpush_receiver_replication_percent);
  251. }
  252. }
  253. parser->user.replay.start_time = 0;
  254. parser->user.replay.end_time = 0;
  255. parser->user.replay.start_time_ut = 0;
  256. parser->user.replay.end_time_ut = 0;
  257. parser->user.replay.wall_clock_time = 0;
  258. parser->user.replay.rset_enabled = false;
  259. st->counter++;
  260. st->counter_done++;
  261. store_metric_collection_completed();
  262. #ifdef NETDATA_LOG_REPLICATION_REQUESTS
  263. st->replay.start_streaming = false;
  264. st->replay.after = 0;
  265. st->replay.before = 0;
  266. if(start_streaming)
  267. st->replay.log_next_data_collection = true;
  268. #endif
  269. if (start_streaming) {
  270. if (st->update_every != update_every_child)
  271. rrdset_set_update_every_s(st, update_every_child);
  272. if(rrdset_flag_check(st, RRDSET_FLAG_RECEIVER_REPLICATION_IN_PROGRESS)) {
  273. rrdset_flag_set(st, RRDSET_FLAG_RECEIVER_REPLICATION_FINISHED);
  274. rrdset_flag_clear(st, RRDSET_FLAG_RECEIVER_REPLICATION_IN_PROGRESS);
  275. rrdset_flag_clear(st, RRDSET_FLAG_SYNC_CLOCK);
  276. rrdhost_receiver_replicating_charts_minus_one(st->rrdhost);
  277. }
  278. #ifdef NETDATA_LOG_REPLICATION_REQUESTS
  279. else
  280. internal_error(true, "REPLAY ERROR: 'host:%s/chart:%s' got a " PLUGINSD_KEYWORD_REPLAY_END " with enable_streaming = true, but there is no replication in progress for this chart.",
  281. rrdhost_hostname(host), rrdset_id(st));
  282. #endif
  283. pluginsd_clear_scope_chart(parser, PLUGINSD_KEYWORD_REPLAY_END);
  284. host->rrdpush_receiver_replication_percent = 100.0;
  285. worker_set_metric(WORKER_RECEIVER_JOB_REPLICATION_COMPLETION, host->rrdpush_receiver_replication_percent);
  286. return PARSER_RC_OK;
  287. }
  288. pluginsd_clear_scope_chart(parser, PLUGINSD_KEYWORD_REPLAY_END);
  289. rrdcontext_updated_retention_rrdset(st);
  290. bool ok = replicate_chart_request(send_to_plugin, parser, host, st,
  291. first_entry_child, last_entry_child, child_world_time,
  292. first_entry_requested, last_entry_requested);
  293. return ok ? PARSER_RC_OK : PARSER_RC_ERROR;
  294. }