// SPDX-License-Identifier: GPL-3.0-or-later
#include "rrdengine.h"

/* Default global database instance */
struct rrdengine_instance multidb_ctx;

int default_rrdeng_page_cache_mb = 32;
int default_rrdeng_disk_quota_mb = 256;
int default_multidb_disk_quota_mb = 256;
/* Default behaviour is to unblock data collection if the page cache is full of dirty pages by dropping metrics */
uint8_t rrdeng_drop_metrics_under_page_cache_pressure = 1;

static inline struct rrdengine_instance *get_rrdeng_ctx_from_host(RRDHOST *host)
{
    return host->rrdeng_ctx;
}

/* This UUID is not unique across hosts */
void rrdeng_generate_legacy_uuid(const char *dim_id, char *chart_id, uuid_t *ret_uuid)
{
    EVP_MD_CTX *evpctx;
    unsigned char hash_value[EVP_MAX_MD_SIZE];
    unsigned int hash_len;

    evpctx = EVP_MD_CTX_create();
    EVP_DigestInit_ex(evpctx, EVP_sha256(), NULL);
    EVP_DigestUpdate(evpctx, dim_id, strlen(dim_id));
    EVP_DigestUpdate(evpctx, chart_id, strlen(chart_id));
    EVP_DigestFinal_ex(evpctx, hash_value, &hash_len);
    EVP_MD_CTX_destroy(evpctx);
    fatal_assert(hash_len > sizeof(uuid_t));
    memcpy(ret_uuid, hash_value, sizeof(uuid_t));
}

/* Transform legacy UUID to be unique across hosts deterministically */
void rrdeng_convert_legacy_uuid_to_multihost(char machine_guid[GUID_LEN + 1], uuid_t *legacy_uuid, uuid_t *ret_uuid)
{
    EVP_MD_CTX *evpctx;
    unsigned char hash_value[EVP_MAX_MD_SIZE];
    unsigned int hash_len;

    evpctx = EVP_MD_CTX_create();
    EVP_DigestInit_ex(evpctx, EVP_sha256(), NULL);
    EVP_DigestUpdate(evpctx, machine_guid, GUID_LEN);
    EVP_DigestUpdate(evpctx, *legacy_uuid, sizeof(uuid_t));
    EVP_DigestFinal_ex(evpctx, hash_value, &hash_len);
    EVP_MD_CTX_destroy(evpctx);
    fatal_assert(hash_len > sizeof(uuid_t));
    memcpy(ret_uuid, hash_value, sizeof(uuid_t));
}
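
/*
 * Illustrative sketch (not part of the engine API): how the two helpers above
 * combine. A dimension's legacy UUID is the truncated SHA-256 of its dimension
 * and chart ids; hashing it together with the host's machine GUID yields the
 * multi-host UUID. The identifiers below are made up for the example.
 *
 *     uuid_t legacy, multihost;
 *     rrdeng_generate_legacy_uuid("user", "system.cpu", &legacy);
 *     rrdeng_convert_legacy_uuid_to_multihost(host->machine_guid, &legacy, &multihost);
 *
 * The same inputs always produce the same UUIDs, so the mapping is stable
 * across agent restarts.
 */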

void rrdeng_metric_init(RRDDIM *rd, uuid_t *dim_uuid)
{
    struct page_cache *pg_cache;
    struct rrdengine_instance *ctx;
    uuid_t legacy_uuid;
    uuid_t multihost_legacy_uuid;
    Pvoid_t *PValue;
    struct pg_cache_page_index *page_index = NULL;
    int is_multihost_child = 0;
    RRDHOST *host = rd->rrdset->rrdhost;

    ctx = get_rrdeng_ctx_from_host(rd->rrdset->rrdhost);
    if (unlikely(!ctx)) {
        error("Failed to fetch multidb context");
        return;
    }
    pg_cache = &ctx->pg_cache;

    rrdeng_generate_legacy_uuid(rd->id, rd->rrdset->id, &legacy_uuid);
    rd->state->metric_uuid = dim_uuid;
    if (host != localhost && host->rrdeng_ctx == &multidb_ctx)
        is_multihost_child = 1;

    uv_rwlock_rdlock(&pg_cache->metrics_index.lock);
    PValue = JudyHSGet(pg_cache->metrics_index.JudyHS_array, &legacy_uuid, sizeof(uuid_t));
    if (likely(NULL != PValue)) {
        page_index = *PValue;
    }
    uv_rwlock_rdunlock(&pg_cache->metrics_index.lock);
    if (is_multihost_child || NULL == PValue) {
        /* First time we see the legacy UUID or metric belongs to child host in multi-host DB.
         * Drop legacy support, normal path */
        if (unlikely(!rd->state->metric_uuid))
            rd->state->metric_uuid = create_dimension_uuid(rd->rrdset, rd);

        uv_rwlock_rdlock(&pg_cache->metrics_index.lock);
        PValue = JudyHSGet(pg_cache->metrics_index.JudyHS_array, rd->state->metric_uuid, sizeof(uuid_t));
        if (likely(NULL != PValue)) {
            page_index = *PValue;
        }
        uv_rwlock_rdunlock(&pg_cache->metrics_index.lock);
        if (NULL == PValue) {
            uv_rwlock_wrlock(&pg_cache->metrics_index.lock);
            PValue = JudyHSIns(&pg_cache->metrics_index.JudyHS_array, rd->state->metric_uuid, sizeof(uuid_t), PJE0);
            fatal_assert(NULL == *PValue); /* TODO: figure out concurrency model */
            *PValue = page_index = create_page_index(rd->state->metric_uuid);
            page_index->prev = pg_cache->metrics_index.last_page_index;
            pg_cache->metrics_index.last_page_index = page_index;
            uv_rwlock_wrunlock(&pg_cache->metrics_index.lock);
        }
    } else {
        /* There are legacy UUIDs in the database, implement backward compatibility */
        rrdeng_convert_legacy_uuid_to_multihost(rd->rrdset->rrdhost->machine_guid, &legacy_uuid,
                                                &multihost_legacy_uuid);
        if (unlikely(!rd->state->metric_uuid))
            rd->state->metric_uuid = mallocz(sizeof(uuid_t));

        int need_to_store = (dim_uuid == NULL || uuid_compare(*rd->state->metric_uuid, multihost_legacy_uuid));
        uuid_copy(*rd->state->metric_uuid, multihost_legacy_uuid);
        if (unlikely(need_to_store))
            (void)sql_store_dimension(rd->state->metric_uuid, rd->rrdset->chart_uuid, rd->id, rd->name, rd->multiplier,
                                      rd->divisor, rd->algorithm);
    }
    rd->state->rrdeng_uuid = &page_index->id;
    rd->state->page_index = page_index;
}

/*
 * Gets a handle for storing metrics to the database.
 * The handle must be released with rrdeng_store_metric_finalize().
 */
void rrdeng_store_metric_init(RRDDIM *rd)
{
    struct rrdeng_collect_handle *handle;
    struct rrdengine_instance *ctx;
    struct pg_cache_page_index *page_index;

    ctx = get_rrdeng_ctx_from_host(rd->rrdset->rrdhost);
    handle = &rd->state->handle.rrdeng;
    handle->ctx = ctx;
    handle->descr = NULL;
    handle->prev_descr = NULL;
    handle->unaligned_page = 0;

    page_index = rd->state->page_index;
    uv_rwlock_wrlock(&page_index->lock);
    ++page_index->writers;
    uv_rwlock_wrunlock(&page_index->lock);
}

/* The page must be populated and referenced */
static int page_has_only_empty_metrics(struct rrdeng_page_descr *descr)
{
    unsigned i;
    uint8_t has_only_empty_metrics = 1;
    storage_number *page;

    page = descr->pg_cache_descr->page;
    for (i = 0 ; i < descr->page_length / sizeof(storage_number); ++i) {
        if (SN_EMPTY_SLOT != page[i]) {
            has_only_empty_metrics = 0;
            break;
        }
    }
    return has_only_empty_metrics;
}

void rrdeng_store_metric_flush_current_page(RRDDIM *rd)
{
    struct rrdeng_collect_handle *handle;
    struct rrdengine_instance *ctx;
    struct rrdeng_page_descr *descr;

    handle = &rd->state->handle.rrdeng;
    ctx = handle->ctx;
    if (unlikely(!ctx))
        return;
    descr = handle->descr;
    if (unlikely(NULL == descr)) {
        return;
    }
    if (likely(descr->page_length)) {
        int page_is_empty;

        rrd_stat_atomic_add(&ctx->stats.metric_API_producers, -1);

        if (handle->prev_descr) {
            /* unpin old second page */
            pg_cache_put(ctx, handle->prev_descr);
        }
        page_is_empty = page_has_only_empty_metrics(descr);
        if (page_is_empty) {
            debug(D_RRDENGINE, "Page has empty metrics only, deleting:");
            if (unlikely(debug_flags & D_RRDENGINE))
                print_page_cache_descr(descr);
            pg_cache_put(ctx, descr);
            pg_cache_punch_hole(ctx, descr, 1, 0, NULL);
            handle->prev_descr = NULL;
        } else {
            /*
             * Disable pinning for now as it leads to deadlocks. When a collector stops collecting the extra pinned page
             * eventually gets rotated but it cannot be destroyed due to the extra reference.
             */
            /* added 1 extra reference to keep 2 dirty pages pinned per metric, expected refcnt = 2 */
            /* rrdeng_page_descr_mutex_lock(ctx, descr);
            ret = pg_cache_try_get_unsafe(descr, 0);
            rrdeng_page_descr_mutex_unlock(ctx, descr);
            fatal_assert(1 == ret);*/

            rrdeng_commit_page(ctx, descr, handle->page_correlation_id);
            /* handle->prev_descr = descr;*/
        }
    } else {
        freez(descr->pg_cache_descr->page);
        rrdeng_destroy_pg_cache_descr(ctx, descr->pg_cache_descr);
        freez(descr);
    }
    handle->descr = NULL;
}

void rrdeng_store_metric_next(RRDDIM *rd, usec_t point_in_time, storage_number number)
{
    struct rrdeng_collect_handle *handle;
    struct rrdengine_instance *ctx;
    struct page_cache *pg_cache;
    struct rrdeng_page_descr *descr;
    storage_number *page;
    uint8_t must_flush_unaligned_page = 0, perfect_page_alignment = 0;

    handle = &rd->state->handle.rrdeng;
    ctx = handle->ctx;
    pg_cache = &ctx->pg_cache;
    descr = handle->descr;

    if (descr) {
        /* Make alignment decisions */

        if (descr->page_length == rd->rrdset->rrddim_page_alignment) {
            /* this is the leading dimension that defines chart alignment */
            perfect_page_alignment = 1;
        }
        /* is the metric far enough out of alignment with the others? */
        if (unlikely(descr->page_length + sizeof(number) < rd->rrdset->rrddim_page_alignment)) {
            handle->unaligned_page = 1;
            debug(D_RRDENGINE, "Metric page is not aligned with chart:");
            if (unlikely(debug_flags & D_RRDENGINE))
                print_page_cache_descr(descr);
        }
        if (unlikely(handle->unaligned_page &&
                     /* did the other metrics change page? */
                     rd->rrdset->rrddim_page_alignment <= sizeof(number))) {
            debug(D_RRDENGINE, "Flushing unaligned metric page.");
            must_flush_unaligned_page = 1;
            handle->unaligned_page = 0;
        }
    }
    if (unlikely(NULL == descr ||
                 descr->page_length + sizeof(number) > RRDENG_BLOCK_SIZE ||
                 must_flush_unaligned_page)) {
        rrdeng_store_metric_flush_current_page(rd);

        page = rrdeng_create_page(ctx, &rd->state->page_index->id, &descr);
        fatal_assert(page);

        handle->descr = descr;

        handle->page_correlation_id = rrd_atomic_fetch_add(&pg_cache->committed_page_index.latest_corr_id, 1);

        if (0 == rd->rrdset->rrddim_page_alignment) {
            /* this is the leading dimension that defines chart alignment */
            perfect_page_alignment = 1;
        }
    }
    page = descr->pg_cache_descr->page;
    page[descr->page_length / sizeof(number)] = number;
    pg_cache_atomic_set_pg_info(descr, point_in_time, descr->page_length + sizeof(number));

    if (perfect_page_alignment)
        rd->rrdset->rrddim_page_alignment = descr->page_length;
    if (unlikely(INVALID_TIME == descr->start_time)) {
        unsigned long new_metric_API_producers, old_metric_API_max_producers, ret_metric_API_max_producers;

        descr->start_time = point_in_time;
        new_metric_API_producers = rrd_atomic_add_fetch(&ctx->stats.metric_API_producers, 1);
        while (unlikely(new_metric_API_producers > (old_metric_API_max_producers = ctx->metric_API_max_producers))) {
            /* Increase ctx->metric_API_max_producers */
            ret_metric_API_max_producers = ulong_compare_and_swap(&ctx->metric_API_max_producers,
                                                                  old_metric_API_max_producers,
                                                                  new_metric_API_producers);
            if (old_metric_API_max_producers == ret_metric_API_max_producers) {
                /* success */
                break;
            }
        }

        pg_cache_insert(ctx, rd->state->page_index, descr);
    } else {
        pg_cache_add_new_metric_time(rd->state->page_index, descr);
    }
}

/*
 * Releases the database reference from the handle for storing metrics.
 * Returns 1 if it's safe to delete the dimension.
 */
int rrdeng_store_metric_finalize(RRDDIM *rd)
{
    struct rrdeng_collect_handle *handle;
    struct rrdengine_instance *ctx;
    struct pg_cache_page_index *page_index;
    uint8_t can_delete_metric = 0;

    handle = &rd->state->handle.rrdeng;
    ctx = handle->ctx;
    page_index = rd->state->page_index;
    rrdeng_store_metric_flush_current_page(rd);
    if (handle->prev_descr) {
        /* unpin old second page */
        pg_cache_put(ctx, handle->prev_descr);
    }
    uv_rwlock_wrlock(&page_index->lock);
    if (!--page_index->writers && !page_index->page_count) {
        can_delete_metric = 1;
    }
    uv_rwlock_wrunlock(&page_index->lock);

    return can_delete_metric;
}
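
/*
 * Illustrative sketch (not part of this file): the expected collector call
 * sequence for the storage API above. The loop body is hypothetical; a real
 * collector obtains point_in_time and number from its data source.
 *
 *     rrdeng_store_metric_init(rd);
 *     while (collecting) {
 *         // one sample per data collection iteration
 *         rrdeng_store_metric_next(rd, point_in_time, number);
 *     }
 *     if (rrdeng_store_metric_finalize(rd)) {
 *         // no writers and no pages remain; the dimension may be deleted
 *     }
 */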

/* Returns 1 if the data collection interval is well defined, 0 otherwise */
static int metrics_with_known_interval(struct rrdeng_page_descr *descr)
{
    unsigned page_entries;

    if (unlikely(INVALID_TIME == descr->start_time || INVALID_TIME == descr->end_time))
        return 0;
    page_entries = descr->page_length / sizeof(storage_number);
    if (likely(page_entries > 1)) {
        return 1;
    }
    return 0;
}

static inline uint32_t *pginfo_to_dt(struct rrdeng_page_info *page_info)
{
    return (uint32_t *)&page_info->scratch[0];
}

static inline uint32_t *pginfo_to_points(struct rrdeng_page_info *page_info)
{
    return (uint32_t *)&page_info->scratch[sizeof(uint32_t)];
}
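
/*
 * Note (added for clarity): the two accessors above overlay the opaque
 * scratch area of struct rrdeng_page_info with two uint32_t slots.
 * rrdeng_variable_step_boundaries() below uses them to track, per page, the
 * data collection interval and the number of valid points:
 *
 *     scratch[0..3] -> dt in seconds      (*pginfo_to_dt())
 *     scratch[4..7] -> valid point count  (*pginfo_to_points())
 */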

/**
 * Calculates the regions of different data collection intervals in a netdata chart in the time range
 * [start_time,end_time]. This call takes the netdata chart read lock.
 * @param st the netdata chart whose data collection interval boundaries are calculated.
 * @param start_time inclusive starting time in seconds
 * @param end_time inclusive ending time in seconds
 * @param region_info_arrayp It allocates (*region_info_arrayp) and populates it with information of regions of a
 *        reference dimension that have different data collection intervals and overlap with the time range
 *        [start_time,end_time]. The caller must free (*region_info_arrayp) with freez(). If (*region_info_arrayp) is
 *        set to NULL nothing was allocated.
 * @param max_intervalp is dereferenced and set to be the largest data collection interval of all regions.
 * @return number of regions with different data collection intervals.
 */
unsigned rrdeng_variable_step_boundaries(RRDSET *st, time_t start_time, time_t end_time,
                                         struct rrdeng_region_info **region_info_arrayp, unsigned *max_intervalp,
                                         struct context_param *context_param_list)
{
    struct pg_cache_page_index *page_index;
    struct rrdengine_instance *ctx;
    unsigned pages_nr;
    RRDDIM *rd_iter, *rd;
    struct rrdeng_page_info *page_info_array, *curr, *prev, *old_prev;
    unsigned i, j, page_entries, region_points, page_points, regions, max_interval;
    time_t now;
    usec_t dt, current_position_time, max_time = 0, min_time, curr_time, first_valid_time_in_page;
    struct rrdeng_region_info *region_info_array;
    uint8_t is_first_region_initialized;

    ctx = get_rrdeng_ctx_from_host(st->rrdhost);
    regions = 1;
    *max_intervalp = max_interval = 0;
    region_info_array = NULL;
    *region_info_arrayp = NULL;
    page_info_array = NULL;

    RRDDIM *temp_rd = context_param_list ? context_param_list->rd : NULL;

    rrdset_rdlock(st);
    for (rd_iter = temp_rd ? temp_rd : st->dimensions, rd = NULL, min_time = (usec_t)-1 ; rd_iter ; rd_iter = rd_iter->next) {
        /*
         * Choose oldest dimension as reference. This is not equivalent to the union of all dimensions
         * but it is a best effort approximation with a bias towards older metrics in a chart. It
         * matches netdata behaviour in the sense that dimensions are generally aligned in a chart
         * and older dimensions contain more information about the time range. It does not work well
         * for metrics that have recently stopped being collected.
         */
        curr_time = pg_cache_oldest_time_in_range(ctx, rd_iter->state->rrdeng_uuid,
                                                  start_time * USEC_PER_SEC, end_time * USEC_PER_SEC);
        if (INVALID_TIME != curr_time && curr_time < min_time) {
            rd = rd_iter;
            min_time = curr_time;
        }
    }
    rrdset_unlock(st);
    if (NULL == rd) {
        return 1;
    }
    pages_nr = pg_cache_preload(ctx, rd->state->rrdeng_uuid, start_time * USEC_PER_SEC, end_time * USEC_PER_SEC,
                                &page_info_array, &page_index);
    if (pages_nr) {
        /* conservative allocation, will reduce the size later if necessary */
        region_info_array = mallocz(sizeof(*region_info_array) * pages_nr);
    }
    is_first_region_initialized = 0;
    region_points = 0;

    /* pages loop */
    for (i = 0, curr = NULL, prev = NULL ; i < pages_nr ; ++i) {
        old_prev = prev;
        prev = curr;
        curr = &page_info_array[i];
        *pginfo_to_points(curr) = 0; /* initialize to invalid page */
        *pginfo_to_dt(curr) = 0; /* no known data collection interval yet */
        if (unlikely(INVALID_TIME == curr->start_time || INVALID_TIME == curr->end_time ||
                     curr->end_time < curr->start_time)) {
            info("Ignoring page with invalid timestamps.");
            prev = old_prev;
            continue;
        }
        page_entries = curr->page_length / sizeof(storage_number);
        fatal_assert(0 != page_entries);
        if (likely(1 != page_entries)) {
            dt = (curr->end_time - curr->start_time) / (page_entries - 1);
            *pginfo_to_dt(curr) = ROUND_USEC_TO_SEC(dt);
            if (unlikely(0 == *pginfo_to_dt(curr)))
                *pginfo_to_dt(curr) = 1;
        } else {
            dt = 0;
        }
        for (j = 0, page_points = 0 ; j < page_entries ; ++j) {
            uint8_t is_metric_out_of_order, is_metric_earlier_than_range;

            is_metric_earlier_than_range = 0;
            is_metric_out_of_order = 0;

            current_position_time = curr->start_time + j * dt;
            now = current_position_time / USEC_PER_SEC;
            if (now > end_time) { /* there will be no more pages in the time range */
                break;
            }
            if (now < start_time)
                is_metric_earlier_than_range = 1;
            if (unlikely(current_position_time < max_time)) /* just went back in time */
                is_metric_out_of_order = 1;
            if (is_metric_earlier_than_range || unlikely(is_metric_out_of_order)) {
                if (unlikely(is_metric_out_of_order))
                    info("Ignoring metric with out of order timestamp.");
                continue; /* next entry */
            }
            /* here is a valid metric */
            ++page_points;
            region_info_array[regions - 1].points = ++region_points;
            max_time = current_position_time;
            if (1 == page_points)
                first_valid_time_in_page = current_position_time;
            if (unlikely(!is_first_region_initialized)) {
                fatal_assert(1 == regions);
                /* this is the first region */
                region_info_array[0].start_time = current_position_time;
                is_first_region_initialized = 1;
            }
        }
        *pginfo_to_points(curr) = page_points;
        if (0 == page_points) {
            prev = old_prev;
            continue;
        }
        if (unlikely(0 == *pginfo_to_dt(curr))) { /* unknown data collection interval */
            fatal_assert(1 == page_points);

            if (likely(NULL != prev)) { /* get interval from previous page */
                *pginfo_to_dt(curr) = *pginfo_to_dt(prev);
            } else { /* there is no previous page in the query */
                struct rrdeng_page_info db_page_info;

                /* go to database */
                pg_cache_get_filtered_info_prev(ctx, page_index, curr->start_time,
                                                metrics_with_known_interval, &db_page_info);
                if (unlikely(db_page_info.start_time == INVALID_TIME || db_page_info.end_time == INVALID_TIME ||
                             0 == db_page_info.page_length)) { /* nothing in the database, default to update_every */
                    *pginfo_to_dt(curr) = rd->update_every;
                } else {
                    unsigned db_entries;
                    usec_t db_dt;

                    db_entries = db_page_info.page_length / sizeof(storage_number);
                    db_dt = (db_page_info.end_time - db_page_info.start_time) / (db_entries - 1);
                    *pginfo_to_dt(curr) = ROUND_USEC_TO_SEC(db_dt);
                    if (unlikely(0 == *pginfo_to_dt(curr)))
                        *pginfo_to_dt(curr) = 1;
                }
            }
        }
        if (likely(prev) && unlikely(*pginfo_to_dt(curr) != *pginfo_to_dt(prev))) {
            info("Data collection interval change detected in query: %"PRIu32" -> %"PRIu32,
                 *pginfo_to_dt(prev), *pginfo_to_dt(curr));
            region_info_array[regions++ - 1].points -= page_points;
            region_info_array[regions - 1].points = region_points = page_points;
            region_info_array[regions - 1].start_time = first_valid_time_in_page;
        }
        if (*pginfo_to_dt(curr) > max_interval)
            max_interval = *pginfo_to_dt(curr);
        region_info_array[regions - 1].update_every = *pginfo_to_dt(curr);
    }
    if (page_info_array)
        freez(page_info_array);
    if (region_info_array) {
        if (likely(is_first_region_initialized)) {
            /* free unnecessary memory */
            region_info_array = reallocz(region_info_array, sizeof(*region_info_array) * regions);
            *region_info_arrayp = region_info_array;
            *max_intervalp = max_interval;
        } else {
            /* empty result */
            freez(region_info_array);
        }
    }
    return regions;
}
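
/*
 * Illustrative sketch (not part of this file): a caller of
 * rrdeng_variable_step_boundaries() owns the returned array and must free it
 * with freez(). The chart pointer and time range below are hypothetical.
 *
 *     struct rrdeng_region_info *regions = NULL;
 *     unsigned max_interval, nr;
 *
 *     nr = rrdeng_variable_step_boundaries(st, after, before, &regions, &max_interval, NULL);
 *     if (regions) {
 *         for (unsigned i = 0 ; i < nr ; ++i) {
 *             // regions[i].start_time, regions[i].update_every, regions[i].points
 *         }
 *         freez(regions);
 *     }
 */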

/*
 * Gets a handle for loading metrics from the database.
 * The handle must be released with rrdeng_load_metric_finalize().
 */
void rrdeng_load_metric_init(RRDDIM *rd, struct rrddim_query_handle *rrdimm_handle, time_t start_time, time_t end_time)
{
    struct rrdeng_query_handle *handle;
    struct rrdengine_instance *ctx;
    unsigned pages_nr;

    ctx = get_rrdeng_ctx_from_host(rd->rrdset->rrdhost);
    rrdimm_handle->start_time = start_time;
    rrdimm_handle->end_time = end_time;
    handle = &rrdimm_handle->rrdeng;
    handle->next_page_time = start_time;
    handle->now = start_time;
    handle->position = 0;
    handle->ctx = ctx;
    handle->descr = NULL;
    pages_nr = pg_cache_preload(ctx, rd->state->rrdeng_uuid, start_time * USEC_PER_SEC, end_time * USEC_PER_SEC,
                                NULL, &handle->page_index);
    if (unlikely(NULL == handle->page_index || 0 == pages_nr))
        /* there are no metrics to load */
        handle->next_page_time = INVALID_TIME;
}

/* Returns the metric and sets its timestamp into current_time */
storage_number rrdeng_load_metric_next(struct rrddim_query_handle *rrdimm_handle, time_t *current_time)
{
    struct rrdeng_query_handle *handle;
    struct rrdengine_instance *ctx;
    struct rrdeng_page_descr *descr;
    storage_number *page, ret;
    unsigned position, entries;
    usec_t next_page_time = 0, current_position_time, page_end_time = 0;
    uint32_t page_length;

    handle = &rrdimm_handle->rrdeng;
    if (unlikely(INVALID_TIME == handle->next_page_time)) {
        return SN_EMPTY_SLOT;
    }
    ctx = handle->ctx;
    if (unlikely(NULL == (descr = handle->descr))) {
        /* it's the first call */
        next_page_time = handle->next_page_time * USEC_PER_SEC;
    } else {
        pg_cache_atomic_get_pg_info(descr, &page_end_time, &page_length);
    }
    position = handle->position + 1;

    if (unlikely(NULL == descr ||
                 position >= (page_length / sizeof(storage_number)))) {
        /* We need to get a new page */
        if (descr) {
            /* Drop old page's reference */
#ifdef NETDATA_INTERNAL_CHECKS
            rrd_stat_atomic_add(&ctx->stats.metric_API_consumers, -1);
#endif
            pg_cache_put(ctx, descr);
            handle->descr = NULL;
            handle->next_page_time = (page_end_time / USEC_PER_SEC) + 1;
            if (unlikely(handle->next_page_time > rrdimm_handle->end_time)) {
                goto no_more_metrics;
            }
            next_page_time = handle->next_page_time * USEC_PER_SEC;
        }
        descr = pg_cache_lookup_next(ctx, handle->page_index, &handle->page_index->id,
                                     next_page_time, rrdimm_handle->end_time * USEC_PER_SEC);
        if (NULL == descr) {
            goto no_more_metrics;
        }
#ifdef NETDATA_INTERNAL_CHECKS
        rrd_stat_atomic_add(&ctx->stats.metric_API_consumers, 1);
#endif
        handle->descr = descr;
        pg_cache_atomic_get_pg_info(descr, &page_end_time, &page_length);
        if (unlikely(INVALID_TIME == descr->start_time ||
                     INVALID_TIME == page_end_time)) {
            goto no_more_metrics;
        }
        if (unlikely(descr->start_time != page_end_time && next_page_time > descr->start_time)) {
            /* we're in the middle of the page somewhere */
            entries = page_length / sizeof(storage_number);
            position = ((uint64_t)(next_page_time - descr->start_time)) * (entries - 1) /
                       (page_end_time - descr->start_time);
        } else {
            position = 0;
        }
    }
    page = descr->pg_cache_descr->page;
    ret = page[position];
    entries = page_length / sizeof(storage_number);
    if (entries > 1) {
        usec_t dt;

        dt = (page_end_time - descr->start_time) / (entries - 1);
        current_position_time = descr->start_time + position * dt;
    } else {
        current_position_time = descr->start_time;
    }
    handle->position = position;
    handle->now = current_position_time / USEC_PER_SEC;
    /* fatal_assert(handle->now >= rrdimm_handle->start_time && handle->now <= rrdimm_handle->end_time);
       The above assertion is an approximation and needs to take update_every into account */
    if (unlikely(handle->now >= rrdimm_handle->end_time)) {
        /* next calls will not load any more metrics */
        handle->next_page_time = INVALID_TIME;
    }
    *current_time = handle->now;
    return ret;

no_more_metrics:
    handle->next_page_time = INVALID_TIME;
    return SN_EMPTY_SLOT;
}

int rrdeng_load_metric_is_finished(struct rrddim_query_handle *rrdimm_handle)
{
    struct rrdeng_query_handle *handle;

    handle = &rrdimm_handle->rrdeng;
    return (INVALID_TIME == handle->next_page_time);
}

/*
 * Releases the database reference from the handle for loading metrics.
 */
void rrdeng_load_metric_finalize(struct rrddim_query_handle *rrdimm_handle)
{
    struct rrdeng_query_handle *handle;
    struct rrdengine_instance *ctx;
    struct rrdeng_page_descr *descr;

    handle = &rrdimm_handle->rrdeng;
    ctx = handle->ctx;
    descr = handle->descr;
    if (descr) {
#ifdef NETDATA_INTERNAL_CHECKS
        rrd_stat_atomic_add(&ctx->stats.metric_API_consumers, -1);
#endif
        pg_cache_put(ctx, descr);
    }
}
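
/*
 * Illustrative sketch (not part of this file): the expected query call
 * sequence for the load API above. The handle lives on the caller's stack;
 * rd, after and before are hypothetical.
 *
 *     struct rrddim_query_handle qh;
 *     time_t t;
 *
 *     rrdeng_load_metric_init(rd, &qh, after, before);
 *     while (!rrdeng_load_metric_is_finished(&qh)) {
 *         storage_number n = rrdeng_load_metric_next(&qh, &t);
 *         // n may be SN_EMPTY_SLOT when there is no value at timestamp t
 *     }
 *     rrdeng_load_metric_finalize(&qh);
 */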

time_t rrdeng_metric_latest_time(RRDDIM *rd)
{
    struct pg_cache_page_index *page_index;

    page_index = rd->state->page_index;

    return page_index->latest_time / USEC_PER_SEC;
}

time_t rrdeng_metric_oldest_time(RRDDIM *rd)
{
    struct pg_cache_page_index *page_index;

    page_index = rd->state->page_index;

    return page_index->oldest_time / USEC_PER_SEC;
}

/* Also gets a reference for the page */
void *rrdeng_create_page(struct rrdengine_instance *ctx, uuid_t *id, struct rrdeng_page_descr **ret_descr)
{
    struct rrdeng_page_descr *descr;
    struct page_cache_descr *pg_cache_descr;
    void *page;

    /* TODO: check maximum number of pages in page cache limit */
    descr = pg_cache_create_descr();
    descr->id = id; /* TODO: add page type: metric, log, something? */
    page = mallocz(RRDENG_BLOCK_SIZE); /*TODO: add page size */
    rrdeng_page_descr_mutex_lock(ctx, descr);
    pg_cache_descr = descr->pg_cache_descr;
    pg_cache_descr->page = page;
    pg_cache_descr->flags = RRD_PAGE_DIRTY /*| RRD_PAGE_LOCKED */ | RRD_PAGE_POPULATED /* | BEING_COLLECTED */;
    pg_cache_descr->refcnt = 1;

    debug(D_RRDENGINE, "Created new page:");
    if (unlikely(debug_flags & D_RRDENGINE))
        print_page_cache_descr(descr);
    rrdeng_page_descr_mutex_unlock(ctx, descr);
    *ret_descr = descr;
    return page;
}

/* The page must not be empty */
void rrdeng_commit_page(struct rrdengine_instance *ctx, struct rrdeng_page_descr *descr,
                        Word_t page_correlation_id)
{
    struct page_cache *pg_cache = &ctx->pg_cache;
    Pvoid_t *PValue;
    unsigned nr_committed_pages;

    if (unlikely(NULL == descr)) {
        debug(D_RRDENGINE, "%s: page descriptor is NULL, page has already been force-committed.", __func__);
        return;
    }
    fatal_assert(descr->page_length);

    uv_rwlock_wrlock(&pg_cache->committed_page_index.lock);
    PValue = JudyLIns(&pg_cache->committed_page_index.JudyL_array, page_correlation_id, PJE0);
    *PValue = descr;
    nr_committed_pages = ++pg_cache->committed_page_index.nr_committed_pages;
    uv_rwlock_wrunlock(&pg_cache->committed_page_index.lock);

    if (nr_committed_pages >= pg_cache_hard_limit(ctx) / 2) {
        /* over 50% of the page cache is dirty (committed but not yet flushed) */
        if (ctx->drop_metrics_under_page_cache_pressure &&
            nr_committed_pages >= pg_cache_committed_hard_limit(ctx)) {
            /* 100% of pages are dirty */
            struct rrdeng_cmd cmd;

            cmd.opcode = RRDENG_INVALIDATE_OLDEST_MEMORY_PAGE;
            rrdeng_enq_cmd(&ctx->worker_config, &cmd);
        } else {
            if (0 == (unsigned long) ctx->stats.pg_cache_over_half_dirty_events) {
                /* only print the first time */
                errno = 0;
                error("Failed to flush dirty buffers quickly enough in dbengine instance \"%s\". "
                      "Metric data at risk of not being stored in the database, "
                      "please reduce disk load or use a faster disk.", ctx->dbfiles_path);
            }
            rrd_stat_atomic_add(&ctx->stats.pg_cache_over_half_dirty_events, 1);
            rrd_stat_atomic_add(&global_pg_cache_over_half_dirty_events, 1);
        }
    }

    pg_cache_put(ctx, descr);
}

/* Gets a reference for the page */
void *rrdeng_get_latest_page(struct rrdengine_instance *ctx, uuid_t *id, void **handle)
{
    struct rrdeng_page_descr *descr;
    struct page_cache_descr *pg_cache_descr;

    debug(D_RRDENGINE, "Reading existing page:");
    descr = pg_cache_lookup(ctx, NULL, id, INVALID_TIME);
    if (NULL == descr) {
        *handle = NULL;

        return NULL;
    }
    *handle = descr;
    pg_cache_descr = descr->pg_cache_descr;

    return pg_cache_descr->page;
}

/* Gets a reference for the page */
void *rrdeng_get_page(struct rrdengine_instance *ctx, uuid_t *id, usec_t point_in_time, void **handle)
{
    struct rrdeng_page_descr *descr;
    struct page_cache_descr *pg_cache_descr;

    debug(D_RRDENGINE, "Reading existing page:");
    descr = pg_cache_lookup(ctx, NULL, id, point_in_time);
    if (NULL == descr) {
        *handle = NULL;

        return NULL;
    }
    *handle = descr;
    pg_cache_descr = descr->pg_cache_descr;

    return pg_cache_descr->page;
}

/*
 * Gathers Database Engine statistics.
 * Careful when modifying this function.
 * You must not change the indices of the statistics or user code will break.
 * You must not exceed RRDENG_NR_STATS or it will crash.
 */
void rrdeng_get_37_statistics(struct rrdengine_instance *ctx, unsigned long long *array)
{
    if (ctx == NULL)
        return;

    struct page_cache *pg_cache = &ctx->pg_cache;

    array[0] = (uint64_t)ctx->stats.metric_API_producers;
    array[1] = (uint64_t)ctx->stats.metric_API_consumers;
    array[2] = (uint64_t)pg_cache->page_descriptors;
    array[3] = (uint64_t)pg_cache->populated_pages;
    array[4] = (uint64_t)pg_cache->committed_page_index.nr_committed_pages;
    array[5] = (uint64_t)ctx->stats.pg_cache_insertions;
    array[6] = (uint64_t)ctx->stats.pg_cache_deletions;
    array[7] = (uint64_t)ctx->stats.pg_cache_hits;
    array[8] = (uint64_t)ctx->stats.pg_cache_misses;
    array[9] = (uint64_t)ctx->stats.pg_cache_backfills;
    array[10] = (uint64_t)ctx->stats.pg_cache_evictions;
    array[11] = (uint64_t)ctx->stats.before_compress_bytes;
    array[12] = (uint64_t)ctx->stats.after_compress_bytes;
    array[13] = (uint64_t)ctx->stats.before_decompress_bytes;
    array[14] = (uint64_t)ctx->stats.after_decompress_bytes;
    array[15] = (uint64_t)ctx->stats.io_write_bytes;
    array[16] = (uint64_t)ctx->stats.io_write_requests;
    array[17] = (uint64_t)ctx->stats.io_read_bytes;
    array[18] = (uint64_t)ctx->stats.io_read_requests;
    array[19] = (uint64_t)ctx->stats.io_write_extent_bytes;
    array[20] = (uint64_t)ctx->stats.io_write_extents;
    array[21] = (uint64_t)ctx->stats.io_read_extent_bytes;
    array[22] = (uint64_t)ctx->stats.io_read_extents;
    array[23] = (uint64_t)ctx->stats.datafile_creations;
    array[24] = (uint64_t)ctx->stats.datafile_deletions;
    array[25] = (uint64_t)ctx->stats.journalfile_creations;
    array[26] = (uint64_t)ctx->stats.journalfile_deletions;
    array[27] = (uint64_t)ctx->stats.page_cache_descriptors;
    array[28] = (uint64_t)ctx->stats.io_errors;
    array[29] = (uint64_t)ctx->stats.fs_errors;
    array[30] = (uint64_t)global_io_errors;
    array[31] = (uint64_t)global_fs_errors;
    array[32] = (uint64_t)rrdeng_reserved_file_descriptors;
    array[33] = (uint64_t)ctx->stats.pg_cache_over_half_dirty_events;
    array[34] = (uint64_t)global_pg_cache_over_half_dirty_events;
    array[35] = (uint64_t)ctx->stats.flushing_pressure_page_deletions;
    array[36] = (uint64_t)global_flushing_pressure_page_deletions;
    fatal_assert(RRDENG_NR_STATS == 37);
}
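
/*
 * Illustrative sketch (not part of this file): callers pass an array of at
 * least RRDENG_NR_STATS elements and read the documented indices.
 *
 *     unsigned long long stats[RRDENG_NR_STATS];
 *
 *     rrdeng_get_37_statistics(ctx, stats);
 *     // stats[7] = page cache hits, stats[8] = page cache misses, ...
 */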

/* Releases reference to page */
void rrdeng_put_page(struct rrdengine_instance *ctx, void *handle)
{
    (void)ctx;
    pg_cache_put(ctx, (struct rrdeng_page_descr *)handle);
}

/*
 * Returns 0 on success, negative on error
 */
int rrdeng_init(RRDHOST *host, struct rrdengine_instance **ctxp, char *dbfiles_path, unsigned page_cache_mb,
                unsigned disk_space_mb)
{
    struct rrdengine_instance *ctx;
    int error;
    uint32_t max_open_files;

    max_open_files = rlimit_nofile.rlim_cur / 4;

    /* reserve RRDENG_FD_BUDGET_PER_INSTANCE file descriptors for this instance */
    rrd_stat_atomic_add(&rrdeng_reserved_file_descriptors, RRDENG_FD_BUDGET_PER_INSTANCE);
    if (rrdeng_reserved_file_descriptors > max_open_files) {
        error(
            "Exceeded the budget of available file descriptors (%u/%u), cannot create new dbengine instance.",
            (unsigned)rrdeng_reserved_file_descriptors, (unsigned)max_open_files);

        rrd_stat_atomic_add(&global_fs_errors, 1);
        rrd_stat_atomic_add(&rrdeng_reserved_file_descriptors, -RRDENG_FD_BUDGET_PER_INSTANCE);
        return UV_EMFILE;
    }

    if (NULL == ctxp) {
        ctx = &multidb_ctx;
        memset(ctx, 0, sizeof(*ctx));
    } else {
        *ctxp = ctx = callocz(1, sizeof(*ctx));
    }
    ctx->global_compress_alg = RRD_LZ4;
    if (page_cache_mb < RRDENG_MIN_PAGE_CACHE_SIZE_MB)
        page_cache_mb = RRDENG_MIN_PAGE_CACHE_SIZE_MB;
    ctx->max_cache_pages = page_cache_mb * (1048576LU / RRDENG_BLOCK_SIZE);
    /* try to keep 5% of the page cache free */
    ctx->cache_pages_low_watermark = (ctx->max_cache_pages * 95LLU) / 100;
    if (disk_space_mb < RRDENG_MIN_DISK_SPACE_MB)
        disk_space_mb = RRDENG_MIN_DISK_SPACE_MB;
    ctx->max_disk_space = disk_space_mb * 1048576LLU;
    strncpyz(ctx->dbfiles_path, dbfiles_path, sizeof(ctx->dbfiles_path) - 1);
    ctx->dbfiles_path[sizeof(ctx->dbfiles_path) - 1] = '\0';
    if (NULL == host)
        strncpyz(ctx->machine_guid, registry_get_this_machine_guid(), GUID_LEN);
    else
        strncpyz(ctx->machine_guid, host->machine_guid, GUID_LEN);
    ctx->drop_metrics_under_page_cache_pressure = rrdeng_drop_metrics_under_page_cache_pressure;
    ctx->metric_API_max_producers = 0;
    ctx->quiesce = NO_QUIESCE;
    ctx->metalog_ctx = NULL; /* only set this after the metadata log has finished initializing */
    ctx->host = host;

    memset(&ctx->worker_config, 0, sizeof(ctx->worker_config));
    ctx->worker_config.ctx = ctx;
    init_page_cache(ctx);
    init_commit_log(ctx);
    error = init_rrd_files(ctx);
    if (error) {
        goto error_after_init_rrd_files;
    }

    init_completion(&ctx->rrdengine_completion);
    fatal_assert(0 == uv_thread_create(&ctx->worker_config.thread, rrdeng_worker, &ctx->worker_config));
    /* wait for worker thread to initialize */
    wait_for_completion(&ctx->rrdengine_completion);
    destroy_completion(&ctx->rrdengine_completion);
    uv_thread_set_name_np(ctx->worker_config.thread, "DBENGINE");
    if (ctx->worker_config.error) {
        goto error_after_rrdeng_worker;
    }
    error = metalog_init(ctx);
    if (error) {
        error("Failed to initialize metadata log file event loop.");
        goto error_after_rrdeng_worker;
    }

    return 0;

error_after_rrdeng_worker:
    finalize_rrd_files(ctx);
error_after_init_rrd_files:
    free_page_cache(ctx);
    if (ctx != &multidb_ctx) {
        freez(ctx);
        *ctxp = NULL;
    }
    rrd_stat_atomic_add(&rrdeng_reserved_file_descriptors, -RRDENG_FD_BUDGET_PER_INSTANCE);
    return UV_EIO;
}
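
/*
 * Illustrative sketch (not part of this file): bringing up and tearing down a
 * per-host dbengine instance. The path and sizes below are hypothetical and
 * mirror the defaults at the top of this file.
 *
 *     struct rrdengine_instance *ctx = NULL;
 *
 *     if (rrdeng_init(host, &ctx, "/var/cache/netdata/dbengine", 32, 256) < 0)
 *         return; // failed; *ctxp was reset to NULL
 *     ...
 *     rrdeng_prepare_exit(ctx); // quiesce first (see below)
 *     rrdeng_exit(ctx);         // then shut down and free
 */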

/*
 * Returns 0 on success, 1 on error
 */
int rrdeng_exit(struct rrdengine_instance *ctx)
{
    struct rrdeng_cmd cmd;

    if (NULL == ctx) {
        return 1;
    }

    /* TODO: add page to page cache */
    cmd.opcode = RRDENG_SHUTDOWN;
    rrdeng_enq_cmd(&ctx->worker_config, &cmd);

    fatal_assert(0 == uv_thread_join(&ctx->worker_config.thread));

    finalize_rrd_files(ctx);
    //metalog_exit(ctx->metalog_ctx);
    free_page_cache(ctx);

    if (ctx != &multidb_ctx) {
        freez(ctx);
    }
    rrd_stat_atomic_add(&rrdeng_reserved_file_descriptors, -RRDENG_FD_BUDGET_PER_INSTANCE);
    return 0;
}

void rrdeng_prepare_exit(struct rrdengine_instance *ctx)
{
    struct rrdeng_cmd cmd;

    if (NULL == ctx) {
        return;
    }

    init_completion(&ctx->rrdengine_completion);
    cmd.opcode = RRDENG_QUIESCE;
    rrdeng_enq_cmd(&ctx->worker_config, &cmd);

    /* wait for dbengine to quiesce */
    wait_for_completion(&ctx->rrdengine_completion);
    destroy_completion(&ctx->rrdengine_completion);
    //metalog_prepare_exit(ctx->metalog_ctx);
}