// SPDX-License-Identifier: GPL-3.0-or-later
#include "rrdengine.h"

/* Default global database instance */
struct rrdengine_instance multidb_ctx;

int default_rrdeng_page_cache_mb = 32;
int default_rrdeng_disk_quota_mb = 256;
int default_multidb_disk_quota_mb = 256;
/* Default behaviour is to unblock data collection if the page cache is full of dirty pages by dropping metrics */
uint8_t rrdeng_drop_metrics_under_page_cache_pressure = 1;

static inline struct rrdengine_instance *get_rrdeng_ctx_from_host(RRDHOST *host)
{
    return host->rrdeng_ctx;
}

/* This UUID is not unique across hosts */
void rrdeng_generate_legacy_uuid(const char *dim_id, char *chart_id, uuid_t *ret_uuid)
{
    EVP_MD_CTX *evpctx;
    unsigned char hash_value[EVP_MAX_MD_SIZE];
    unsigned int hash_len;

    evpctx = EVP_MD_CTX_create();
    EVP_DigestInit_ex(evpctx, EVP_sha256(), NULL);
    EVP_DigestUpdate(evpctx, dim_id, strlen(dim_id));
    EVP_DigestUpdate(evpctx, chart_id, strlen(chart_id));
    EVP_DigestFinal_ex(evpctx, hash_value, &hash_len);
    EVP_MD_CTX_destroy(evpctx);
    fatal_assert(hash_len > sizeof(uuid_t));
    memcpy(ret_uuid, hash_value, sizeof(uuid_t));
}

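/*
 * Note: the legacy UUID above is simply the first 16 bytes of
 * SHA-256(dim_id || chart_id), so identical dimension and chart names always
 * map to the same UUID on every host. A minimal illustration, with
 * hypothetical names:
 *
 *     uuid_t uuid;
 *     rrdeng_generate_legacy_uuid("user", "system.cpu", &uuid);
 *     // uuid now holds the leading 16 bytes of SHA-256("user" "system.cpu")
 */
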
/* Transforms a legacy UUID to be unique across hosts deterministically */
void rrdeng_convert_legacy_uuid_to_multihost(char machine_guid[GUID_LEN + 1], uuid_t *legacy_uuid, uuid_t *ret_uuid)
{
    EVP_MD_CTX *evpctx;
    unsigned char hash_value[EVP_MAX_MD_SIZE];
    unsigned int hash_len;

    evpctx = EVP_MD_CTX_create();
    EVP_DigestInit_ex(evpctx, EVP_sha256(), NULL);
    EVP_DigestUpdate(evpctx, machine_guid, GUID_LEN);
    EVP_DigestUpdate(evpctx, *legacy_uuid, sizeof(uuid_t));
    EVP_DigestFinal_ex(evpctx, hash_value, &hash_len);
    EVP_MD_CTX_destroy(evpctx);
    fatal_assert(hash_len > sizeof(uuid_t));
    memcpy(ret_uuid, hash_value, sizeof(uuid_t));
}

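/*
 * Maps a dimension to its dbengine UUID and page index. Two paths: either the
 * metric is new (or belongs to a child host in a multi-host DB), in which case
 * a proper GUID is reused or generated and a page index is created on first
 * sight, or a legacy UUID is already indexed, in which case the multi-host
 * UUID is derived from it for backward compatibility. In both cases the
 * resulting page index is wired into rd->state.
 */
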
void rrdeng_metric_init(RRDDIM *rd, uuid_t *dim_uuid)
{
    struct page_cache *pg_cache;
    struct rrdengine_instance *ctx;
    uuid_t legacy_uuid;
    Pvoid_t *PValue;
    struct pg_cache_page_index *page_index = NULL;
    int replace_instead_of_generate = 0, is_multihost_child = 0;
    RRDHOST *host = rd->rrdset->rrdhost;

    ctx = get_rrdeng_ctx_from_host(host);
    if (unlikely(!ctx)) {
        error("Failed to fetch multidb context");
        return;
    }
    pg_cache = &ctx->pg_cache;

    rrdeng_generate_legacy_uuid(rd->id, rd->rrdset->id, &legacy_uuid);
    rd->state->metric_uuid = callocz(1, sizeof(uuid_t));
    if (host != localhost && host->rrdeng_ctx == &multidb_ctx)
        is_multihost_child = 1;

    uv_rwlock_rdlock(&pg_cache->metrics_index.lock);
    PValue = JudyHSGet(pg_cache->metrics_index.JudyHS_array, &legacy_uuid, sizeof(uuid_t));
    if (likely(NULL != PValue)) {
        page_index = *PValue;
    }
    uv_rwlock_rdunlock(&pg_cache->metrics_index.lock);

    if (is_multihost_child || NULL == PValue) {
        /* First time we see the legacy UUID or metric belongs to child host in multi-host DB.
         * Drop legacy support, normal path */
        if (NULL != dim_uuid) {
            replace_instead_of_generate = 1;
            uuid_copy(*rd->state->metric_uuid, *dim_uuid);
        }
        if (unlikely(find_or_generate_guid(rd, rd->state->metric_uuid, GUID_TYPE_DIMENSION,
                                           replace_instead_of_generate))) {
            errno = 0;
            error("FAILED to reuse GUID for %s", rd->id);
            if (unlikely(find_or_generate_guid(rd, rd->state->metric_uuid, GUID_TYPE_DIMENSION, 0))) {
                errno = 0;
                error("FAILED to generate GUID for %s", rd->id);
                freez(rd->state->metric_uuid);
                rd->state->metric_uuid = NULL;
                fatal_assert(0);
            }
        }
        uv_rwlock_rdlock(&pg_cache->metrics_index.lock);
        PValue = JudyHSGet(pg_cache->metrics_index.JudyHS_array, rd->state->metric_uuid, sizeof(uuid_t));
        if (likely(NULL != PValue)) {
            page_index = *PValue;
        }
        uv_rwlock_rdunlock(&pg_cache->metrics_index.lock);
        if (NULL == PValue) {
            uv_rwlock_wrlock(&pg_cache->metrics_index.lock);
            PValue = JudyHSIns(&pg_cache->metrics_index.JudyHS_array, rd->state->metric_uuid, sizeof(uuid_t), PJE0);
            fatal_assert(NULL == *PValue); /* TODO: figure out concurrency model */
            *PValue = page_index = create_page_index(rd->state->metric_uuid);
            page_index->prev = pg_cache->metrics_index.last_page_index;
            pg_cache->metrics_index.last_page_index = page_index;
            uv_rwlock_wrunlock(&pg_cache->metrics_index.lock);
        }
    } else {
        /* There are legacy UUIDs in the database, implement backward compatibility */
        rrdeng_convert_legacy_uuid_to_multihost(rd->rrdset->rrdhost->machine_guid, &legacy_uuid,
                                                rd->state->metric_uuid);
        if (dim_uuid && uuid_compare(*rd->state->metric_uuid, *dim_uuid)) {
            error("Mismatch of metadata log DIMENSION GUID with dbengine metric GUID.");
        }
        if (unlikely(find_or_generate_guid(rd, rd->state->metric_uuid, GUID_TYPE_DIMENSION, 1))) {
            errno = 0;
            error("FAILED to generate GUID for %s", rd->id);
            freez(rd->state->metric_uuid);
            rd->state->metric_uuid = NULL;
            fatal_assert(0);
        }
    }
    rd->state->rrdeng_uuid = &page_index->id;
    rd->state->page_index = page_index;
    rd->state->compaction_id = 0;
}

/*
 * Gets a handle for storing metrics to the database.
 * The handle must be released with rrdeng_store_metric_finalize().
 */
void rrdeng_store_metric_init(RRDDIM *rd)
{
    struct rrdeng_collect_handle *handle;
    struct rrdengine_instance *ctx;
    struct pg_cache_page_index *page_index;

    ctx = get_rrdeng_ctx_from_host(rd->rrdset->rrdhost);
    handle = &rd->state->handle.rrdeng;
    handle->ctx = ctx;
    handle->descr = NULL;
    handle->prev_descr = NULL;
    handle->unaligned_page = 0;

    page_index = rd->state->page_index;
    uv_rwlock_wrlock(&page_index->lock);
    ++page_index->writers;
    uv_rwlock_wrunlock(&page_index->lock);
}

/* The page must be populated and referenced */
static int page_has_only_empty_metrics(struct rrdeng_page_descr *descr)
{
    unsigned i;
    uint8_t has_only_empty_metrics = 1;
    storage_number *page;

    page = descr->pg_cache_descr->page;
    for (i = 0 ; i < descr->page_length / sizeof(storage_number); ++i) {
        if (SN_EMPTY_SLOT != page[i]) {
            has_only_empty_metrics = 0;
            break;
        }
    }
    return has_only_empty_metrics;
}

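/*
 * Flushes the page currently being collected into, with one of three outcomes:
 * a page holding only empty slots is deleted (a hole is punched), a page with
 * data is committed for flushing to disk, and a zero-length page is freed.
 */
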
void rrdeng_store_metric_flush_current_page(RRDDIM *rd)
{
    struct rrdeng_collect_handle *handle;
    struct rrdengine_instance *ctx;
    struct rrdeng_page_descr *descr;

    handle = &rd->state->handle.rrdeng;
    ctx = handle->ctx;
    if (unlikely(!ctx))
        return;
    descr = handle->descr;
    if (unlikely(NULL == descr)) {
        return;
    }
    if (likely(descr->page_length)) {
        int page_is_empty;

        rrd_stat_atomic_add(&ctx->stats.metric_API_producers, -1);

        if (handle->prev_descr) {
            /* unpin old second page */
            pg_cache_put(ctx, handle->prev_descr);
        }
        page_is_empty = page_has_only_empty_metrics(descr);
        if (page_is_empty) {
            debug(D_RRDENGINE, "Page has empty metrics only, deleting:");
            if (unlikely(debug_flags & D_RRDENGINE))
                print_page_cache_descr(descr);
            pg_cache_put(ctx, descr);
            pg_cache_punch_hole(ctx, descr, 1, 0, NULL);
            handle->prev_descr = NULL;
        } else {
            /*
             * Disable pinning for now as it leads to deadlocks. When a collector stops collecting the extra
             * pinned page eventually gets rotated but it cannot be destroyed due to the extra reference.
             */
            /* added 1 extra reference to keep 2 dirty pages pinned per metric, expected refcnt = 2 */
            /* rrdeng_page_descr_mutex_lock(ctx, descr);
            ret = pg_cache_try_get_unsafe(descr, 0);
            rrdeng_page_descr_mutex_unlock(ctx, descr);
            fatal_assert(1 == ret);*/

            rrdeng_commit_page(ctx, descr, handle->page_correlation_id);
            /* handle->prev_descr = descr;*/
        }
    } else {
        freez(descr->pg_cache_descr->page);
        rrdeng_destroy_pg_cache_descr(ctx, descr->pg_cache_descr);
        freez(descr);
    }
    handle->descr = NULL;
}

void rrdeng_store_metric_next(RRDDIM *rd, usec_t point_in_time, storage_number number)
{
    struct rrdeng_collect_handle *handle;
    struct rrdengine_instance *ctx;
    struct page_cache *pg_cache;
    struct rrdeng_page_descr *descr;
    storage_number *page;
    uint8_t must_flush_unaligned_page = 0, perfect_page_alignment = 0;

    handle = &rd->state->handle.rrdeng;
    ctx = handle->ctx;
    pg_cache = &ctx->pg_cache;
    descr = handle->descr;

    if (descr) {
        /* Make alignment decisions */

        if (descr->page_length == rd->rrdset->rrddim_page_alignment) {
            /* this is the leading dimension that defines chart alignment */
            perfect_page_alignment = 1;
        }
        /* is the metric far enough out of alignment with the others? */
        if (unlikely(descr->page_length + sizeof(number) < rd->rrdset->rrddim_page_alignment)) {
            handle->unaligned_page = 1;
            debug(D_RRDENGINE, "Metric page is not aligned with chart:");
            if (unlikely(debug_flags & D_RRDENGINE))
                print_page_cache_descr(descr);
        }
        if (unlikely(handle->unaligned_page &&
                     /* did the other metrics change page? */
                     rd->rrdset->rrddim_page_alignment <= sizeof(number))) {
            debug(D_RRDENGINE, "Flushing unaligned metric page.");
            must_flush_unaligned_page = 1;
            handle->unaligned_page = 0;
        }
    }
    if (unlikely(NULL == descr ||
                 descr->page_length + sizeof(number) > RRDENG_BLOCK_SIZE ||
                 must_flush_unaligned_page)) {
        rrdeng_store_metric_flush_current_page(rd);

        page = rrdeng_create_page(ctx, &rd->state->page_index->id, &descr);
        fatal_assert(page);

        handle->descr = descr;
        handle->page_correlation_id = rrd_atomic_fetch_add(&pg_cache->committed_page_index.latest_corr_id, 1);

        if (0 == rd->rrdset->rrddim_page_alignment) {
            /* this is the leading dimension that defines chart alignment */
            perfect_page_alignment = 1;
        }
    }
    page = descr->pg_cache_descr->page;
    page[descr->page_length / sizeof(number)] = number;
    pg_cache_atomic_set_pg_info(descr, point_in_time, descr->page_length + sizeof(number));

    if (perfect_page_alignment)
        rd->rrdset->rrddim_page_alignment = descr->page_length;
    if (unlikely(INVALID_TIME == descr->start_time)) {
        unsigned long new_metric_API_producers, old_metric_API_max_producers, ret_metric_API_max_producers;

        descr->start_time = point_in_time;
        new_metric_API_producers = rrd_atomic_add_fetch(&ctx->stats.metric_API_producers, 1);
        while (unlikely(new_metric_API_producers > (old_metric_API_max_producers = ctx->metric_API_max_producers))) {
            /* Increase ctx->metric_API_max_producers */
            ret_metric_API_max_producers = ulong_compare_and_swap(&ctx->metric_API_max_producers,
                                                                  old_metric_API_max_producers,
                                                                  new_metric_API_producers);
            if (old_metric_API_max_producers == ret_metric_API_max_producers) {
                /* success */
                break;
            }
        }

        pg_cache_insert(ctx, rd->state->page_index, descr);
    } else {
        pg_cache_add_new_metric_time(rd->state->page_index, descr);
    }
}

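/*
 * Slot arithmetic used above, as a worked example (assuming the default
 * 4096-byte RRDENG_BLOCK_SIZE and a 4-byte storage_number): a full page holds
 * 4096 / 4 = 1024 points, a new value lands in slot
 * descr->page_length / sizeof(number), and a fresh page is started once
 * descr->page_length + sizeof(number) would exceed RRDENG_BLOCK_SIZE.
 */
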
/*
 * Releases the database reference from the handle for storing metrics.
 * Returns 1 if it's safe to delete the dimension.
 */
int rrdeng_store_metric_finalize(RRDDIM *rd)
{
    struct rrdeng_collect_handle *handle;
    struct rrdengine_instance *ctx;
    struct pg_cache_page_index *page_index;
    uint8_t can_delete_metric = 0;

    handle = &rd->state->handle.rrdeng;
    ctx = handle->ctx;
    page_index = rd->state->page_index;
    rrdeng_store_metric_flush_current_page(rd);
    if (handle->prev_descr) {
        /* unpin old second page */
        pg_cache_put(ctx, handle->prev_descr);
    }
    uv_rwlock_wrlock(&page_index->lock);
    if (!--page_index->writers && !page_index->page_count) {
        can_delete_metric = 1;
    }
    uv_rwlock_wrunlock(&page_index->lock);
    return can_delete_metric;
}

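/*
 * A minimal sketch of the collection API lifecycle, assuming a dimension that
 * has already been registered with rrdeng_metric_init() and hypothetical
 * first_t/last_t timestamps in usec:
 *
 *     rrdeng_store_metric_init(rd);
 *     for (usec_t t = first_t; t <= last_t; t += rd->update_every * USEC_PER_SEC)
 *         rrdeng_store_metric_next(rd, t, pack_storage_number(value, SN_EXISTS));
 *     int can_delete = rrdeng_store_metric_finalize(rd);
 */
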
/* Returns 1 if the data collection interval is well defined, 0 otherwise */
static int metrics_with_known_interval(struct rrdeng_page_descr *descr)
{
    unsigned page_entries;

    if (unlikely(INVALID_TIME == descr->start_time || INVALID_TIME == descr->end_time))
        return 0;
    page_entries = descr->page_length / sizeof(storage_number);
    if (likely(page_entries > 1)) {
        return 1;
    }
    return 0;
}

static inline uint32_t *pginfo_to_dt(struct rrdeng_page_info *page_info)
{
    return (uint32_t *)&page_info->scratch[0];
}

static inline uint32_t *pginfo_to_points(struct rrdeng_page_info *page_info)
{
    return (uint32_t *)&page_info->scratch[sizeof(uint32_t)];
}

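/*
 * The two accessors above overlay the page_info scratch area: bytes [0,4)
 * cache a page's data collection interval (in sec) and bytes [4,8) the number
 * of valid points found in it, letting rrdeng_variable_step_boundaries()
 * annotate pages without extra allocations.
 */
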
/**
 * Calculates the regions of different data collection intervals in a netdata chart in the time range
 * [start_time,end_time]. This call takes the netdata chart read lock.
 * @param st the netdata chart whose data collection interval boundaries are calculated.
 * @param start_time inclusive starting time in sec
 * @param end_time inclusive ending time in sec
 * @param region_info_arrayp It allocates (*region_info_arrayp) and populates it with information of regions of a
 *        reference dimension that have different data collection intervals and overlap with the time range
 *        [start_time,end_time]. The caller must free (*region_info_arrayp) with freez(). If (*region_info_arrayp)
 *        is set to NULL nothing was allocated.
 * @param max_intervalp is dereferenced and set to be the largest data collection interval of all regions.
 * @return number of regions with different data collection intervals.
 */
unsigned rrdeng_variable_step_boundaries(RRDSET *st, time_t start_time, time_t end_time,
                                         struct rrdeng_region_info **region_info_arrayp, unsigned *max_intervalp,
                                         struct context_param *context_param_list)
{
    struct pg_cache_page_index *page_index;
    struct rrdengine_instance *ctx;
    unsigned pages_nr;
    RRDDIM *rd_iter, *rd;
    struct rrdeng_page_info *page_info_array, *curr, *prev, *old_prev;
    unsigned i, j, page_entries, region_points, page_points, regions, max_interval;
    time_t now;
    usec_t dt, current_position_time, max_time = 0, min_time, curr_time, first_valid_time_in_page;
    struct rrdeng_region_info *region_info_array;
    uint8_t is_first_region_initialized;

    ctx = get_rrdeng_ctx_from_host(st->rrdhost);
    regions = 1;
    *max_intervalp = max_interval = 0;
    region_info_array = NULL;
    *region_info_arrayp = NULL;
    page_info_array = NULL;

    RRDDIM *temp_rd = context_param_list ? context_param_list->rd : NULL;

    rrdset_rdlock(st);
    for (rd_iter = temp_rd ? temp_rd : st->dimensions, rd = NULL, min_time = (usec_t)-1 ; rd_iter ; rd_iter = rd_iter->next) {
        /*
         * Choose oldest dimension as reference. This is not equivalent to the union of all dimensions
         * but it is a best effort approximation with a bias towards older metrics in a chart. It
         * matches netdata behaviour in the sense that dimensions are generally aligned in a chart
         * and older dimensions contain more information about the time range. It does not work well
         * for metrics that have recently stopped being collected.
         */
        curr_time = pg_cache_oldest_time_in_range(ctx, rd_iter->state->rrdeng_uuid,
                                                  start_time * USEC_PER_SEC, end_time * USEC_PER_SEC);
        if (INVALID_TIME != curr_time && curr_time < min_time) {
            rd = rd_iter;
            min_time = curr_time;
        }
    }
    rrdset_unlock(st);
    if (NULL == rd) {
        return 1;
    }
    pages_nr = pg_cache_preload(ctx, rd->state->rrdeng_uuid, start_time * USEC_PER_SEC, end_time * USEC_PER_SEC,
                                &page_info_array, &page_index);
    if (pages_nr) {
        /* conservative allocation, will reduce the size later if necessary */
        region_info_array = mallocz(sizeof(*region_info_array) * pages_nr);
    }
    is_first_region_initialized = 0;
    region_points = 0;

    /* pages loop */
    for (i = 0, curr = NULL, prev = NULL ; i < pages_nr ; ++i) {
        old_prev = prev;
        prev = curr;
        curr = &page_info_array[i];
        *pginfo_to_points(curr) = 0; /* initialize to invalid page */
        *pginfo_to_dt(curr) = 0; /* no known data collection interval yet */
        if (unlikely(INVALID_TIME == curr->start_time || INVALID_TIME == curr->end_time ||
                     curr->end_time < curr->start_time)) {
            info("Ignoring page with invalid timestamps.");
            prev = old_prev;
            continue;
        }
        page_entries = curr->page_length / sizeof(storage_number);
        fatal_assert(0 != page_entries);
        if (likely(1 != page_entries)) {
            dt = (curr->end_time - curr->start_time) / (page_entries - 1);
            *pginfo_to_dt(curr) = ROUND_USEC_TO_SEC(dt);
            if (unlikely(0 == *pginfo_to_dt(curr)))
                *pginfo_to_dt(curr) = 1;
        } else {
            dt = 0;
        }
        for (j = 0, page_points = 0 ; j < page_entries ; ++j) {
            uint8_t is_metric_out_of_order, is_metric_earlier_than_range;

            is_metric_earlier_than_range = 0;
            is_metric_out_of_order = 0;

            current_position_time = curr->start_time + j * dt;
            now = current_position_time / USEC_PER_SEC;
            if (now > end_time) { /* there will be no more pages in the time range */
                break;
            }
            if (now < start_time)
                is_metric_earlier_than_range = 1;
            if (unlikely(current_position_time < max_time)) /* just went back in time */
                is_metric_out_of_order = 1;
            if (is_metric_earlier_than_range || unlikely(is_metric_out_of_order)) {
                if (unlikely(is_metric_out_of_order))
                    info("Ignoring metric with out of order timestamp.");
                continue; /* next entry */
            }
            /* here is a valid metric */
            ++page_points;
            region_info_array[regions - 1].points = ++region_points;
            max_time = current_position_time;
            if (1 == page_points)
                first_valid_time_in_page = current_position_time;
            if (unlikely(!is_first_region_initialized)) {
                fatal_assert(1 == regions);
                /* this is the first region */
                region_info_array[0].start_time = current_position_time;
                is_first_region_initialized = 1;
            }
        }
        *pginfo_to_points(curr) = page_points;
        if (0 == page_points) {
            prev = old_prev;
            continue;
        }
        if (unlikely(0 == *pginfo_to_dt(curr))) { /* unknown data collection interval */
            fatal_assert(1 == page_points);

            if (likely(NULL != prev)) { /* get interval from previous page */
                *pginfo_to_dt(curr) = *pginfo_to_dt(prev);
            } else { /* there is no previous page in the query */
                struct rrdeng_page_info db_page_info;

                /* go to database */
                pg_cache_get_filtered_info_prev(ctx, page_index, curr->start_time,
                                                metrics_with_known_interval, &db_page_info);
                if (unlikely(db_page_info.start_time == INVALID_TIME || db_page_info.end_time == INVALID_TIME ||
                             0 == db_page_info.page_length)) { /* nothing in the database, default to update_every */
                    *pginfo_to_dt(curr) = rd->update_every;
                } else {
                    unsigned db_entries;
                    usec_t db_dt;

                    db_entries = db_page_info.page_length / sizeof(storage_number);
                    db_dt = (db_page_info.end_time - db_page_info.start_time) / (db_entries - 1);
                    *pginfo_to_dt(curr) = ROUND_USEC_TO_SEC(db_dt);
                    if (unlikely(0 == *pginfo_to_dt(curr)))
                        *pginfo_to_dt(curr) = 1;
                }
            }
        }
        if (likely(prev) && unlikely(*pginfo_to_dt(curr) != *pginfo_to_dt(prev))) {
            info("Data collection interval change detected in query: %"PRIu32" -> %"PRIu32,
                 *pginfo_to_dt(prev), *pginfo_to_dt(curr));
            region_info_array[regions++ - 1].points -= page_points;
            region_info_array[regions - 1].points = region_points = page_points;
            region_info_array[regions - 1].start_time = first_valid_time_in_page;
        }
        if (*pginfo_to_dt(curr) > max_interval)
            max_interval = *pginfo_to_dt(curr);
        region_info_array[regions - 1].update_every = *pginfo_to_dt(curr);
    }
    if (page_info_array)
        freez(page_info_array);
    if (region_info_array) {
        if (likely(is_first_region_initialized)) {
            /* free unnecessary memory */
            region_info_array = reallocz(region_info_array, sizeof(*region_info_array) * regions);
            *region_info_arrayp = region_info_array;
            *max_intervalp = max_interval;
        } else {
            /* empty result */
            freez(region_info_array);
        }
    }
    return regions;
}

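/*
 * Interpreting the result, as a hedged illustration: a chart collected every
 * 1 sec that later switched to every 2 sec would yield regions == 2 with
 *
 *     region_info_array[0] = { .start_time = <usec>, .update_every = 1, .points = N0 };
 *     region_info_array[1] = { .start_time = <usec>, .update_every = 2, .points = N1 };
 *
 * and *max_intervalp set to 2.
 */
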
/*
 * Gets a handle for loading metrics from the database.
 * The handle must be released with rrdeng_load_metric_finalize().
 */
void rrdeng_load_metric_init(RRDDIM *rd, struct rrddim_query_handle *rrdimm_handle, time_t start_time, time_t end_time)
{
    struct rrdeng_query_handle *handle;
    struct rrdengine_instance *ctx;
    unsigned pages_nr;

    ctx = get_rrdeng_ctx_from_host(rd->rrdset->rrdhost);
    rrdimm_handle->start_time = start_time;
    rrdimm_handle->end_time = end_time;
    handle = &rrdimm_handle->rrdeng;
    handle->next_page_time = start_time;
    handle->now = start_time;
    handle->position = 0;
    handle->ctx = ctx;
    handle->descr = NULL;
    pages_nr = pg_cache_preload(ctx, rd->state->rrdeng_uuid, start_time * USEC_PER_SEC, end_time * USEC_PER_SEC,
                                NULL, &handle->page_index);
    if (unlikely(NULL == handle->page_index || 0 == pages_nr))
        /* there are no metrics to load */
        handle->next_page_time = INVALID_TIME;
}

/* Returns the metric and sets its timestamp into current_time */
storage_number rrdeng_load_metric_next(struct rrddim_query_handle *rrdimm_handle, time_t *current_time)
{
    struct rrdeng_query_handle *handle;
    struct rrdengine_instance *ctx;
    struct rrdeng_page_descr *descr;
    storage_number *page, ret;
    unsigned position, entries;
    usec_t next_page_time = 0, current_position_time, page_end_time = 0;
    uint32_t page_length;

    handle = &rrdimm_handle->rrdeng;
    if (unlikely(INVALID_TIME == handle->next_page_time)) {
        return SN_EMPTY_SLOT;
    }
    ctx = handle->ctx;
    if (unlikely(NULL == (descr = handle->descr))) {
        /* it's the first call */
        next_page_time = handle->next_page_time * USEC_PER_SEC;
    } else {
        pg_cache_atomic_get_pg_info(descr, &page_end_time, &page_length);
    }
    position = handle->position + 1;

    if (unlikely(NULL == descr ||
                 position >= (page_length / sizeof(storage_number)))) {
        /* We need to get a new page */
        if (descr) {
            /* Drop old page's reference */
#ifdef NETDATA_INTERNAL_CHECKS
            rrd_stat_atomic_add(&ctx->stats.metric_API_consumers, -1);
#endif
            pg_cache_put(ctx, descr);
            handle->descr = NULL;
            handle->next_page_time = (page_end_time / USEC_PER_SEC) + 1;
            if (unlikely(handle->next_page_time > rrdimm_handle->end_time)) {
                goto no_more_metrics;
            }
            next_page_time = handle->next_page_time * USEC_PER_SEC;
        }
        descr = pg_cache_lookup_next(ctx, handle->page_index, &handle->page_index->id,
                                     next_page_time, rrdimm_handle->end_time * USEC_PER_SEC);
        if (NULL == descr) {
            goto no_more_metrics;
        }
#ifdef NETDATA_INTERNAL_CHECKS
        rrd_stat_atomic_add(&ctx->stats.metric_API_consumers, 1);
#endif
        handle->descr = descr;
        pg_cache_atomic_get_pg_info(descr, &page_end_time, &page_length);
        if (unlikely(INVALID_TIME == descr->start_time ||
                     INVALID_TIME == page_end_time)) {
            goto no_more_metrics;
        }
        if (unlikely(descr->start_time != page_end_time && next_page_time > descr->start_time)) {
            /* we're in the middle of the page somewhere */
            entries = page_length / sizeof(storage_number);
            position = ((uint64_t)(next_page_time - descr->start_time)) * (entries - 1) /
                       (page_end_time - descr->start_time);
        } else {
            position = 0;
        }
    }
    page = descr->pg_cache_descr->page;
    ret = page[position];
    entries = page_length / sizeof(storage_number);
    if (entries > 1) {
        usec_t dt;

        dt = (page_end_time - descr->start_time) / (entries - 1);
        current_position_time = descr->start_time + position * dt;
    } else {
        current_position_time = descr->start_time;
    }
    handle->position = position;
    handle->now = current_position_time / USEC_PER_SEC;
/*  fatal_assert(handle->now >= rrdimm_handle->start_time && handle->now <= rrdimm_handle->end_time);
    The above assertion is an approximation and needs to take update_every into account */
    if (unlikely(handle->now >= rrdimm_handle->end_time)) {
        /* next calls will not load any more metrics */
        handle->next_page_time = INVALID_TIME;
    }
    *current_time = handle->now;
    return ret;

no_more_metrics:
    handle->next_page_time = INVALID_TIME;
    return SN_EMPTY_SLOT;
}

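/*
 * Note on the mid-page entry point above: when a query starts inside a page,
 * the slot is interpolated linearly from the page's time range,
 *
 *     position = (next_page_time - start_time) * (entries - 1) / (page_end_time - start_time)
 *
 * which is exact for pages with a constant data collection interval.
 */
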
int rrdeng_load_metric_is_finished(struct rrddim_query_handle *rrdimm_handle)
{
    struct rrdeng_query_handle *handle;

    handle = &rrdimm_handle->rrdeng;
    return (INVALID_TIME == handle->next_page_time);
}

/*
 * Releases the database reference from the handle for loading metrics.
 */
void rrdeng_load_metric_finalize(struct rrddim_query_handle *rrdimm_handle)
{
    struct rrdeng_query_handle *handle;
    struct rrdengine_instance *ctx;
    struct rrdeng_page_descr *descr;

    handle = &rrdimm_handle->rrdeng;
    ctx = handle->ctx;
    descr = handle->descr;
    if (descr) {
#ifdef NETDATA_INTERNAL_CHECKS
        rrd_stat_atomic_add(&ctx->stats.metric_API_consumers, -1);
#endif
        pg_cache_put(ctx, descr);
    }
}

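/*
 * A minimal sketch of the query API lifecycle, assuming rd overlaps the
 * requested [start_t, end_t] window:
 *
 *     struct rrddim_query_handle h;
 *     time_t t;
 *     rrdeng_load_metric_init(rd, &h, start_t, end_t);
 *     while (!rrdeng_load_metric_is_finished(&h)) {
 *         storage_number sn = rrdeng_load_metric_next(&h, &t);
 *         if (SN_EMPTY_SLOT != sn) {
 *             // consume the point collected at time t
 *         }
 *     }
 *     rrdeng_load_metric_finalize(&h);
 */
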
time_t rrdeng_metric_latest_time(RRDDIM *rd)
{
    struct pg_cache_page_index *page_index;

    page_index = rd->state->page_index;
    return page_index->latest_time / USEC_PER_SEC;
}

time_t rrdeng_metric_oldest_time(RRDDIM *rd)
{
    struct pg_cache_page_index *page_index;

    page_index = rd->state->page_index;
    return page_index->oldest_time / USEC_PER_SEC;
}

/* Also gets a reference for the page */
void *rrdeng_create_page(struct rrdengine_instance *ctx, uuid_t *id, struct rrdeng_page_descr **ret_descr)
{
    struct rrdeng_page_descr *descr;
    struct page_cache_descr *pg_cache_descr;
    void *page;

    /* TODO: check maximum number of pages in page cache limit */
    descr = pg_cache_create_descr();
    descr->id = id; /* TODO: add page type: metric, log, something? */
    page = mallocz(RRDENG_BLOCK_SIZE); /* TODO: add page size */
    rrdeng_page_descr_mutex_lock(ctx, descr);
    pg_cache_descr = descr->pg_cache_descr;
    pg_cache_descr->page = page;
    pg_cache_descr->flags = RRD_PAGE_DIRTY /*| RRD_PAGE_LOCKED */ | RRD_PAGE_POPULATED /* | BEING_COLLECTED */;
    pg_cache_descr->refcnt = 1;

    debug(D_RRDENGINE, "Created new page:");
    if (unlikely(debug_flags & D_RRDENGINE))
        print_page_cache_descr(descr);
    rrdeng_page_descr_mutex_unlock(ctx, descr);
    *ret_descr = descr;
    return page;
}

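/*
 * Committing queues a dirty page for flushing. When more than half of the
 * page cache is dirty a rate-limited warning is emitted; when the committed
 * hard limit is reached and dropping is enabled, the oldest in-memory page is
 * invalidated to unblock data collection.
 */
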
/* The page must not be empty */
void rrdeng_commit_page(struct rrdengine_instance *ctx, struct rrdeng_page_descr *descr,
                        Word_t page_correlation_id)
{
    struct page_cache *pg_cache = &ctx->pg_cache;
    Pvoid_t *PValue;
    unsigned nr_committed_pages;

    if (unlikely(NULL == descr)) {
        debug(D_RRDENGINE, "%s: page descriptor is NULL, page has already been force-committed.", __func__);
        return;
    }
    fatal_assert(descr->page_length);

    uv_rwlock_wrlock(&pg_cache->committed_page_index.lock);
    PValue = JudyLIns(&pg_cache->committed_page_index.JudyL_array, page_correlation_id, PJE0);
    *PValue = descr;
    nr_committed_pages = ++pg_cache->committed_page_index.nr_committed_pages;
    uv_rwlock_wrunlock(&pg_cache->committed_page_index.lock);

    if (nr_committed_pages >= pg_cache_hard_limit(ctx) / 2) {
        /* over 50% of pages have not been committed yet */

        if (ctx->drop_metrics_under_page_cache_pressure &&
            nr_committed_pages >= pg_cache_committed_hard_limit(ctx)) {
            /* 100% of pages are dirty */
            struct rrdeng_cmd cmd;

            cmd.opcode = RRDENG_INVALIDATE_OLDEST_MEMORY_PAGE;
            rrdeng_enq_cmd(&ctx->worker_config, &cmd);
        } else {
            if (0 == (unsigned long) ctx->stats.pg_cache_over_half_dirty_events) {
                /* only print the first time */
                errno = 0;
                error("Failed to flush dirty buffers quickly enough in dbengine instance \"%s\". "
                      "Metric data at risk of not being stored in the database, "
                      "please reduce disk load or use a faster disk.", ctx->dbfiles_path);
            }
            rrd_stat_atomic_add(&ctx->stats.pg_cache_over_half_dirty_events, 1);
            rrd_stat_atomic_add(&global_pg_cache_over_half_dirty_events, 1);
        }
    }

    pg_cache_put(ctx, descr);
}

/* Gets a reference for the page */
void *rrdeng_get_latest_page(struct rrdengine_instance *ctx, uuid_t *id, void **handle)
{
    struct rrdeng_page_descr *descr;
    struct page_cache_descr *pg_cache_descr;

    debug(D_RRDENGINE, "Reading existing page:");
    descr = pg_cache_lookup(ctx, NULL, id, INVALID_TIME);
    if (NULL == descr) {
        *handle = NULL;
        return NULL;
    }
    *handle = descr;
    pg_cache_descr = descr->pg_cache_descr;
    return pg_cache_descr->page;
}

/* Gets a reference for the page */
void *rrdeng_get_page(struct rrdengine_instance *ctx, uuid_t *id, usec_t point_in_time, void **handle)
{
    struct rrdeng_page_descr *descr;
    struct page_cache_descr *pg_cache_descr;

    debug(D_RRDENGINE, "Reading existing page:");
    descr = pg_cache_lookup(ctx, NULL, id, point_in_time);
    if (NULL == descr) {
        *handle = NULL;
        return NULL;
    }
    *handle = descr;
    pg_cache_descr = descr->pg_cache_descr;
    return pg_cache_descr->page;
}

/*
 * Gathers Database Engine statistics.
 * Careful when modifying this function.
 * You must not change the indices of the statistics or user code will break.
 * You must not exceed RRDENG_NR_STATS or it will crash.
 */
void rrdeng_get_37_statistics(struct rrdengine_instance *ctx, unsigned long long *array)
{
    if (ctx == NULL)
        return;

    struct page_cache *pg_cache = &ctx->pg_cache;

    array[0] = (uint64_t)ctx->stats.metric_API_producers;
    array[1] = (uint64_t)ctx->stats.metric_API_consumers;
    array[2] = (uint64_t)pg_cache->page_descriptors;
    array[3] = (uint64_t)pg_cache->populated_pages;
    array[4] = (uint64_t)pg_cache->committed_page_index.nr_committed_pages;
    array[5] = (uint64_t)ctx->stats.pg_cache_insertions;
    array[6] = (uint64_t)ctx->stats.pg_cache_deletions;
    array[7] = (uint64_t)ctx->stats.pg_cache_hits;
    array[8] = (uint64_t)ctx->stats.pg_cache_misses;
    array[9] = (uint64_t)ctx->stats.pg_cache_backfills;
    array[10] = (uint64_t)ctx->stats.pg_cache_evictions;
    array[11] = (uint64_t)ctx->stats.before_compress_bytes;
    array[12] = (uint64_t)ctx->stats.after_compress_bytes;
    array[13] = (uint64_t)ctx->stats.before_decompress_bytes;
    array[14] = (uint64_t)ctx->stats.after_decompress_bytes;
    array[15] = (uint64_t)ctx->stats.io_write_bytes;
    array[16] = (uint64_t)ctx->stats.io_write_requests;
    array[17] = (uint64_t)ctx->stats.io_read_bytes;
    array[18] = (uint64_t)ctx->stats.io_read_requests;
    array[19] = (uint64_t)ctx->stats.io_write_extent_bytes;
    array[20] = (uint64_t)ctx->stats.io_write_extents;
    array[21] = (uint64_t)ctx->stats.io_read_extent_bytes;
    array[22] = (uint64_t)ctx->stats.io_read_extents;
    array[23] = (uint64_t)ctx->stats.datafile_creations;
    array[24] = (uint64_t)ctx->stats.datafile_deletions;
    array[25] = (uint64_t)ctx->stats.journalfile_creations;
    array[26] = (uint64_t)ctx->stats.journalfile_deletions;
    array[27] = (uint64_t)ctx->stats.page_cache_descriptors;
    array[28] = (uint64_t)ctx->stats.io_errors;
    array[29] = (uint64_t)ctx->stats.fs_errors;
    array[30] = (uint64_t)global_io_errors;
    array[31] = (uint64_t)global_fs_errors;
    array[32] = (uint64_t)rrdeng_reserved_file_descriptors;
    array[33] = (uint64_t)ctx->stats.pg_cache_over_half_dirty_events;
    array[34] = (uint64_t)global_pg_cache_over_half_dirty_events;
    array[35] = (uint64_t)ctx->stats.flushing_pressure_page_deletions;
    array[36] = (uint64_t)global_flushing_pressure_page_deletions;
    fatal_assert(RRDENG_NR_STATS == 37);
}

/* Releases reference to page */
void rrdeng_put_page(struct rrdengine_instance *ctx, void *handle)
{
    pg_cache_put(ctx, (struct rrdeng_page_descr *)handle);
}

/*
 * Returns 0 on success, negative on error
 */
int rrdeng_init(RRDHOST *host, struct rrdengine_instance **ctxp, char *dbfiles_path, unsigned page_cache_mb,
                unsigned disk_space_mb)
{
    struct rrdengine_instance *ctx;
    int error;
    uint32_t max_open_files;

    max_open_files = rlimit_nofile.rlim_cur / 4;

    /* reserve RRDENG_FD_BUDGET_PER_INSTANCE file descriptors for this instance */
    rrd_stat_atomic_add(&rrdeng_reserved_file_descriptors, RRDENG_FD_BUDGET_PER_INSTANCE);
    if (rrdeng_reserved_file_descriptors > max_open_files) {
        error("Exceeded the budget of available file descriptors (%u/%u), cannot create new dbengine instance.",
              (unsigned)rrdeng_reserved_file_descriptors, (unsigned)max_open_files);
        rrd_stat_atomic_add(&global_fs_errors, 1);
        rrd_stat_atomic_add(&rrdeng_reserved_file_descriptors, -RRDENG_FD_BUDGET_PER_INSTANCE);
        return UV_EMFILE;
    }

    if (NULL == ctxp) {
        ctx = &multidb_ctx;
        memset(ctx, 0, sizeof(*ctx));
    } else {
        *ctxp = ctx = callocz(1, sizeof(*ctx));
    }
    ctx->global_compress_alg = RRD_LZ4;
    if (page_cache_mb < RRDENG_MIN_PAGE_CACHE_SIZE_MB)
        page_cache_mb = RRDENG_MIN_PAGE_CACHE_SIZE_MB;
    ctx->max_cache_pages = page_cache_mb * (1048576LU / RRDENG_BLOCK_SIZE);
    /* try to keep 5% of the page cache free */
    ctx->cache_pages_low_watermark = (ctx->max_cache_pages * 95LLU) / 100;
    if (disk_space_mb < RRDENG_MIN_DISK_SPACE_MB)
        disk_space_mb = RRDENG_MIN_DISK_SPACE_MB;
    ctx->max_disk_space = disk_space_mb * 1048576LLU;
    strncpyz(ctx->dbfiles_path, dbfiles_path, sizeof(ctx->dbfiles_path) - 1);
    ctx->dbfiles_path[sizeof(ctx->dbfiles_path) - 1] = '\0';
    if (NULL == host)
        strncpyz(ctx->machine_guid, registry_get_this_machine_guid(), GUID_LEN);
    else
        strncpyz(ctx->machine_guid, host->machine_guid, GUID_LEN);
    ctx->drop_metrics_under_page_cache_pressure = rrdeng_drop_metrics_under_page_cache_pressure;
    ctx->metric_API_max_producers = 0;
    ctx->quiesce = NO_QUIESCE;
    ctx->metalog_ctx = NULL; /* only set this after the metadata log has finished initializing */
    ctx->host = host;

    memset(&ctx->worker_config, 0, sizeof(ctx->worker_config));
    ctx->worker_config.ctx = ctx;
    init_page_cache(ctx);
    init_commit_log(ctx);
    error = init_rrd_files(ctx);
    if (error) {
        goto error_after_init_rrd_files;
    }

    init_completion(&ctx->rrdengine_completion);
    fatal_assert(0 == uv_thread_create(&ctx->worker_config.thread, rrdeng_worker, &ctx->worker_config));
    /* wait for worker thread to initialize */
    wait_for_completion(&ctx->rrdengine_completion);
    destroy_completion(&ctx->rrdengine_completion);
    uv_thread_set_name_np(ctx->worker_config.thread, "DBENGINE");
    if (ctx->worker_config.error) {
        goto error_after_rrdeng_worker;
    }
    error = metalog_init(ctx);
    if (error) {
        error("Failed to initialize metadata log file event loop.");
        goto error_after_rrdeng_worker;
    }

    return 0;

error_after_rrdeng_worker:
    finalize_rrd_files(ctx);
error_after_init_rrd_files:
    free_page_cache(ctx);
    if (ctx != &multidb_ctx) {
        freez(ctx);
        *ctxp = NULL;
    }
    rrd_stat_atomic_add(&rrdeng_reserved_file_descriptors, -RRDENG_FD_BUDGET_PER_INSTANCE);
    return UV_EIO;
}

/*
 * Returns 0 on success, 1 on error
 */
int rrdeng_exit(struct rrdengine_instance *ctx)
{
    struct rrdeng_cmd cmd;

    if (NULL == ctx) {
        return 1;
    }

    /* TODO: add page to page cache */
    cmd.opcode = RRDENG_SHUTDOWN;
    rrdeng_enq_cmd(&ctx->worker_config, &cmd);

    fatal_assert(0 == uv_thread_join(&ctx->worker_config.thread));
    finalize_rrd_files(ctx);
    metalog_exit(ctx->metalog_ctx);
    free_page_cache(ctx);

    if (ctx != &multidb_ctx) {
        freez(ctx);
    }
    rrd_stat_atomic_add(&rrdeng_reserved_file_descriptors, -RRDENG_FD_BUDGET_PER_INSTANCE);
    return 0;
}

void rrdeng_prepare_exit(struct rrdengine_instance *ctx)
{
    struct rrdeng_cmd cmd;

    if (NULL == ctx) {
        return;
    }

    init_completion(&ctx->rrdengine_completion);
    cmd.opcode = RRDENG_QUIESCE;
    rrdeng_enq_cmd(&ctx->worker_config, &cmd);

    /* wait for dbengine to quiesce */
    wait_for_completion(&ctx->rrdengine_completion);
    destroy_completion(&ctx->rrdengine_completion);
    metalog_prepare_exit(ctx->metalog_ctx);
}
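
/*
 * A minimal sketch of an instance's lifecycle, assuming a host-specific
 * (non-multidb) instance and hypothetical sizes and path:
 *
 *     struct rrdengine_instance *ctx;
 *     if (rrdeng_init(host, &ctx, "/var/cache/netdata/dbengine", 32, 256))
 *         return;                   // failed, *ctxp was reset to NULL
 *     // ... collect and query metrics ...
 *     rrdeng_prepare_exit(ctx);     // quiesce the event loop first
 *     rrdeng_exit(ctx);             // join the worker, flush, and free
 */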