rrdengineapi.c 38 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022
// SPDX-License-Identifier: GPL-3.0-or-later
#include "rrdengine.h"

/* Default global database instance */
struct rrdengine_instance multidb_ctx;

/* Tunable defaults — presumably overridden by configuration at startup; TODO confirm against the config loader */
int default_rrdeng_page_cache_mb = 32;
int default_rrdeng_disk_quota_mb = 256;
int default_multidb_disk_quota_mb = 256;

/* Default behaviour is to unblock data collection if the page cache is full of dirty pages by dropping metrics */
uint8_t rrdeng_drop_metrics_under_page_cache_pressure = 1;
/* Returns the dbengine instance associated with the given host (may be NULL). */
static inline struct rrdengine_instance *get_rrdeng_ctx_from_host(RRDHOST *host)
{
    return host->rrdeng_ctx;
}
  14. /* This UUID is not unique across hosts */
  15. void rrdeng_generate_legacy_uuid(const char *dim_id, char *chart_id, uuid_t *ret_uuid)
  16. {
  17. EVP_MD_CTX *evpctx;
  18. unsigned char hash_value[EVP_MAX_MD_SIZE];
  19. unsigned int hash_len;
  20. evpctx = EVP_MD_CTX_create();
  21. EVP_DigestInit_ex(evpctx, EVP_sha256(), NULL);
  22. EVP_DigestUpdate(evpctx, dim_id, strlen(dim_id));
  23. EVP_DigestUpdate(evpctx, chart_id, strlen(chart_id));
  24. EVP_DigestFinal_ex(evpctx, hash_value, &hash_len);
  25. EVP_MD_CTX_destroy(evpctx);
  26. fatal_assert(hash_len > sizeof(uuid_t));
  27. memcpy(ret_uuid, hash_value, sizeof(uuid_t));
  28. }
  29. /* Transform legacy UUID to be unique across hosts deterministically */
  30. void rrdeng_convert_legacy_uuid_to_multihost(char machine_guid[GUID_LEN + 1], uuid_t *legacy_uuid, uuid_t *ret_uuid)
  31. {
  32. EVP_MD_CTX *evpctx;
  33. unsigned char hash_value[EVP_MAX_MD_SIZE];
  34. unsigned int hash_len;
  35. evpctx = EVP_MD_CTX_create();
  36. EVP_DigestInit_ex(evpctx, EVP_sha256(), NULL);
  37. EVP_DigestUpdate(evpctx, machine_guid, GUID_LEN);
  38. EVP_DigestUpdate(evpctx, *legacy_uuid, sizeof(uuid_t));
  39. EVP_DigestFinal_ex(evpctx, hash_value, &hash_len);
  40. EVP_MD_CTX_destroy(evpctx);
  41. fatal_assert(hash_len > sizeof(uuid_t));
  42. memcpy(ret_uuid, hash_value, sizeof(uuid_t));
  43. }
/*
 * Binds a dimension to its page-cache index, creating the index when the
 * metric is seen for the first time. Supports databases that still contain
 * legacy (per-host, non-unique) UUIDs: if the legacy UUID is found and this
 * is not a child host in a multi-host DB, the dimension's UUID is rewritten
 * to the deterministic multihost form. Sets rd->state->rrdeng_uuid and
 * rd->state->page_index on success; logs and returns early when the host has
 * no dbengine context.
 */
void rrdeng_metric_init(RRDDIM *rd)
{
    struct page_cache *pg_cache;
    struct rrdengine_instance *ctx;
    uuid_t legacy_uuid;
    uuid_t multihost_legacy_uuid;
    Pvoid_t *PValue;
    struct pg_cache_page_index *page_index = NULL;
    int is_multihost_child = 0;
    RRDHOST *host = rd->rrdset->rrdhost;

    ctx = get_rrdeng_ctx_from_host(rd->rrdset->rrdhost);
    if (unlikely(!ctx)) {
        error("Failed to fetch multidb context");
        return;
    }
    pg_cache = &ctx->pg_cache;

    /* The legacy UUID is derived only from dimension and chart ids, so it is
     * unique within one host but not across hosts. */
    rrdeng_generate_legacy_uuid(rd->id, rd->rrdset->id, &legacy_uuid);
    if (host != localhost && host->rrdeng_ctx == &multidb_ctx)
        is_multihost_child = 1;

    /* First probe: does the index already contain the legacy UUID? */
    uv_rwlock_rdlock(&pg_cache->metrics_index.lock);
    PValue = JudyHSGet(pg_cache->metrics_index.JudyHS_array, &legacy_uuid, sizeof(uuid_t));
    if (likely(NULL != PValue)) {
        page_index = *PValue;
    }
    uv_rwlock_rdunlock(&pg_cache->metrics_index.lock);
    if (is_multihost_child || NULL == PValue) {
        /* First time we see the legacy UUID or metric belongs to child host in multi-host DB.
         * Drop legacy support, normal path */
        uv_rwlock_rdlock(&pg_cache->metrics_index.lock);
        PValue = JudyHSGet(pg_cache->metrics_index.JudyHS_array, &rd->state->metric_uuid, sizeof(uuid_t));
        if (likely(NULL != PValue)) {
            page_index = *PValue;
        }
        uv_rwlock_rdunlock(&pg_cache->metrics_index.lock);
        if (NULL == PValue) {
            /* NOTE(review): the index could gain this UUID between the
             * read-unlock above and the write-lock below; the assert after
             * JudyHSIns relies on that not happening — confirm the intended
             * concurrency model. */
            uv_rwlock_wrlock(&pg_cache->metrics_index.lock);
            PValue = JudyHSIns(&pg_cache->metrics_index.JudyHS_array, &rd->state->metric_uuid, sizeof(uuid_t), PJE0);
            fatal_assert(NULL == *PValue); /* TODO: figure out concurrency model */
            *PValue = page_index = create_page_index(&rd->state->metric_uuid);
            /* Link the new index at the head of the metrics-index list. */
            page_index->prev = pg_cache->metrics_index.last_page_index;
            pg_cache->metrics_index.last_page_index = page_index;
            uv_rwlock_wrunlock(&pg_cache->metrics_index.lock);
        }
    } else {
        /* There are legacy UUIDs in the database, implement backward compatibility */
        rrdeng_convert_legacy_uuid_to_multihost(rd->rrdset->rrdhost->machine_guid, &legacy_uuid,
                                                &multihost_legacy_uuid);
        /* uuid_compare() is non-zero when the stored UUID differs from the
         * converted one and the metadata store must be refreshed. */
        int need_to_store = uuid_compare(rd->state->metric_uuid, multihost_legacy_uuid);
        uuid_copy(rd->state->metric_uuid, multihost_legacy_uuid);
        if (unlikely(need_to_store))
            (void)sql_store_dimension(&rd->state->metric_uuid, rd->rrdset->chart_uuid, rd->id, rd->name, rd->multiplier, rd->divisor,
                                      rd->algorithm);
    }
    rd->state->rrdeng_uuid = &page_index->id;
    rd->state->page_index = page_index;
}
  100. /*
  101. * Gets a handle for storing metrics to the database.
  102. * The handle must be released with rrdeng_store_metric_final().
  103. */
  104. void rrdeng_store_metric_init(RRDDIM *rd)
  105. {
  106. struct rrdeng_collect_handle *handle;
  107. struct rrdengine_instance *ctx;
  108. struct pg_cache_page_index *page_index;
  109. ctx = get_rrdeng_ctx_from_host(rd->rrdset->rrdhost);
  110. handle = &rd->state->handle.rrdeng;
  111. handle->ctx = ctx;
  112. handle->descr = NULL;
  113. handle->prev_descr = NULL;
  114. handle->unaligned_page = 0;
  115. page_index = rd->state->page_index;
  116. uv_rwlock_wrlock(&page_index->lock);
  117. ++page_index->writers;
  118. uv_rwlock_wrunlock(&page_index->lock);
  119. }
  120. /* The page must be populated and referenced */
  121. static int page_has_only_empty_metrics(struct rrdeng_page_descr *descr)
  122. {
  123. unsigned i;
  124. uint8_t has_only_empty_metrics = 1;
  125. storage_number *page;
  126. page = descr->pg_cache_descr->page;
  127. for (i = 0 ; i < descr->page_length / sizeof(storage_number); ++i) {
  128. if (SN_EMPTY_SLOT != page[i]) {
  129. has_only_empty_metrics = 0;
  130. break;
  131. }
  132. }
  133. return has_only_empty_metrics;
  134. }
/*
 * Finishes the page the collector is currently filling and detaches it from
 * the handle. A page containing only empty slots is released and its range is
 * punched out of the database; a page with real data is committed for
 * flushing. A page that was allocated but never written (page_length == 0) is
 * freed directly. No-op when the handle has no context or no current page.
 */
void rrdeng_store_metric_flush_current_page(RRDDIM *rd)
{
    struct rrdeng_collect_handle *handle;
    struct rrdengine_instance *ctx;
    struct rrdeng_page_descr *descr;

    handle = &rd->state->handle.rrdeng;
    ctx = handle->ctx;
    if (unlikely(!ctx))
        return;
    descr = handle->descr;
    if (unlikely(NULL == descr)) {
        return;
    }
    if (likely(descr->page_length)) {
        int page_is_empty;

        /* This collector no longer has an active producer page. */
        rrd_stat_atomic_add(&ctx->stats.metric_API_producers, -1);

        if (handle->prev_descr) {
            /* unpin old second page */
            pg_cache_put(ctx, handle->prev_descr);
        }
        page_is_empty = page_has_only_empty_metrics(descr);
        if (page_is_empty) {
            debug(D_RRDENGINE, "Page has empty metrics only, deleting:");
            if (unlikely(debug_flags & D_RRDENGINE))
                print_page_cache_descr(descr);
            /* Drop our reference, then remove the page's time range. */
            pg_cache_put(ctx, descr);
            pg_cache_punch_hole(ctx, descr, 1, 0, NULL);
            handle->prev_descr = NULL;
        } else {
            /*
             * Disable pinning for now as it leads to deadlocks. When a collector stops collecting the extra pinned page
             * eventually gets rotated but it cannot be destroyed due to the extra reference.
             */
            /* added 1 extra reference to keep 2 dirty pages pinned per metric, expected refcnt = 2 */
            /* rrdeng_page_descr_mutex_lock(ctx, descr);
            ret = pg_cache_try_get_unsafe(descr, 0);
            rrdeng_page_descr_mutex_unlock(ctx, descr);
            fatal_assert(1 == ret);*/
            rrdeng_commit_page(ctx, descr, handle->page_correlation_id);
            /* handle->prev_descr = descr;*/
        }
    } else {
        /* Never written to: free the page buffer and its descriptors. */
        freez(descr->pg_cache_descr->page);
        rrdeng_destroy_pg_cache_descr(ctx, descr->pg_cache_descr);
        freez(descr);
    }
    handle->descr = NULL;
}
/*
 * Appends one value to the metric's current page. Allocates a new page when
 * there is none, when the current page is full, or when the page must be
 * re-aligned with the chart's other dimensions. Maintains the chart-wide
 * page alignment, the producer statistics, and inserts the page into the
 * page cache on its first valid point.
 */
void rrdeng_store_metric_next(RRDDIM *rd, usec_t point_in_time, storage_number number)
{
    struct rrdeng_collect_handle *handle;
    struct rrdengine_instance *ctx;
    struct page_cache *pg_cache;
    struct rrdeng_page_descr *descr;
    storage_number *page;
    uint8_t must_flush_unaligned_page = 0, perfect_page_alignment = 0;

    handle = &rd->state->handle.rrdeng;
    ctx = handle->ctx;
    pg_cache = &ctx->pg_cache;
    descr = handle->descr;

    if (descr) {
        /* Make alignment decisions */
        if (descr->page_length == rd->rrdset->rrddim_page_alignment) {
            /* this is the leading dimension that defines chart alignment */
            perfect_page_alignment = 1;
        }
        /* is the metric far enough out of alignment with the others? */
        if (unlikely(descr->page_length + sizeof(number) < rd->rrdset->rrddim_page_alignment)) {
            handle->unaligned_page = 1;
            debug(D_RRDENGINE, "Metric page is not aligned with chart:");
            if (unlikely(debug_flags & D_RRDENGINE))
                print_page_cache_descr(descr);
        }
        if (unlikely(handle->unaligned_page &&
                     /* did the other metrics change page? */
                     rd->rrdset->rrddim_page_alignment <= sizeof(number))) {
            debug(D_RRDENGINE, "Flushing unaligned metric page.");
            must_flush_unaligned_page = 1;
            handle->unaligned_page = 0;
        }
    }
    /* Start a fresh page on first write, on overflow, or to re-align. */
    if (unlikely(NULL == descr ||
                 descr->page_length + sizeof(number) > RRDENG_BLOCK_SIZE ||
                 must_flush_unaligned_page)) {
        rrdeng_store_metric_flush_current_page(rd);
        page = rrdeng_create_page(ctx, &rd->state->page_index->id, &descr);
        fatal_assert(page);
        handle->descr = descr;
        /* Correlation ids give committed pages a global ordering. */
        handle->page_correlation_id = rrd_atomic_fetch_add(&pg_cache->committed_page_index.latest_corr_id, 1);
        if (0 == rd->rrdset->rrddim_page_alignment) {
            /* this is the leading dimension that defines chart alignment */
            perfect_page_alignment = 1;
        }
    }
    page = descr->pg_cache_descr->page;
    /* Store the value in the next free slot and publish the new end
     * time / length atomically. */
    page[descr->page_length / sizeof(number)] = number;
    pg_cache_atomic_set_pg_info(descr, point_in_time, descr->page_length + sizeof(number));
    if (perfect_page_alignment)
        rd->rrdset->rrddim_page_alignment = descr->page_length;
    if (unlikely(INVALID_TIME == descr->start_time)) {
        /* First valid point of this page: stamp its start time, account
         * for a new producer and publish the page in the page cache. */
        unsigned long new_metric_API_producers, old_metric_API_max_producers, ret_metric_API_max_producers;

        descr->start_time = point_in_time;
        new_metric_API_producers = rrd_atomic_add_fetch(&ctx->stats.metric_API_producers, 1);
        /* CAS loop raising the high-watermark of concurrent producers. */
        while (unlikely(new_metric_API_producers > (old_metric_API_max_producers = ctx->metric_API_max_producers))) {
            /* Increase ctx->metric_API_max_producers */
            ret_metric_API_max_producers = ulong_compare_and_swap(&ctx->metric_API_max_producers,
                                                                  old_metric_API_max_producers,
                                                                  new_metric_API_producers);
            if (old_metric_API_max_producers == ret_metric_API_max_producers) {
                /* success */
                break;
            }
        }
        pg_cache_insert(ctx, rd->state->page_index, descr);
    } else {
        pg_cache_add_new_metric_time(rd->state->page_index, descr);
    }
}
  253. /*
  254. * Releases the database reference from the handle for storing metrics.
  255. * Returns 1 if it's safe to delete the dimension.
  256. */
  257. int rrdeng_store_metric_finalize(RRDDIM *rd)
  258. {
  259. struct rrdeng_collect_handle *handle;
  260. struct rrdengine_instance *ctx;
  261. struct pg_cache_page_index *page_index;
  262. uint8_t can_delete_metric = 0;
  263. handle = &rd->state->handle.rrdeng;
  264. ctx = handle->ctx;
  265. page_index = rd->state->page_index;
  266. rrdeng_store_metric_flush_current_page(rd);
  267. if (handle->prev_descr) {
  268. /* unpin old second page */
  269. pg_cache_put(ctx, handle->prev_descr);
  270. }
  271. uv_rwlock_wrlock(&page_index->lock);
  272. if (!--page_index->writers && !page_index->page_count) {
  273. can_delete_metric = 1;
  274. }
  275. uv_rwlock_wrunlock(&page_index->lock);
  276. return can_delete_metric;
  277. }
  278. /* Returns 1 if the data collection interval is well defined, 0 otherwise */
  279. static int metrics_with_known_interval(struct rrdeng_page_descr *descr)
  280. {
  281. unsigned page_entries;
  282. if (unlikely(INVALID_TIME == descr->start_time || INVALID_TIME == descr->end_time))
  283. return 0;
  284. page_entries = descr->page_length / sizeof(storage_number);
  285. if (likely(page_entries > 1)) {
  286. return 1;
  287. }
  288. return 0;
  289. }
/* First 4 scratch bytes of a page_info hold its data collection interval. */
static inline uint32_t *pginfo_to_dt(struct rrdeng_page_info *page_info)
{
    return (uint32_t *)&page_info->scratch[0];
}
/* Next 4 scratch bytes of a page_info hold its count of valid points. */
static inline uint32_t *pginfo_to_points(struct rrdeng_page_info *page_info)
{
    return (uint32_t *)&page_info->scratch[sizeof(uint32_t)];
}
/**
 * Calculates the regions of different data collection intervals in a netdata chart in the time range
 * [start_time,end_time]. This call takes the netdata chart read lock.
 * @param st the netdata chart whose data collection interval boundaries are calculated.
 * @param start_time inclusive starting time in usec
 * @param end_time inclusive ending time in usec
 * @param region_info_arrayp It allocates (*region_info_arrayp) and populates it with information of regions of a
 *        reference dimension that have different data collection intervals and overlap with the time range
 *        [start_time,end_time]. The caller must free (*region_info_arrayp) with freez(). If region_info_arrayp is set
 *        to NULL nothing was allocated.
 * @param max_intervalp is dereferenced and set to be the largest data collection interval of all regions.
 * @return number of regions with different data collection intervals.
 */
unsigned rrdeng_variable_step_boundaries(RRDSET *st, time_t start_time, time_t end_time,
                                         struct rrdeng_region_info **region_info_arrayp, unsigned *max_intervalp, struct context_param *context_param_list)
{
    struct pg_cache_page_index *page_index;
    struct rrdengine_instance *ctx;
    unsigned pages_nr;
    RRDDIM *rd_iter, *rd;
    struct rrdeng_page_info *page_info_array, *curr, *prev, *old_prev;
    unsigned i, j, page_entries, region_points, page_points, regions, max_interval;
    time_t now;
    usec_t dt, current_position_time, max_time = 0, min_time, curr_time, first_valid_time_in_page;
    struct rrdeng_region_info *region_info_array;
    uint8_t is_first_region_initialized;

    ctx = get_rrdeng_ctx_from_host(st->rrdhost);
    regions = 1;
    *max_intervalp = max_interval = 0;
    region_info_array = NULL;
    *region_info_arrayp = NULL;
    page_info_array = NULL;

    RRDDIM *temp_rd = context_param_list ? context_param_list->rd : NULL;
    rrdset_rdlock(st);
    for (rd_iter = temp_rd ? temp_rd : st->dimensions, rd = NULL, min_time = (usec_t)-1 ; rd_iter ; rd_iter = rd_iter->next) {
        /*
         * Choose oldest dimension as reference. This is not equivalent to the union of all dimensions
         * but it is a best effort approximation with a bias towards older metrics in a chart. It
         * matches netdata behaviour in the sense that dimensions are generally aligned in a chart
         * and older dimensions contain more information about the time range. It does not work well
         * for metrics that have recently stopped being collected.
         */
        curr_time = pg_cache_oldest_time_in_range(ctx, rd_iter->state->rrdeng_uuid,
                                                  start_time * USEC_PER_SEC, end_time * USEC_PER_SEC);
        if (INVALID_TIME != curr_time && curr_time < min_time) {
            rd = rd_iter;
            min_time = curr_time;
        }
    }
    rrdset_unlock(st);
    if (NULL == rd) {
        /* No dimension has data in the range: report a single (empty) region. */
        return 1;
    }
    pages_nr = pg_cache_preload(ctx, rd->state->rrdeng_uuid, start_time * USEC_PER_SEC, end_time * USEC_PER_SEC,
                                &page_info_array, &page_index);
    if (pages_nr) {
        /* conservative allocation, will reduce the size later if necessary */
        region_info_array = mallocz(sizeof(*region_info_array) * pages_nr);
    }
    is_first_region_initialized = 0;
    region_points = 0;

    /* pages loop */
    for (i = 0, curr = NULL, prev = NULL ; i < pages_nr ; ++i) {
        old_prev = prev;
        prev = curr;
        curr = &page_info_array[i];
        *pginfo_to_points(curr) = 0; /* initialize to invalid page */
        *pginfo_to_dt(curr) = 0; /* no known data collection interval yet */
        if (unlikely(INVALID_TIME == curr->start_time || INVALID_TIME == curr->end_time ||
                     curr->end_time < curr->start_time)) {
            info("Ignoring page with invalid timestamps.");
            prev = old_prev; /* a skipped page must not become "previous" */
            continue;
        }
        page_entries = curr->page_length / sizeof(storage_number);
        fatal_assert(0 != page_entries);
        if (likely(1 != page_entries)) {
            /* Derive the per-point interval from the page's time span,
             * rounded to seconds with a minimum of 1. */
            dt = (curr->end_time - curr->start_time) / (page_entries - 1);
            *pginfo_to_dt(curr) = ROUND_USEC_TO_SEC(dt);
            if (unlikely(0 == *pginfo_to_dt(curr)))
                *pginfo_to_dt(curr) = 1;
        } else {
            dt = 0;
        }
        /* points loop: count this page's points that fall in the range */
        for (j = 0, page_points = 0 ; j < page_entries ; ++j) {
            uint8_t is_metric_out_of_order, is_metric_earlier_than_range;

            is_metric_earlier_than_range = 0;
            is_metric_out_of_order = 0;
            current_position_time = curr->start_time + j * dt;
            now = current_position_time / USEC_PER_SEC;
            if (now > end_time) { /* there will be no more pages in the time range */
                break;
            }
            if (now < start_time)
                is_metric_earlier_than_range = 1;
            if (unlikely(current_position_time < max_time)) /* just went back in time */
                is_metric_out_of_order = 1;
            if (is_metric_earlier_than_range || unlikely(is_metric_out_of_order)) {
                if (unlikely(is_metric_out_of_order))
                    info("Ignoring metric with out of order timestamp.");
                continue; /* next entry */
            }
            /* here is a valid metric */
            ++page_points;
            region_info_array[regions - 1].points = ++region_points;
            max_time = current_position_time;
            if (1 == page_points)
                first_valid_time_in_page = current_position_time;
            if (unlikely(!is_first_region_initialized)) {
                fatal_assert(1 == regions);
                /* this is the first region */
                region_info_array[0].start_time = current_position_time;
                is_first_region_initialized = 1;
            }
        }
        *pginfo_to_points(curr) = page_points;
        if (0 == page_points) {
            prev = old_prev; /* page contributed nothing, ignore it as "previous" */
            continue;
        }
        if (unlikely(0 == *pginfo_to_dt(curr))) { /* unknown data collection interval */
            fatal_assert(1 == page_points);
            if (likely(NULL != prev)) { /* get interval from previous page */
                *pginfo_to_dt(curr) = *pginfo_to_dt(prev);
            } else { /* there is no previous page in the query */
                struct rrdeng_page_info db_page_info;

                /* go to database */
                pg_cache_get_filtered_info_prev(ctx, page_index, curr->start_time,
                                                metrics_with_known_interval, &db_page_info);
                if (unlikely(db_page_info.start_time == INVALID_TIME || db_page_info.end_time == INVALID_TIME ||
                             0 == db_page_info.page_length)) { /* nothing in the database, default to update_every */
                    *pginfo_to_dt(curr) = rd->update_every;
                } else {
                    unsigned db_entries;
                    usec_t db_dt;

                    db_entries = db_page_info.page_length / sizeof(storage_number);
                    db_dt = (db_page_info.end_time - db_page_info.start_time) / (db_entries - 1);
                    *pginfo_to_dt(curr) = ROUND_USEC_TO_SEC(db_dt);
                    if (unlikely(0 == *pginfo_to_dt(curr)))
                        *pginfo_to_dt(curr) = 1;
                }
            }
        }
        if (likely(prev) && unlikely(*pginfo_to_dt(curr) != *pginfo_to_dt(prev))) {
            /* Interval changed: move this page's points out of the current
             * region and open a new region starting at this page's first
             * valid point. */
            info("Data collection interval change detected in query: %"PRIu32" -> %"PRIu32,
                 *pginfo_to_dt(prev), *pginfo_to_dt(curr));
            region_info_array[regions++ - 1].points -= page_points;
            region_info_array[regions - 1].points = region_points = page_points;
            region_info_array[regions - 1].start_time = first_valid_time_in_page;
        }
        if (*pginfo_to_dt(curr) > max_interval)
            max_interval = *pginfo_to_dt(curr);
        region_info_array[regions - 1].update_every = *pginfo_to_dt(curr);
    }
    if (page_info_array)
        freez(page_info_array);
    if (region_info_array) {
        if (likely(is_first_region_initialized)) {
            /* free unnecessary memory */
            region_info_array = reallocz(region_info_array, sizeof(*region_info_array) * regions);
            *region_info_arrayp = region_info_array;
            *max_intervalp = max_interval;
        } else {
            /* empty result */
            freez(region_info_array);
        }
    }
    return regions;
}
  467. /*
  468. * Gets a handle for loading metrics from the database.
  469. * The handle must be released with rrdeng_load_metric_final().
  470. */
  471. void rrdeng_load_metric_init(RRDDIM *rd, struct rrddim_query_handle *rrdimm_handle, time_t start_time, time_t end_time)
  472. {
  473. struct rrdeng_query_handle *handle;
  474. struct rrdengine_instance *ctx;
  475. unsigned pages_nr;
  476. ctx = get_rrdeng_ctx_from_host(rd->rrdset->rrdhost);
  477. rrdimm_handle->start_time = start_time;
  478. rrdimm_handle->end_time = end_time;
  479. handle = &rrdimm_handle->rrdeng;
  480. handle->next_page_time = start_time;
  481. handle->now = start_time;
  482. handle->position = 0;
  483. handle->ctx = ctx;
  484. handle->descr = NULL;
  485. pages_nr = pg_cache_preload(ctx, rd->state->rrdeng_uuid, start_time * USEC_PER_SEC, end_time * USEC_PER_SEC,
  486. NULL, &handle->page_index);
  487. if (unlikely(NULL == handle->page_index || 0 == pages_nr))
  488. /* there are no metrics to load */
  489. handle->next_page_time = INVALID_TIME;
  490. }
/* Returns the metric and sets its timestamp into current_time */
storage_number rrdeng_load_metric_next(struct rrddim_query_handle *rrdimm_handle, time_t *current_time)
{
    struct rrdeng_query_handle *handle;
    struct rrdengine_instance *ctx;
    struct rrdeng_page_descr *descr;
    storage_number *page, ret;
    unsigned position, entries;
    usec_t next_page_time = 0, current_position_time, page_end_time = 0;
    uint32_t page_length;

    handle = &rrdimm_handle->rrdeng;
    if (unlikely(INVALID_TIME == handle->next_page_time)) {
        /* iterator already exhausted by a previous call */
        return SN_EMPTY_SLOT;
    }
    ctx = handle->ctx;
    if (unlikely(NULL == (descr = handle->descr))) {
        /* it's the first call */
        next_page_time = handle->next_page_time * USEC_PER_SEC;
    } else {
        pg_cache_atomic_get_pg_info(descr, &page_end_time, &page_length);
    }
    position = handle->position + 1;

    /* NOTE: page_length is only read when descr != NULL thanks to the
     * short-circuit evaluation below. */
    if (unlikely(NULL == descr ||
                 position >= (page_length / sizeof(storage_number)))) {
        /* We need to get a new page */
        if (descr) {
            /* Drop old page's reference */
#ifdef NETDATA_INTERNAL_CHECKS
            rrd_stat_atomic_add(&ctx->stats.metric_API_consumers, -1);
#endif
            pg_cache_put(ctx, descr);
            handle->descr = NULL;
            handle->next_page_time = (page_end_time / USEC_PER_SEC) + 1;
            if (unlikely(handle->next_page_time > rrdimm_handle->end_time)) {
                goto no_more_metrics;
            }
            next_page_time = handle->next_page_time * USEC_PER_SEC;
        }
        descr = pg_cache_lookup_next(ctx, handle->page_index, &handle->page_index->id,
                                     next_page_time, rrdimm_handle->end_time * USEC_PER_SEC);
        if (NULL == descr) {
            goto no_more_metrics;
        }
#ifdef NETDATA_INTERNAL_CHECKS
        rrd_stat_atomic_add(&ctx->stats.metric_API_consumers, 1);
#endif
        handle->descr = descr;
        pg_cache_atomic_get_pg_info(descr, &page_end_time, &page_length);
        if (unlikely(INVALID_TIME == descr->start_time ||
                     INVALID_TIME == page_end_time)) {
            goto no_more_metrics;
        }
        if (unlikely(descr->start_time != page_end_time && next_page_time > descr->start_time)) {
            /* we're in the middle of the page somewhere */
            entries = page_length / sizeof(storage_number);
            /* interpolate the starting slot, assuming evenly spaced points */
            position = ((uint64_t)(next_page_time - descr->start_time)) * (entries - 1) /
                       (page_end_time - descr->start_time);
        } else {
            position = 0;
        }
    }
    page = descr->pg_cache_descr->page;
    ret = page[position];
    entries = page_length / sizeof(storage_number);
    if (entries > 1) {
        usec_t dt;

        /* reconstruct this slot's timestamp from the page's time range */
        dt = (page_end_time - descr->start_time) / (entries - 1);
        current_position_time = descr->start_time + position * dt;
    } else {
        current_position_time = descr->start_time;
    }
    handle->position = position;
    handle->now = current_position_time / USEC_PER_SEC;
    /* fatal_assert(handle->now >= rrdimm_handle->start_time && handle->now <= rrdimm_handle->end_time);
       The above assertion is an approximation and needs to take update_every into account */
    if (unlikely(handle->now >= rrdimm_handle->end_time)) {
        /* next calls will not load any more metrics */
        handle->next_page_time = INVALID_TIME;
    }
    *current_time = handle->now;
    return ret;

no_more_metrics:
    handle->next_page_time = INVALID_TIME;
    return SN_EMPTY_SLOT;
}
  576. int rrdeng_load_metric_is_finished(struct rrddim_query_handle *rrdimm_handle)
  577. {
  578. struct rrdeng_query_handle *handle;
  579. handle = &rrdimm_handle->rrdeng;
  580. return (INVALID_TIME == handle->next_page_time);
  581. }
  582. /*
  583. * Releases the database reference from the handle for loading metrics.
  584. */
  585. void rrdeng_load_metric_finalize(struct rrddim_query_handle *rrdimm_handle)
  586. {
  587. struct rrdeng_query_handle *handle;
  588. struct rrdengine_instance *ctx;
  589. struct rrdeng_page_descr *descr;
  590. handle = &rrdimm_handle->rrdeng;
  591. ctx = handle->ctx;
  592. descr = handle->descr;
  593. if (descr) {
  594. #ifdef NETDATA_INTERNAL_CHECKS
  595. rrd_stat_atomic_add(&ctx->stats.metric_API_consumers, -1);
  596. #endif
  597. pg_cache_put(ctx, descr);
  598. }
  599. }
  600. time_t rrdeng_metric_latest_time(RRDDIM *rd)
  601. {
  602. struct pg_cache_page_index *page_index;
  603. page_index = rd->state->page_index;
  604. return page_index->latest_time / USEC_PER_SEC;
  605. }
  606. time_t rrdeng_metric_oldest_time(RRDDIM *rd)
  607. {
  608. struct pg_cache_page_index *page_index;
  609. page_index = rd->state->page_index;
  610. return page_index->oldest_time / USEC_PER_SEC;
  611. }
  612. int rrdeng_metric_latest_time_by_uuid(uuid_t *dim_uuid, time_t *first_entry_t, time_t *last_entry_t)
  613. {
  614. struct page_cache *pg_cache;
  615. struct rrdengine_instance *ctx;
  616. Pvoid_t *PValue;
  617. struct pg_cache_page_index *page_index = NULL;
  618. ctx = get_rrdeng_ctx_from_host(localhost);
  619. if (unlikely(!ctx)) {
  620. error("Failed to fetch multidb context");
  621. return 1;
  622. }
  623. pg_cache = &ctx->pg_cache;
  624. uv_rwlock_rdlock(&pg_cache->metrics_index.lock);
  625. PValue = JudyHSGet(pg_cache->metrics_index.JudyHS_array, dim_uuid, sizeof(uuid_t));
  626. if (likely(NULL != PValue)) {
  627. page_index = *PValue;
  628. }
  629. uv_rwlock_rdunlock(&pg_cache->metrics_index.lock);
  630. if (likely(page_index)) {
  631. *first_entry_t = page_index->oldest_time / USEC_PER_SEC;
  632. *last_entry_t = page_index->latest_time / USEC_PER_SEC;
  633. return 0;
  634. }
  635. return 1;
  636. }
  637. /* Also gets a reference for the page */
  638. void *rrdeng_create_page(struct rrdengine_instance *ctx, uuid_t *id, struct rrdeng_page_descr **ret_descr)
  639. {
  640. struct rrdeng_page_descr *descr;
  641. struct page_cache_descr *pg_cache_descr;
  642. void *page;
  643. /* TODO: check maximum number of pages in page cache limit */
  644. descr = pg_cache_create_descr();
  645. descr->id = id; /* TODO: add page type: metric, log, something? */
  646. page = mallocz(RRDENG_BLOCK_SIZE); /*TODO: add page size */
  647. rrdeng_page_descr_mutex_lock(ctx, descr);
  648. pg_cache_descr = descr->pg_cache_descr;
  649. pg_cache_descr->page = page;
  650. pg_cache_descr->flags = RRD_PAGE_DIRTY /*| RRD_PAGE_LOCKED */ | RRD_PAGE_POPULATED /* | BEING_COLLECTED */;
  651. pg_cache_descr->refcnt = 1;
  652. debug(D_RRDENGINE, "Created new page:");
  653. if (unlikely(debug_flags & D_RRDENGINE))
  654. print_page_cache_descr(descr);
  655. rrdeng_page_descr_mutex_unlock(ctx, descr);
  656. *ret_descr = descr;
  657. return page;
  658. }
/* The page must not be empty */
void rrdeng_commit_page(struct rrdengine_instance *ctx, struct rrdeng_page_descr *descr,
                        Word_t page_correlation_id)
{
    struct page_cache *pg_cache = &ctx->pg_cache;
    Pvoid_t *PValue;
    unsigned nr_committed_pages;

    if (unlikely(NULL == descr)) {
        debug(D_RRDENGINE, "%s: page descriptor is NULL, page has already been force-committed.", __func__);
        return;
    }
    fatal_assert(descr->page_length);

    /* Register the page in the committed index, keyed by correlation id. */
    uv_rwlock_wrlock(&pg_cache->committed_page_index.lock);
    PValue = JudyLIns(&pg_cache->committed_page_index.JudyL_array, page_correlation_id, PJE0);
    *PValue = descr;
    nr_committed_pages = ++pg_cache->committed_page_index.nr_committed_pages;
    uv_rwlock_wrunlock(&pg_cache->committed_page_index.lock);

    if (nr_committed_pages >= pg_cache_hard_limit(ctx) / 2) {
        /* over 50% of pages have not been committed yet */
        if (ctx->drop_metrics_under_page_cache_pressure &&
            nr_committed_pages >= pg_cache_committed_hard_limit(ctx)) {
            /* 100% of pages are dirty */
            struct rrdeng_cmd cmd;

            /* Ask the engine worker to evict the oldest in-memory page. */
            cmd.opcode = RRDENG_INVALIDATE_OLDEST_MEMORY_PAGE;
            rrdeng_enq_cmd(&ctx->worker_config, &cmd);
        } else {
            if (0 == (unsigned long) ctx->stats.pg_cache_over_half_dirty_events) {
                /* only print the first time */
                errno = 0;
                error("Failed to flush dirty buffers quickly enough in dbengine instance \"%s\". "
                      "Metric data at risk of not being stored in the database, "
                      "please reduce disk load or use a faster disk.", ctx->dbfiles_path);
            }
            rrd_stat_atomic_add(&ctx->stats.pg_cache_over_half_dirty_events, 1);
            rrd_stat_atomic_add(&global_pg_cache_over_half_dirty_events, 1);
        }
    }
    /* Drop the reference taken when the page was created. */
    pg_cache_put(ctx, descr);
}
  698. /* Gets a reference for the page */
  699. void *rrdeng_get_latest_page(struct rrdengine_instance *ctx, uuid_t *id, void **handle)
  700. {
  701. struct rrdeng_page_descr *descr;
  702. struct page_cache_descr *pg_cache_descr;
  703. debug(D_RRDENGINE, "Reading existing page:");
  704. descr = pg_cache_lookup(ctx, NULL, id, INVALID_TIME);
  705. if (NULL == descr) {
  706. *handle = NULL;
  707. return NULL;
  708. }
  709. *handle = descr;
  710. pg_cache_descr = descr->pg_cache_descr;
  711. return pg_cache_descr->page;
  712. }
  713. /* Gets a reference for the page */
  714. void *rrdeng_get_page(struct rrdengine_instance *ctx, uuid_t *id, usec_t point_in_time, void **handle)
  715. {
  716. struct rrdeng_page_descr *descr;
  717. struct page_cache_descr *pg_cache_descr;
  718. debug(D_RRDENGINE, "Reading existing page:");
  719. descr = pg_cache_lookup(ctx, NULL, id, point_in_time);
  720. if (NULL == descr) {
  721. *handle = NULL;
  722. return NULL;
  723. }
  724. *handle = descr;
  725. pg_cache_descr = descr->pg_cache_descr;
  726. return pg_cache_descr->page;
  727. }
/*
 * Gathers Database Engine statistics.
 * Careful when modifying this function.
 * You must not change the indices of the statistics or user code will break.
 * You must not exceed RRDENG_NR_STATS or it will crash.
 */
void rrdeng_get_37_statistics(struct rrdengine_instance *ctx, unsigned long long *array)
{
    /* Caller must supply an array of at least RRDENG_NR_STATS (37) slots.
     * The index of each statistic is a stable ABI consumed outside this file. */
    if (ctx == NULL)
        return;
    struct page_cache *pg_cache = &ctx->pg_cache;
    /* metric API activity */
    array[0] = (uint64_t)ctx->stats.metric_API_producers;
    array[1] = (uint64_t)ctx->stats.metric_API_consumers;
    /* page cache occupancy */
    array[2] = (uint64_t)pg_cache->page_descriptors;
    array[3] = (uint64_t)pg_cache->populated_pages;
    array[4] = (uint64_t)pg_cache->committed_page_index.nr_committed_pages;
    /* page cache traffic */
    array[5] = (uint64_t)ctx->stats.pg_cache_insertions;
    array[6] = (uint64_t)ctx->stats.pg_cache_deletions;
    array[7] = (uint64_t)ctx->stats.pg_cache_hits;
    array[8] = (uint64_t)ctx->stats.pg_cache_misses;
    array[9] = (uint64_t)ctx->stats.pg_cache_backfills;
    array[10] = (uint64_t)ctx->stats.pg_cache_evictions;
    /* compression ratios */
    array[11] = (uint64_t)ctx->stats.before_compress_bytes;
    array[12] = (uint64_t)ctx->stats.after_compress_bytes;
    array[13] = (uint64_t)ctx->stats.before_decompress_bytes;
    array[14] = (uint64_t)ctx->stats.after_decompress_bytes;
    /* raw I/O counters */
    array[15] = (uint64_t)ctx->stats.io_write_bytes;
    array[16] = (uint64_t)ctx->stats.io_write_requests;
    array[17] = (uint64_t)ctx->stats.io_read_bytes;
    array[18] = (uint64_t)ctx->stats.io_read_requests;
    array[19] = (uint64_t)ctx->stats.io_write_extent_bytes;
    array[20] = (uint64_t)ctx->stats.io_write_extents;
    array[21] = (uint64_t)ctx->stats.io_read_extent_bytes;
    array[22] = (uint64_t)ctx->stats.io_read_extents;
    /* on-disk file lifecycle */
    array[23] = (uint64_t)ctx->stats.datafile_creations;
    array[24] = (uint64_t)ctx->stats.datafile_deletions;
    array[25] = (uint64_t)ctx->stats.journalfile_creations;
    array[26] = (uint64_t)ctx->stats.journalfile_deletions;
    array[27] = (uint64_t)ctx->stats.page_cache_descriptors;
    /* per-instance and global error/pressure counters */
    array[28] = (uint64_t)ctx->stats.io_errors;
    array[29] = (uint64_t)ctx->stats.fs_errors;
    array[30] = (uint64_t)global_io_errors;
    array[31] = (uint64_t)global_fs_errors;
    array[32] = (uint64_t)rrdeng_reserved_file_descriptors;
    array[33] = (uint64_t)ctx->stats.pg_cache_over_half_dirty_events;
    array[34] = (uint64_t)global_pg_cache_over_half_dirty_events;
    array[35] = (uint64_t)ctx->stats.flushing_pressure_page_deletions;
    array[36] = (uint64_t)global_flushing_pressure_page_deletions;
    /* Guards against adding a statistic without bumping RRDENG_NR_STATS. */
    fatal_assert(RRDENG_NR_STATS == 37);
}
  778. /* Releases reference to page */
  779. void rrdeng_put_page(struct rrdengine_instance *ctx, void *handle)
  780. {
  781. (void)ctx;
  782. pg_cache_put(ctx, (struct rrdeng_page_descr *)handle);
  783. }
/*
 * Initializes a dbengine instance: reserves file descriptors, sets up the
 * page cache and commit log, opens/creates the data files, and spawns the
 * worker thread plus the metadata log event loop.
 * If ctxp is NULL, the shared multi-host instance (multidb_ctx) is used;
 * otherwise a new instance is allocated and returned through *ctxp.
 * Returns 0 on success, negative on error
 */
int rrdeng_init(RRDHOST *host, struct rrdengine_instance **ctxp, char *dbfiles_path, unsigned page_cache_mb,
                unsigned disk_space_mb)
{
    struct rrdengine_instance *ctx;
    int error; /* NOTE(review): shadows the error() logging macro name; calls below still expand the macro */
    uint32_t max_open_files;

    /* never let dbengine instances claim more than 1/4 of the fd limit */
    max_open_files = rlimit_nofile.rlim_cur / 4;
    /* reserve RRDENG_FD_BUDGET_PER_INSTANCE file descriptors for this instance */
    rrd_stat_atomic_add(&rrdeng_reserved_file_descriptors, RRDENG_FD_BUDGET_PER_INSTANCE);
    if (rrdeng_reserved_file_descriptors > max_open_files) {
        error(
            "Exceeded the budget of available file descriptors (%u/%u), cannot create new dbengine instance.",
            (unsigned)rrdeng_reserved_file_descriptors, (unsigned)max_open_files);
        rrd_stat_atomic_add(&global_fs_errors, 1);
        /* give the reservation back before bailing out */
        rrd_stat_atomic_add(&rrdeng_reserved_file_descriptors, -RRDENG_FD_BUDGET_PER_INSTANCE);
        return UV_EMFILE;
    }
    if (NULL == ctxp) {
        /* NULL ctxp selects the statically-allocated multi-host instance */
        ctx = &multidb_ctx;
        memset(ctx, 0, sizeof(*ctx));
    } else {
        *ctxp = ctx = callocz(1, sizeof(*ctx));
    }
    ctx->global_compress_alg = RRD_LZ4;
    /* clamp configuration to sane minimums */
    if (page_cache_mb < RRDENG_MIN_PAGE_CACHE_SIZE_MB)
        page_cache_mb = RRDENG_MIN_PAGE_CACHE_SIZE_MB;
    ctx->max_cache_pages = page_cache_mb * (1048576LU / RRDENG_BLOCK_SIZE);
    /* try to keep 5% of the page cache free */
    ctx->cache_pages_low_watermark = (ctx->max_cache_pages * 95LLU) / 100;
    if (disk_space_mb < RRDENG_MIN_DISK_SPACE_MB)
        disk_space_mb = RRDENG_MIN_DISK_SPACE_MB;
    ctx->max_disk_space = disk_space_mb * 1048576LLU;
    strncpyz(ctx->dbfiles_path, dbfiles_path, sizeof(ctx->dbfiles_path) - 1);
    ctx->dbfiles_path[sizeof(ctx->dbfiles_path) - 1] = '\0';
    /* the multi-host instance has no host: fall back to the registry guid */
    if (NULL == host)
        strncpyz(ctx->machine_guid, registry_get_this_machine_guid(), GUID_LEN);
    else
        strncpyz(ctx->machine_guid, host->machine_guid, GUID_LEN);
    ctx->drop_metrics_under_page_cache_pressure = rrdeng_drop_metrics_under_page_cache_pressure;
    ctx->metric_API_max_producers = 0;
    ctx->quiesce = NO_QUIESCE;
    ctx->metalog_ctx = NULL; /* only set this after the metadata log has finished initializing */
    ctx->host = host;

    memset(&ctx->worker_config, 0, sizeof(ctx->worker_config));
    ctx->worker_config.ctx = ctx;
    init_page_cache(ctx);
    init_commit_log(ctx);
    error = init_rrd_files(ctx);
    if (error) {
        goto error_after_init_rrd_files;
    }

    completion_init(&ctx->rrdengine_completion);
    fatal_assert(0 == uv_thread_create(&ctx->worker_config.thread, rrdeng_worker, &ctx->worker_config));
    /* wait for worker thread to initialize */
    completion_wait_for(&ctx->rrdengine_completion);
    completion_destroy(&ctx->rrdengine_completion);
    uv_thread_set_name_np(ctx->worker_config.thread, "DBENGINE");
    if (ctx->worker_config.error) {
        goto error_after_rrdeng_worker;
    }
    error = metalog_init(ctx);
    if (error) {
        error("Failed to initialize metadata log file event loop.");
        goto error_after_rrdeng_worker;
    }

    return 0;

/* unwind in reverse order of initialization */
error_after_rrdeng_worker:
    finalize_rrd_files(ctx);
error_after_init_rrd_files:
    free_page_cache(ctx);
    /* ctx == &multidb_ctx implies ctxp was NULL, so *ctxp is only touched
     * when the instance was heap-allocated above */
    if (ctx != &multidb_ctx) {
        freez(ctx);
        *ctxp = NULL;
    }
    rrd_stat_atomic_add(&rrdeng_reserved_file_descriptors, -RRDENG_FD_BUDGET_PER_INSTANCE);
    return UV_EIO;
}
  864. /*
  865. * Returns 0 on success, 1 on error
  866. */
  867. int rrdeng_exit(struct rrdengine_instance *ctx)
  868. {
  869. struct rrdeng_cmd cmd;
  870. if (NULL == ctx) {
  871. return 1;
  872. }
  873. /* TODO: add page to page cache */
  874. cmd.opcode = RRDENG_SHUTDOWN;
  875. rrdeng_enq_cmd(&ctx->worker_config, &cmd);
  876. fatal_assert(0 == uv_thread_join(&ctx->worker_config.thread));
  877. finalize_rrd_files(ctx);
  878. //metalog_exit(ctx->metalog_ctx);
  879. free_page_cache(ctx);
  880. if (ctx != &multidb_ctx) {
  881. freez(ctx);
  882. }
  883. rrd_stat_atomic_add(&rrdeng_reserved_file_descriptors, -RRDENG_FD_BUDGET_PER_INSTANCE);
  884. return 0;
  885. }
  886. void rrdeng_prepare_exit(struct rrdengine_instance *ctx)
  887. {
  888. struct rrdeng_cmd cmd;
  889. if (NULL == ctx) {
  890. return;
  891. }
  892. completion_init(&ctx->rrdengine_completion);
  893. cmd.opcode = RRDENG_QUIESCE;
  894. rrdeng_enq_cmd(&ctx->worker_config, &cmd);
  895. /* wait for dbengine to quiesce */
  896. completion_wait_for(&ctx->rrdengine_completion);
  897. completion_destroy(&ctx->rrdengine_completion);
  898. //metalog_prepare_exit(ctx->metalog_ctx);
  899. }