// SPDX-License-Identifier: GPL-3.0-or-later
#include "rrdengine.h"

/* Default global database instance */
struct rrdengine_instance multidb_ctx_storage_tier0;
struct rrdengine_instance multidb_ctx_storage_tier1;
struct rrdengine_instance multidb_ctx_storage_tier2;
struct rrdengine_instance multidb_ctx_storage_tier3;
struct rrdengine_instance multidb_ctx_storage_tier4;

#if RRD_STORAGE_TIERS != 5
#error RRD_STORAGE_TIERS is not 5 - you need to add allocations here
#endif
struct rrdengine_instance *multidb_ctx[RRD_STORAGE_TIERS];

uint8_t tier_page_type[RRD_STORAGE_TIERS] = {PAGE_METRICS, PAGE_TIER, PAGE_TIER, PAGE_TIER, PAGE_TIER};

#if PAGE_TYPE_MAX != 1
#error PAGE_TYPE_MAX is not 1 - you need to add allocations here
#endif
size_t page_type_size[256] = {sizeof(storage_number), sizeof(storage_number_tier1_t)};

__attribute__((constructor)) void initialize_multidb_ctx(void) {
    multidb_ctx[0] = &multidb_ctx_storage_tier0;
    multidb_ctx[1] = &multidb_ctx_storage_tier1;
    multidb_ctx[2] = &multidb_ctx_storage_tier2;
    multidb_ctx[3] = &multidb_ctx_storage_tier3;
    multidb_ctx[4] = &multidb_ctx_storage_tier4;
}

int db_engine_use_malloc = 0;
int default_rrdeng_page_fetch_timeout = 3;
int default_rrdeng_page_fetch_retries = 3;
int default_rrdeng_page_cache_mb = 32;
int default_rrdeng_disk_quota_mb = 256;
int default_multidb_disk_quota_mb = 256;

/* Default behaviour is to unblock data collection if the page cache is full of dirty pages by dropping metrics */
uint8_t rrdeng_drop_metrics_under_page_cache_pressure = 1;

static inline struct rrdengine_instance *get_rrdeng_ctx_from_host(RRDHOST *host, int tier) {
    if(tier < 0 || tier >= RRD_STORAGE_TIERS) tier = 0;
    if(!host->storage_instance[tier]) tier = 0;
    return (struct rrdengine_instance *)host->storage_instance[tier];
}

/* This UUID is not unique across hosts */
void rrdeng_generate_legacy_uuid(const char *dim_id, char *chart_id, uuid_t *ret_uuid)
{
    EVP_MD_CTX *evpctx;
    unsigned char hash_value[EVP_MAX_MD_SIZE];
    unsigned int hash_len;

    evpctx = EVP_MD_CTX_create();
    EVP_DigestInit_ex(evpctx, EVP_sha256(), NULL);
    EVP_DigestUpdate(evpctx, dim_id, strlen(dim_id));
    EVP_DigestUpdate(evpctx, chart_id, strlen(chart_id));
    EVP_DigestFinal_ex(evpctx, hash_value, &hash_len);
    EVP_MD_CTX_destroy(evpctx);
    fatal_assert(hash_len > sizeof(uuid_t));
    memcpy(ret_uuid, hash_value, sizeof(uuid_t));
}

/* Transform legacy UUID to be unique across hosts deterministically */
void rrdeng_convert_legacy_uuid_to_multihost(char machine_guid[GUID_LEN + 1], uuid_t *legacy_uuid, uuid_t *ret_uuid)
{
    EVP_MD_CTX *evpctx;
    unsigned char hash_value[EVP_MAX_MD_SIZE];
    unsigned int hash_len;

    evpctx = EVP_MD_CTX_create();
    EVP_DigestInit_ex(evpctx, EVP_sha256(), NULL);
    EVP_DigestUpdate(evpctx, machine_guid, GUID_LEN);
    EVP_DigestUpdate(evpctx, *legacy_uuid, sizeof(uuid_t));
    EVP_DigestFinal_ex(evpctx, hash_value, &hash_len);
    EVP_MD_CTX_destroy(evpctx);
    fatal_assert(hash_len > sizeof(uuid_t));
    memcpy(ret_uuid, hash_value, sizeof(uuid_t));
}
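
/*
 * Illustrative sketch (not part of the original file): how the two helpers
 * above chain to derive a host-unique metric UUID from legacy identifiers.
 * The dimension and chart id strings below are hypothetical examples.
 *
 *     uuid_t legacy, multihost;
 *     rrdeng_generate_legacy_uuid("user", "system.cpu", &legacy);
 *     rrdeng_convert_legacy_uuid_to_multihost(host->machine_guid, &legacy, &multihost);
 *     // "multihost" is now deterministic per (machine_guid, chart, dimension)
 */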
struct rrdeng_metric_handle {
    RRDDIM *rd;
    struct rrdengine_instance *ctx;
    uuid_t *rrdeng_uuid;                        // database engine metric UUID
    struct pg_cache_page_index *page_index;
};

void rrdeng_metric_free(STORAGE_METRIC_HANDLE *db_metric_handle) {
    freez(db_metric_handle);
}

STORAGE_METRIC_HANDLE *rrdeng_metric_init(RRDDIM *rd, STORAGE_INSTANCE *db_instance) {
    struct rrdengine_instance *ctx = (struct rrdengine_instance *)db_instance;
    struct page_cache *pg_cache;
    uuid_t legacy_uuid;
    uuid_t multihost_legacy_uuid;
    Pvoid_t *PValue;
    struct pg_cache_page_index *page_index = NULL;
    int is_multihost_child = 0;
    RRDHOST *host = rd->rrdset->rrdhost;

    pg_cache = &ctx->pg_cache;

    rrdeng_generate_legacy_uuid(rd->id, rd->rrdset->id, &legacy_uuid);
    if (host != localhost && is_storage_engine_shared((STORAGE_INSTANCE *)ctx))
        is_multihost_child = 1;

    uv_rwlock_rdlock(&pg_cache->metrics_index.lock);
    PValue = JudyHSGet(pg_cache->metrics_index.JudyHS_array, &legacy_uuid, sizeof(uuid_t));
    if (likely(NULL != PValue)) {
        page_index = *PValue;
    }
    uv_rwlock_rdunlock(&pg_cache->metrics_index.lock);

    if (is_multihost_child || NULL == PValue) {
        /* First time we see the legacy UUID or metric belongs to child host in multi-host DB.
         * Drop legacy support, normal path */
        uv_rwlock_rdlock(&pg_cache->metrics_index.lock);
        PValue = JudyHSGet(pg_cache->metrics_index.JudyHS_array, &rd->metric_uuid, sizeof(uuid_t));
        if (likely(NULL != PValue)) {
            page_index = *PValue;
        }
        uv_rwlock_rdunlock(&pg_cache->metrics_index.lock);

        if (NULL == PValue) {
            uv_rwlock_wrlock(&pg_cache->metrics_index.lock);
            PValue = JudyHSIns(&pg_cache->metrics_index.JudyHS_array, &rd->metric_uuid, sizeof(uuid_t), PJE0);
            fatal_assert(NULL == *PValue); /* TODO: figure out concurrency model */
            *PValue = page_index = create_page_index(&rd->metric_uuid);
            page_index->prev = pg_cache->metrics_index.last_page_index;
            pg_cache->metrics_index.last_page_index = page_index;
            uv_rwlock_wrunlock(&pg_cache->metrics_index.lock);
        }
    } else {
        /* There are legacy UUIDs in the database, implement backward compatibility */
        rrdeng_convert_legacy_uuid_to_multihost(rd->rrdset->rrdhost->machine_guid, &legacy_uuid,
                                                &multihost_legacy_uuid);

        int need_to_store = uuid_compare(rd->metric_uuid, multihost_legacy_uuid);

        uuid_copy(rd->metric_uuid, multihost_legacy_uuid);

        if (unlikely(need_to_store && !ctx->tier))
            (void)sql_store_dimension(&rd->metric_uuid, rd->rrdset->chart_uuid, rd->id, rd->name, rd->multiplier, rd->divisor,
                                      rd->algorithm);
    }

    struct rrdeng_metric_handle *mh = mallocz(sizeof(struct rrdeng_metric_handle));
    mh->rd = rd;
    mh->ctx = ctx;
    mh->rrdeng_uuid = &page_index->id;
    mh->page_index = page_index;
    return (STORAGE_METRIC_HANDLE *)mh;
}
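
/*
 * Illustrative sketch (not part of the original file): the expected lifecycle
 * of a metric handle, assuming "rd" is an initialized RRDDIM and "si" points
 * at one of the multidb_ctx instances.
 *
 *     STORAGE_METRIC_HANDLE *mh = rrdeng_metric_init(rd, si);
 *     // ... use the handle with the collect/query APIs below ...
 *     rrdeng_metric_free(mh);
 */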
/*
 * Gets a handle for storing metrics to the database.
 * The handle must be released with rrdeng_store_metric_finalize().
 */
STORAGE_COLLECT_HANDLE *rrdeng_store_metric_init(STORAGE_METRIC_HANDLE *db_metric_handle) {
    struct rrdeng_metric_handle *metric_handle = (struct rrdeng_metric_handle *)db_metric_handle;
    struct rrdeng_collect_handle *handle;
    struct pg_cache_page_index *page_index;

    handle = callocz(1, sizeof(struct rrdeng_collect_handle));
    handle->metric_handle = metric_handle;
    handle->ctx = metric_handle->ctx;
    handle->descr = NULL;
    handle->unaligned_page = 0;

    page_index = metric_handle->page_index;
    uv_rwlock_wrlock(&page_index->lock);
    ++page_index->writers;
    uv_rwlock_wrunlock(&page_index->lock);

    return (STORAGE_COLLECT_HANDLE *)handle;
}

/* The page must be populated and referenced */
static int page_has_only_empty_metrics(struct rrdeng_page_descr *descr)
{
    unsigned i;
    uint8_t has_only_empty_metrics = 1;
    storage_number *page;

    page = descr->pg_cache_descr->page;
    for (i = 0 ; i < descr->page_length / PAGE_POINT_SIZE_BYTES(descr); ++i) {
        if (SN_EMPTY_SLOT != page[i]) {
            has_only_empty_metrics = 0;
            break;
        }
    }
    return has_only_empty_metrics;
}

void rrdeng_store_metric_flush_current_page(STORAGE_COLLECT_HANDLE *collection_handle) {
    struct rrdeng_collect_handle *handle = (struct rrdeng_collect_handle *)collection_handle;
    // struct rrdeng_metric_handle *metric_handle = (struct rrdeng_metric_handle *)handle->metric_handle;
    struct rrdengine_instance *ctx = handle->ctx;
    struct rrdeng_page_descr *descr = handle->descr;

    if (unlikely(!ctx)) return;
    if (unlikely(!descr)) return;

    if (likely(descr->page_length)) {
        int page_is_empty;

        rrd_stat_atomic_add(&ctx->stats.metric_API_producers, -1);

        page_is_empty = page_has_only_empty_metrics(descr);
        if (page_is_empty) {
            debug(D_RRDENGINE, "Page has empty metrics only, deleting:");
            if (unlikely(debug_flags & D_RRDENGINE))
                print_page_cache_descr(descr);
            pg_cache_put(ctx, descr);
            pg_cache_punch_hole(ctx, descr, 1, 0, NULL);
        } else
            rrdeng_commit_page(ctx, descr, handle->page_correlation_id);
    } else {
        dbengine_page_free(descr->pg_cache_descr->page);
        rrdeng_destroy_pg_cache_descr(ctx, descr->pg_cache_descr);
        rrdeng_page_descr_freez(descr);
    }
    handle->descr = NULL;
}

void rrdeng_store_metric_next(STORAGE_COLLECT_HANDLE *collection_handle, usec_t point_in_time, NETDATA_DOUBLE n,
                              NETDATA_DOUBLE min_value,
                              NETDATA_DOUBLE max_value,
                              uint16_t count,
                              uint16_t anomaly_count,
                              SN_FLAGS flags)
{
    struct rrdeng_collect_handle *handle = (struct rrdeng_collect_handle *)collection_handle;
    struct rrdeng_metric_handle *metric_handle = (struct rrdeng_metric_handle *)handle->metric_handle;
    struct rrdengine_instance *ctx = handle->ctx;
    struct page_cache *pg_cache = &ctx->pg_cache;
    struct rrdeng_page_descr *descr = handle->descr;
    RRDDIM *rd = metric_handle->rd;
    void *page;
    uint8_t must_flush_unaligned_page = 0, perfect_page_alignment = 0;

    if (descr) {
        /* Make alignment decisions */
        if (descr->page_length == rd->rrdset->rrddim_page_alignment) {
            /* this is the leading dimension that defines chart alignment */
            perfect_page_alignment = 1;
        }
        /* is the metric far enough out of alignment with the others? */
        if (unlikely(descr->page_length + PAGE_POINT_SIZE_BYTES(descr) < rd->rrdset->rrddim_page_alignment)) {
            handle->unaligned_page = 1;
            debug(D_RRDENGINE, "Metric page is not aligned with chart:");
            if (unlikely(debug_flags & D_RRDENGINE))
                print_page_cache_descr(descr);
        }
        if (unlikely(handle->unaligned_page &&
                     /* did the other metrics change page? */
                     rd->rrdset->rrddim_page_alignment <= PAGE_POINT_SIZE_BYTES(descr))) {
            debug(D_RRDENGINE, "Flushing unaligned metric page.");
            must_flush_unaligned_page = 1;
            handle->unaligned_page = 0;
        }
    }
    if (unlikely(NULL == descr ||
                 descr->page_length + PAGE_POINT_SIZE_BYTES(descr) > RRDENG_BLOCK_SIZE ||
                 must_flush_unaligned_page)) {
        rrdeng_store_metric_flush_current_page(collection_handle);

        page = rrdeng_create_page(ctx, &metric_handle->page_index->id, &descr);
        fatal_assert(page);

        handle->descr = descr;
        handle->page_correlation_id = rrd_atomic_fetch_add(&pg_cache->committed_page_index.latest_corr_id, 1);

        if (0 == rd->rrdset->rrddim_page_alignment) {
            /* this is the leading dimension that defines chart alignment */
            perfect_page_alignment = 1;
        }
    }

    page = descr->pg_cache_descr->page;

    switch (descr->type) {
        case PAGE_METRICS: {
            ((storage_number *)page)[descr->page_length / PAGE_POINT_SIZE_BYTES(descr)] = pack_storage_number(n, flags);
        }
        break;

        case PAGE_TIER: {
            storage_number_tier1_t number_tier1;
            number_tier1.sum_value = (float)n;
            number_tier1.min_value = (float)min_value;
            number_tier1.max_value = (float)max_value;
            number_tier1.anomaly_count = anomaly_count;
            number_tier1.count = count;
            ((storage_number_tier1_t *)page)[descr->page_length / PAGE_POINT_SIZE_BYTES(descr)] = number_tier1;
        }
        break;

        default: {
            static bool logged = false;
            if(!logged) {
                error("DBENGINE: cannot store metric on unknown page type id %d", descr->type);
                logged = true;
            }
        }
        break;
    }

    pg_cache_atomic_set_pg_info(descr, point_in_time, descr->page_length + PAGE_POINT_SIZE_BYTES(descr));

    if (perfect_page_alignment)
        rd->rrdset->rrddim_page_alignment = descr->page_length;

    if (unlikely(INVALID_TIME == descr->start_time)) {
        unsigned long new_metric_API_producers, old_metric_API_max_producers, ret_metric_API_max_producers;

        descr->start_time = point_in_time;
        new_metric_API_producers = rrd_atomic_add_fetch(&ctx->stats.metric_API_producers, 1);
        while (unlikely(new_metric_API_producers > (old_metric_API_max_producers = ctx->metric_API_max_producers))) {
            /* Increase ctx->metric_API_max_producers */
            ret_metric_API_max_producers = ulong_compare_and_swap(&ctx->metric_API_max_producers,
                                                                  old_metric_API_max_producers,
                                                                  new_metric_API_producers);
            if (old_metric_API_max_producers == ret_metric_API_max_producers) {
                /* success */
                break;
            }
        }

        pg_cache_insert(ctx, metric_handle->page_index, descr);
    } else {
        pg_cache_add_new_metric_time(metric_handle->page_index, descr);
    }
}

/*
 * Releases the database reference from the handle for storing metrics.
 * Returns 1 if it's safe to delete the dimension.
 */
int rrdeng_store_metric_finalize(STORAGE_COLLECT_HANDLE *collection_handle) {
    struct rrdeng_collect_handle *handle = (struct rrdeng_collect_handle *)collection_handle;
    struct rrdeng_metric_handle *metric_handle = (struct rrdeng_metric_handle *)handle->metric_handle;
    struct pg_cache_page_index *page_index = metric_handle->page_index;
    uint8_t can_delete_metric = 0;

    rrdeng_store_metric_flush_current_page(collection_handle);
    uv_rwlock_wrlock(&page_index->lock);
    if (!--page_index->writers && !page_index->page_count) {
        can_delete_metric = 1;
    }
    uv_rwlock_wrunlock(&page_index->lock);
    freez(handle);

    return can_delete_metric;
}
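
/*
 * Illustrative sketch (not part of the original file): the collection
 * lifecycle built from the functions above. "mh" is a handle from
 * rrdeng_metric_init(); "now_usec", "value" and "flags" stand for whatever
 * timestamp, sample and SN_FLAGS the caller has computed.
 *
 *     STORAGE_COLLECT_HANDLE *ch = rrdeng_store_metric_init(mh);
 *     rrdeng_store_metric_next(ch, now_usec, value, value, value, 1, 0, flags);
 *     // ... one call per collected point; pages are created and flushed internally ...
 *     if (rrdeng_store_metric_finalize(ch))
 *         ; // no pages remain and no writers: the dimension can be deleted
 */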
//static inline uint32_t *pginfo_to_dt(struct rrdeng_page_info *page_info)
//{
//    return (uint32_t *)&page_info->scratch[0];
//}
//
//static inline uint32_t *pginfo_to_points(struct rrdeng_page_info *page_info)
//{
//    return (uint32_t *)&page_info->scratch[sizeof(uint32_t)];
//}
//
/*
 * Gets a handle for loading metrics from the database.
 * The handle must be released with rrdeng_load_metric_finalize().
 */
void rrdeng_load_metric_init(STORAGE_METRIC_HANDLE *db_metric_handle, struct rrddim_query_handle *rrdimm_handle, time_t start_time, time_t end_time, TIER_QUERY_FETCH tier_query_fetch_type)
{
    struct rrdeng_metric_handle *metric_handle = (struct rrdeng_metric_handle *)db_metric_handle;
    struct rrdengine_instance *ctx = metric_handle->ctx;
    RRDDIM *rd = metric_handle->rd;

    // fprintf(stderr, "%s: %s/%s start time %ld, end time %ld\n", __FUNCTION__ , rd->rrdset->name, rd->name, start_time, end_time);

    struct rrdeng_query_handle *handle;
    unsigned pages_nr;

    rrdimm_handle->start_time = start_time;
    rrdimm_handle->end_time = end_time;

    handle = callocz(1, sizeof(struct rrdeng_query_handle));
    handle->next_page_time = start_time;
    handle->now = start_time;
    handle->tier_query_fetch_type = tier_query_fetch_type;
    // TODO we should store the dt of each page in each page
    // this will produce wrong values for dt in case the user changes
    // the update every of the charts or the tier grouping iterations
    handle->dt_sec = get_tier_grouping(ctx->tier) * (time_t)rd->update_every;
    handle->dt = handle->dt_sec * USEC_PER_SEC;
    handle->position = 0;
    handle->ctx = ctx;
    handle->metric_handle = metric_handle;
    handle->descr = NULL;
    rrdimm_handle->handle = (STORAGE_QUERY_HANDLE *)handle;
    pages_nr = pg_cache_preload(ctx, metric_handle->rrdeng_uuid, start_time * USEC_PER_SEC, end_time * USEC_PER_SEC,
                                NULL, &handle->page_index);
    if (unlikely(NULL == handle->page_index || 0 == pages_nr))
        // there are no metrics to load
        handle->next_page_time = INVALID_TIME;
}

static int rrdeng_load_page_next(struct rrddim_query_handle *rrdimm_handle) {
    struct rrdeng_query_handle *handle = (struct rrdeng_query_handle *)rrdimm_handle->handle;
    struct rrdengine_instance *ctx = handle->ctx;
    struct rrdeng_page_descr *descr = handle->descr;

    uint32_t page_length;
    usec_t page_end_time;
    unsigned position;

    if (likely(descr)) {
        // Drop old page's reference

#ifdef NETDATA_INTERNAL_CHECKS
        rrd_stat_atomic_add(&ctx->stats.metric_API_consumers, -1);
#endif

        pg_cache_put(ctx, descr);
        handle->descr = NULL;
        handle->next_page_time = (handle->page_end_time / USEC_PER_SEC) + 1;

        if (unlikely(handle->next_page_time > rrdimm_handle->end_time))
            return 1;
    }

    usec_t next_page_time = handle->next_page_time * USEC_PER_SEC;
    descr = pg_cache_lookup_next(ctx, handle->page_index, &handle->page_index->id, next_page_time, rrdimm_handle->end_time * USEC_PER_SEC);
    if (NULL == descr)
        return 1;

#ifdef NETDATA_INTERNAL_CHECKS
    rrd_stat_atomic_add(&ctx->stats.metric_API_consumers, 1);
#endif

    handle->descr = descr;
    pg_cache_atomic_get_pg_info(descr, &page_end_time, &page_length);
    if (unlikely(INVALID_TIME == descr->start_time || INVALID_TIME == page_end_time))
        return 1;

    if (unlikely(descr->start_time != page_end_time && next_page_time > descr->start_time)) {
        // we're in the middle of the page somewhere
        unsigned entries = page_length / PAGE_POINT_SIZE_BYTES(descr);
        position = ((uint64_t)(next_page_time - descr->start_time)) * (entries - 1) /
                   (page_end_time - descr->start_time);
    }
    else
        position = 0;

    handle->page_end_time = page_end_time;
    handle->page_length = page_length;
    handle->page = descr->pg_cache_descr->page;
    usec_t entries = handle->entries = page_length / PAGE_POINT_SIZE_BYTES(descr);
    if (likely(entries > 1))
        handle->dt = (page_end_time - descr->start_time) / (entries - 1);
    else {
        // TODO we should store the dt of each page in each page
        // now we keep the dt of whatever was before
        ;
    }

    handle->dt_sec = (time_t)(handle->dt / USEC_PER_SEC);
    handle->position = position;

    return 0;
}

// Returns the metric and sets its timestamp into current_time
// IT IS REQUIRED TO **ALWAYS** SET ALL RETURN VALUES (current_time, end_time, flags)
// IT IS REQUIRED TO **ALWAYS** KEEP TRACK OF TIME, EVEN OUTSIDE THE DATABASE BOUNDARIES
STORAGE_POINT rrdeng_load_metric_next(struct rrddim_query_handle *rrdimm_handle) {
    struct rrdeng_query_handle *handle = (struct rrdeng_query_handle *)rrdimm_handle->handle;
    // struct rrdeng_metric_handle *metric_handle = handle->metric_handle;

    STORAGE_POINT sp;
    struct rrdeng_page_descr *descr = handle->descr;
    unsigned position = handle->position + 1;
    time_t now = handle->now + handle->dt_sec;
    storage_number_tier1_t tier1_value;

    if (unlikely(INVALID_TIME == handle->next_page_time)) {
        handle->next_page_time = INVALID_TIME;
        handle->now = now;
        storage_point_empty(sp, now - handle->dt_sec, now);
        return sp;
    }

    if (unlikely(!descr || position >= handle->entries)) {
        // We need to get a new page
        if(rrdeng_load_page_next(rrdimm_handle)) {
            // next calls will not load any more metrics
            handle->next_page_time = INVALID_TIME;
            handle->now = now;
            storage_point_empty(sp, now - handle->dt_sec, now);
            return sp;
        }

        descr = handle->descr;
        position = handle->position;
        now = (time_t)((descr->start_time + position * handle->dt) / USEC_PER_SEC);
    }

    sp.start_time = now - handle->dt_sec;
    sp.end_time = now;

    handle->position = position;
    handle->now = now;

    switch(descr->type) {
        case PAGE_METRICS: {
            storage_number n = handle->page[position];
            sp.min = sp.max = sp.sum = unpack_storage_number(n);
            sp.flags = n & SN_ALL_FLAGS;
            sp.count = 1;
            sp.anomaly_count = (n & SN_ANOMALY_BIT) ? 0 : 1;
        }
        break;

        case PAGE_TIER: {
            tier1_value = ((storage_number_tier1_t *)handle->page)[position];
            sp.flags = tier1_value.anomaly_count ? 0 : SN_ANOMALY_BIT;
            sp.count = tier1_value.count;
            sp.anomaly_count = tier1_value.anomaly_count;
            sp.min = tier1_value.min_value;
            sp.max = tier1_value.max_value;
            sp.sum = tier1_value.sum_value;
        }
        break;

        // we don't know this page type
        default: {
            static bool logged = false;
            if(!logged) {
                error("DBENGINE: unknown page type %d found. Cannot decode it. Ignoring its metrics.", descr->type);
                logged = true;
            }
            storage_point_empty(sp, sp.start_time, sp.end_time);
        }
        break;
    }

    if (unlikely(now >= rrdimm_handle->end_time)) {
        // next calls will not load any more metrics
        handle->next_page_time = INVALID_TIME;
    }

    return sp;
}

int rrdeng_load_metric_is_finished(struct rrddim_query_handle *rrdimm_handle)
{
    struct rrdeng_query_handle *handle = (struct rrdeng_query_handle *)rrdimm_handle->handle;
    return (INVALID_TIME == handle->next_page_time);
}

/*
 * Releases the database reference from the handle for loading metrics.
 */
void rrdeng_load_metric_finalize(struct rrddim_query_handle *rrdimm_handle)
{
    struct rrdeng_query_handle *handle = (struct rrdeng_query_handle *)rrdimm_handle->handle;
    struct rrdengine_instance *ctx = handle->ctx;
    struct rrdeng_page_descr *descr = handle->descr;

    if (descr) {
#ifdef NETDATA_INTERNAL_CHECKS
        rrd_stat_atomic_add(&ctx->stats.metric_API_consumers, -1);
#endif
        pg_cache_put(ctx, descr);
    }

    // whatever is allocated at rrdeng_load_metric_init() should be freed here
    freez(handle);
    rrdimm_handle->handle = NULL;
}
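
/*
 * Illustrative sketch (not part of the original file): a typical query loop
 * over the functions above. "mh" is a handle from rrdeng_metric_init(); the
 * time range and the TIER_QUERY_FETCH_AVERAGE enum value are assumptions.
 *
 *     struct rrddim_query_handle qh;
 *     rrdeng_load_metric_init(mh, &qh, after, before, TIER_QUERY_FETCH_AVERAGE);
 *     while (!rrdeng_load_metric_is_finished(&qh)) {
 *         STORAGE_POINT sp = rrdeng_load_metric_next(&qh);
 *         // sp.start_time/sp.end_time are always set, even past DB boundaries
 *     }
 *     rrdeng_load_metric_finalize(&qh);
 */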
time_t rrdeng_metric_latest_time(STORAGE_METRIC_HANDLE *db_metric_handle) {
    struct rrdeng_metric_handle *metric_handle = (struct rrdeng_metric_handle *)db_metric_handle;
    struct pg_cache_page_index *page_index = metric_handle->page_index;
    return page_index->latest_time / USEC_PER_SEC;
}

time_t rrdeng_metric_oldest_time(STORAGE_METRIC_HANDLE *db_metric_handle) {
    struct rrdeng_metric_handle *metric_handle = (struct rrdeng_metric_handle *)db_metric_handle;
    struct pg_cache_page_index *page_index = metric_handle->page_index;
    return page_index->oldest_time / USEC_PER_SEC;
}

int rrdeng_metric_latest_time_by_uuid(uuid_t *dim_uuid, time_t *first_entry_t, time_t *last_entry_t, int tier)
{
    struct page_cache *pg_cache;
    struct rrdengine_instance *ctx;
    Pvoid_t *PValue;
    struct pg_cache_page_index *page_index = NULL;

    ctx = get_rrdeng_ctx_from_host(localhost, tier);
    if (unlikely(!ctx)) {
        error("Failed to fetch multidb context");
        return 1;
    }
    pg_cache = &ctx->pg_cache;

    uv_rwlock_rdlock(&pg_cache->metrics_index.lock);
    PValue = JudyHSGet(pg_cache->metrics_index.JudyHS_array, dim_uuid, sizeof(uuid_t));
    if (likely(NULL != PValue)) {
        page_index = *PValue;
    }
    uv_rwlock_rdunlock(&pg_cache->metrics_index.lock);

    if (likely(page_index)) {
        *first_entry_t = page_index->oldest_time / USEC_PER_SEC;
        *last_entry_t = page_index->latest_time / USEC_PER_SEC;
        return 0;
    }

    return 1;
}

int rrdeng_metric_retention_by_uuid(STORAGE_INSTANCE *si, uuid_t *dim_uuid, time_t *first_entry_t, time_t *last_entry_t)
{
    struct page_cache *pg_cache;
    struct rrdengine_instance *ctx;
    Pvoid_t *PValue;
    struct pg_cache_page_index *page_index = NULL;

    ctx = (struct rrdengine_instance *)si;
    if (unlikely(!ctx)) {
        error("DBENGINE: invalid STORAGE INSTANCE to %s()", __FUNCTION__);
        return 1;
    }
    pg_cache = &ctx->pg_cache;

    uv_rwlock_rdlock(&pg_cache->metrics_index.lock);
    PValue = JudyHSGet(pg_cache->metrics_index.JudyHS_array, dim_uuid, sizeof(uuid_t));
    if (likely(NULL != PValue)) {
        page_index = *PValue;
    }
    uv_rwlock_rdunlock(&pg_cache->metrics_index.lock);

    if (likely(page_index)) {
        *first_entry_t = page_index->oldest_time / USEC_PER_SEC;
        *last_entry_t = page_index->latest_time / USEC_PER_SEC;
        return 0;
    }

    return 1;
}
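
/*
 * Illustrative sketch (not part of the original file): both retention lookups
 * above return 0 on success and fill the output parameters. "si" and "uuid"
 * are assumed to be a valid storage instance and metric UUID.
 *
 *     time_t first = 0, last = 0;
 *     if (!rrdeng_metric_retention_by_uuid(si, &uuid, &first, &last))
 *         ; // retention spans [first, last] in seconds since the epoch
 */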
/* Also gets a reference for the page */
void *rrdeng_create_page(struct rrdengine_instance *ctx, uuid_t *id, struct rrdeng_page_descr **ret_descr)
{
    struct rrdeng_page_descr *descr;
    struct page_cache_descr *pg_cache_descr;
    void *page;

    /* TODO: check maximum number of pages in page cache limit */
    descr = pg_cache_create_descr();
    descr->id = id; /* TODO: add page type: metric, log, something? */
    descr->type = ctx->page_type;
    page = dbengine_page_alloc(); /*TODO: add page size */
    rrdeng_page_descr_mutex_lock(ctx, descr);
    pg_cache_descr = descr->pg_cache_descr;
    pg_cache_descr->page = page;
    pg_cache_descr->flags = RRD_PAGE_DIRTY /*| RRD_PAGE_LOCKED */ | RRD_PAGE_POPULATED /* | BEING_COLLECTED */;
    pg_cache_descr->refcnt = 1;

    debug(D_RRDENGINE, "Created new page:");
    if (unlikely(debug_flags & D_RRDENGINE))
        print_page_cache_descr(descr);
    rrdeng_page_descr_mutex_unlock(ctx, descr);
    *ret_descr = descr;
    return page;
}

/* The page must not be empty */
void rrdeng_commit_page(struct rrdengine_instance *ctx, struct rrdeng_page_descr *descr,
                        Word_t page_correlation_id)
{
    struct page_cache *pg_cache = &ctx->pg_cache;
    Pvoid_t *PValue;
    unsigned nr_committed_pages;

    if (unlikely(NULL == descr)) {
        debug(D_RRDENGINE, "%s: page descriptor is NULL, page has already been force-committed.", __func__);
        return;
    }
    fatal_assert(descr->page_length);

    uv_rwlock_wrlock(&pg_cache->committed_page_index.lock);
    PValue = JudyLIns(&pg_cache->committed_page_index.JudyL_array, page_correlation_id, PJE0);
    *PValue = descr;
    nr_committed_pages = ++pg_cache->committed_page_index.nr_committed_pages;
    uv_rwlock_wrunlock(&pg_cache->committed_page_index.lock);

    if (nr_committed_pages >= pg_cache_hard_limit(ctx) / 2) {
        /* over 50% of pages have not been committed yet */

        if (ctx->drop_metrics_under_page_cache_pressure &&
            nr_committed_pages >= pg_cache_committed_hard_limit(ctx)) {
            /* 100% of pages are dirty */
            struct rrdeng_cmd cmd;

            cmd.opcode = RRDENG_INVALIDATE_OLDEST_MEMORY_PAGE;
            rrdeng_enq_cmd(&ctx->worker_config, &cmd);
        } else {
            if (0 == (unsigned long) ctx->stats.pg_cache_over_half_dirty_events) {
                /* only print the first time */
                errno = 0;
                error("Failed to flush dirty buffers quickly enough in dbengine instance \"%s\". "
                      "Metric data at risk of not being stored in the database, "
                      "please reduce disk load or use a faster disk.", ctx->dbfiles_path);
            }
            rrd_stat_atomic_add(&ctx->stats.pg_cache_over_half_dirty_events, 1);
            rrd_stat_atomic_add(&global_pg_cache_over_half_dirty_events, 1);
        }
    }

    pg_cache_put(ctx, descr);
}
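
/*
 * Illustrative sketch (not part of the original file): the create/commit
 * pairing, mirroring how rrdeng_store_metric_next() above uses these two
 * functions. The correlation id comes from the committed page index counter.
 *
 *     struct rrdeng_page_descr *descr = NULL;
 *     void *page = rrdeng_create_page(ctx, &page_index->id, &descr);
 *     // ... fill the page and update descr->page_length ...
 *     Word_t corr_id = rrd_atomic_fetch_add(&ctx->pg_cache.committed_page_index.latest_corr_id, 1);
 *     rrdeng_commit_page(ctx, descr, corr_id);  // also drops the creation reference
 */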
/* Gets a reference for the page */
void *rrdeng_get_latest_page(struct rrdengine_instance *ctx, uuid_t *id, void **handle)
{
    struct rrdeng_page_descr *descr;
    struct page_cache_descr *pg_cache_descr;

    debug(D_RRDENGINE, "Reading existing page:");
    descr = pg_cache_lookup(ctx, NULL, id, INVALID_TIME);
    if (NULL == descr) {
        *handle = NULL;
        return NULL;
    }
    *handle = descr;
    pg_cache_descr = descr->pg_cache_descr;

    return pg_cache_descr->page;
}

/* Gets a reference for the page */
void *rrdeng_get_page(struct rrdengine_instance *ctx, uuid_t *id, usec_t point_in_time, void **handle)
{
    struct rrdeng_page_descr *descr;
    struct page_cache_descr *pg_cache_descr;

    debug(D_RRDENGINE, "Reading existing page:");
    descr = pg_cache_lookup(ctx, NULL, id, point_in_time);
    if (NULL == descr) {
        *handle = NULL;
        return NULL;
    }
    *handle = descr;
    pg_cache_descr = descr->pg_cache_descr;

    return pg_cache_descr->page;
}

/*
 * Gathers Database Engine statistics.
 * Careful when modifying this function.
 * You must not change the indices of the statistics or user code will break.
 * You must not exceed RRDENG_NR_STATS or it will crash.
 */
void rrdeng_get_37_statistics(struct rrdengine_instance *ctx, unsigned long long *array)
{
    if (ctx == NULL)
        return;

    struct page_cache *pg_cache = &ctx->pg_cache;

    array[0] = (uint64_t)ctx->stats.metric_API_producers;
    array[1] = (uint64_t)ctx->stats.metric_API_consumers;
    array[2] = (uint64_t)pg_cache->page_descriptors;
    array[3] = (uint64_t)pg_cache->populated_pages;
    array[4] = (uint64_t)pg_cache->committed_page_index.nr_committed_pages;
    array[5] = (uint64_t)ctx->stats.pg_cache_insertions;
    array[6] = (uint64_t)ctx->stats.pg_cache_deletions;
    array[7] = (uint64_t)ctx->stats.pg_cache_hits;
    array[8] = (uint64_t)ctx->stats.pg_cache_misses;
    array[9] = (uint64_t)ctx->stats.pg_cache_backfills;
    array[10] = (uint64_t)ctx->stats.pg_cache_evictions;
    array[11] = (uint64_t)ctx->stats.before_compress_bytes;
    array[12] = (uint64_t)ctx->stats.after_compress_bytes;
    array[13] = (uint64_t)ctx->stats.before_decompress_bytes;
    array[14] = (uint64_t)ctx->stats.after_decompress_bytes;
    array[15] = (uint64_t)ctx->stats.io_write_bytes;
    array[16] = (uint64_t)ctx->stats.io_write_requests;
    array[17] = (uint64_t)ctx->stats.io_read_bytes;
    array[18] = (uint64_t)ctx->stats.io_read_requests;
    array[19] = (uint64_t)ctx->stats.io_write_extent_bytes;
    array[20] = (uint64_t)ctx->stats.io_write_extents;
    array[21] = (uint64_t)ctx->stats.io_read_extent_bytes;
    array[22] = (uint64_t)ctx->stats.io_read_extents;
    array[23] = (uint64_t)ctx->stats.datafile_creations;
    array[24] = (uint64_t)ctx->stats.datafile_deletions;
    array[25] = (uint64_t)ctx->stats.journalfile_creations;
    array[26] = (uint64_t)ctx->stats.journalfile_deletions;
    array[27] = (uint64_t)ctx->stats.page_cache_descriptors;
    array[28] = (uint64_t)ctx->stats.io_errors;
    array[29] = (uint64_t)ctx->stats.fs_errors;
    array[30] = (uint64_t)global_io_errors;
    array[31] = (uint64_t)global_fs_errors;
    array[32] = (uint64_t)rrdeng_reserved_file_descriptors;
    array[33] = (uint64_t)ctx->stats.pg_cache_over_half_dirty_events;
    array[34] = (uint64_t)global_pg_cache_over_half_dirty_events;
    array[35] = (uint64_t)ctx->stats.flushing_pressure_page_deletions;
    array[36] = (uint64_t)global_flushing_pressure_page_deletions;

    fatal_assert(RRDENG_NR_STATS == 37);
}
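
/*
 * Illustrative sketch (not part of the original file): callers must supply an
 * array of at least RRDENG_NR_STATS (37) slots; the indices are part of the
 * stable interface documented above.
 *
 *     unsigned long long stats[RRDENG_NR_STATS];
 *     rrdeng_get_37_statistics(ctx, stats);
 *     // stats[7] = page cache hits, stats[8] = page cache misses, ...
 */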
/* Releases reference to page */
void rrdeng_put_page(struct rrdengine_instance *ctx, void *handle)
{
    pg_cache_put(ctx, (struct rrdeng_page_descr *)handle);
}
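
/*
 * Illustrative sketch (not part of the original file): every successful
 * rrdeng_get_page()/rrdeng_get_latest_page() call takes a page reference that
 * must be returned with rrdeng_put_page().
 *
 *     void *ph = NULL;
 *     void *page = rrdeng_get_page(ctx, &uuid, point_in_time, &ph);
 *     if (page) {
 *         // ... read from the page ...
 *         rrdeng_put_page(ctx, ph);
 *     }
 */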
/*
 * Returns 0 on success, negative on error
 */
int rrdeng_init(RRDHOST *host, struct rrdengine_instance **ctxp, char *dbfiles_path, unsigned page_cache_mb,
                unsigned disk_space_mb, int tier) {
    struct rrdengine_instance *ctx;
    int error;
    uint32_t max_open_files;

    max_open_files = rlimit_nofile.rlim_cur / 4;

    /* reserve RRDENG_FD_BUDGET_PER_INSTANCE file descriptors for this instance */
    rrd_stat_atomic_add(&rrdeng_reserved_file_descriptors, RRDENG_FD_BUDGET_PER_INSTANCE);
    if (rrdeng_reserved_file_descriptors > max_open_files) {
        error(
            "Exceeded the budget of available file descriptors (%u/%u), cannot create new dbengine instance.",
            (unsigned)rrdeng_reserved_file_descriptors,
            (unsigned)max_open_files);

        rrd_stat_atomic_add(&global_fs_errors, 1);
        rrd_stat_atomic_add(&rrdeng_reserved_file_descriptors, -RRDENG_FD_BUDGET_PER_INSTANCE);
        return UV_EMFILE;
    }

    if(NULL == ctxp) {
        ctx = multidb_ctx[tier];
        memset(ctx, 0, sizeof(*ctx));
    }
    else {
        *ctxp = ctx = callocz(1, sizeof(*ctx));
    }
    ctx->tier = tier;
    ctx->page_type = tier_page_type[tier];
    ctx->global_compress_alg = RRD_LZ4;
    if (page_cache_mb < RRDENG_MIN_PAGE_CACHE_SIZE_MB)
        page_cache_mb = RRDENG_MIN_PAGE_CACHE_SIZE_MB;
    ctx->max_cache_pages = page_cache_mb * (1048576LU / RRDENG_BLOCK_SIZE);
    /* try to keep 5% of the page cache free */
    ctx->cache_pages_low_watermark = (ctx->max_cache_pages * 95LLU) / 100;
    if (disk_space_mb < RRDENG_MIN_DISK_SPACE_MB)
        disk_space_mb = RRDENG_MIN_DISK_SPACE_MB;
    ctx->max_disk_space = disk_space_mb * 1048576LLU;
    strncpyz(ctx->dbfiles_path, dbfiles_path, sizeof(ctx->dbfiles_path) - 1);
    ctx->dbfiles_path[sizeof(ctx->dbfiles_path) - 1] = '\0';
    if (NULL == host)
        strncpyz(ctx->machine_guid, registry_get_this_machine_guid(), GUID_LEN);
    else
        strncpyz(ctx->machine_guid, host->machine_guid, GUID_LEN);

    ctx->drop_metrics_under_page_cache_pressure = rrdeng_drop_metrics_under_page_cache_pressure;
    ctx->metric_API_max_producers = 0;
    ctx->quiesce = NO_QUIESCE;
    ctx->metalog_ctx = NULL; /* only set this after the metadata log has finished initializing */
    ctx->host = host;

    memset(&ctx->worker_config, 0, sizeof(ctx->worker_config));
    ctx->worker_config.ctx = ctx;
    init_page_cache(ctx);
    init_commit_log(ctx);
    error = init_rrd_files(ctx);
    if (error) {
        goto error_after_init_rrd_files;
    }

    completion_init(&ctx->rrdengine_completion);
    fatal_assert(0 == uv_thread_create(&ctx->worker_config.thread, rrdeng_worker, &ctx->worker_config));
    /* wait for worker thread to initialize */
    completion_wait_for(&ctx->rrdengine_completion);
    completion_destroy(&ctx->rrdengine_completion);
    uv_thread_set_name_np(ctx->worker_config.thread, "LIBUV_WORKER");
    if (ctx->worker_config.error) {
        goto error_after_rrdeng_worker;
    }
    error = metalog_init(ctx);
    if (error) {
        error("Failed to initialize metadata log file event loop.");
        goto error_after_rrdeng_worker;
    }

    return 0;

error_after_rrdeng_worker:
    finalize_rrd_files(ctx);
error_after_init_rrd_files:
    free_page_cache(ctx);
    if (!is_storage_engine_shared((STORAGE_INSTANCE *)ctx)) {
        freez(ctx);
        if (ctxp)
            *ctxp = NULL;
    }
    rrd_stat_atomic_add(&rrdeng_reserved_file_descriptors, -RRDENG_FD_BUDGET_PER_INSTANCE);
    return UV_EIO;
}
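
/*
 * Illustrative sketch (not part of the original file): initializing a
 * dedicated (non-shared) instance; the path and sizes are hypothetical.
 * On failure the return value is a negative UV_* error code.
 *
 *     struct rrdengine_instance *ctx = NULL;
 *     int rc = rrdeng_init(host, &ctx, "/var/cache/netdata/dbengine", 32, 256, 0);
 *     if (rc < 0)
 *         error("dbengine init failed: %s", uv_strerror(rc));
 */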
/*
 * Returns 0 on success, 1 on error
 */
int rrdeng_exit(struct rrdengine_instance *ctx)
{
    struct rrdeng_cmd cmd;

    if (NULL == ctx) {
        return 1;
    }

    /* TODO: add page to page cache */
    cmd.opcode = RRDENG_SHUTDOWN;
    rrdeng_enq_cmd(&ctx->worker_config, &cmd);

    fatal_assert(0 == uv_thread_join(&ctx->worker_config.thread));

    finalize_rrd_files(ctx);
    //metalog_exit(ctx->metalog_ctx);
    free_page_cache(ctx);

    if(!is_storage_engine_shared((STORAGE_INSTANCE *)ctx))
        freez(ctx);

    rrd_stat_atomic_add(&rrdeng_reserved_file_descriptors, -RRDENG_FD_BUDGET_PER_INSTANCE);
    return 0;
}

void rrdeng_prepare_exit(struct rrdengine_instance *ctx)
{
    struct rrdeng_cmd cmd;

    if (NULL == ctx) {
        return;
    }

    completion_init(&ctx->rrdengine_completion);
    cmd.opcode = RRDENG_QUIESCE;
    rrdeng_enq_cmd(&ctx->worker_config, &cmd);

    /* wait for dbengine to quiesce */
    completion_wait_for(&ctx->rrdengine_completion);
    completion_destroy(&ctx->rrdengine_completion);

    //metalog_prepare_exit(ctx->metalog_ctx);
}
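
/*
 * Illustrative sketch (not part of the original file): the expected shutdown
 * order is to quiesce first, then tear down.
 *
 *     rrdeng_prepare_exit(ctx);  // flush dirty pages, reach a quiescent state
 *     rrdeng_exit(ctx);          // stop the worker thread and release resources
 */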