rrdengineapi.c

// SPDX-License-Identifier: GPL-3.0-or-later
#include "rrdengine.h"

/* Default global database instance */
struct rrdengine_instance multidb_ctx_storage_tier0;
struct rrdengine_instance multidb_ctx_storage_tier1;
struct rrdengine_instance multidb_ctx_storage_tier2;
struct rrdengine_instance multidb_ctx_storage_tier3;
struct rrdengine_instance multidb_ctx_storage_tier4;

#define mrg_metric_ctx(metric) (struct rrdengine_instance *)mrg_metric_section(main_mrg, metric)

#if RRD_STORAGE_TIERS != 5
#error RRD_STORAGE_TIERS is not 5 - you need to add allocations here
#endif
struct rrdengine_instance *multidb_ctx[RRD_STORAGE_TIERS];
uint8_t tier_page_type[RRD_STORAGE_TIERS] = {PAGE_METRICS, PAGE_TIER, PAGE_TIER, PAGE_TIER, PAGE_TIER};

#if defined(ENV32BIT)
size_t tier_page_size[RRD_STORAGE_TIERS] = {2048, 1024, 192, 192, 192};
#else
size_t tier_page_size[RRD_STORAGE_TIERS] = {4096, 2048, 384, 384, 384};
#endif

#if PAGE_TYPE_MAX != 1
#error PAGE_TYPE_MAX is not 1 - you need to add allocations here
#endif
size_t page_type_size[256] = {sizeof(storage_number), sizeof(storage_number_tier1_t)};

__attribute__((constructor)) void initialize_multidb_ctx(void) {
    multidb_ctx[0] = &multidb_ctx_storage_tier0;
    multidb_ctx[1] = &multidb_ctx_storage_tier1;
    multidb_ctx[2] = &multidb_ctx_storage_tier2;
    multidb_ctx[3] = &multidb_ctx_storage_tier3;
    multidb_ctx[4] = &multidb_ctx_storage_tier4;
}

int db_engine_journal_check = 0;
int default_rrdeng_disk_quota_mb = 256;
int default_multidb_disk_quota_mb = 256;

#if defined(ENV32BIT)
int default_rrdeng_page_cache_mb = 16;
int default_rrdeng_extent_cache_mb = 0;
#else
int default_rrdeng_page_cache_mb = 32;
int default_rrdeng_extent_cache_mb = 0;
#endif
// ----------------------------------------------------------------------------
// metrics groups

static inline void rrdeng_page_alignment_acquire(struct pg_alignment *pa) {
    if(unlikely(!pa)) return;
    __atomic_add_fetch(&pa->refcount, 1, __ATOMIC_SEQ_CST);
}

static inline bool rrdeng_page_alignment_release(struct pg_alignment *pa) {
    if(unlikely(!pa)) return true;

    if(__atomic_sub_fetch(&pa->refcount, 1, __ATOMIC_SEQ_CST) == 0) {
        freez(pa);
        return true;
    }

    return false;
}

// charts call this
STORAGE_METRICS_GROUP *rrdeng_metrics_group_get(STORAGE_INSTANCE *db_instance __maybe_unused, uuid_t *uuid __maybe_unused) {
    struct pg_alignment *pa = callocz(1, sizeof(struct pg_alignment));
    rrdeng_page_alignment_acquire(pa);
    return (STORAGE_METRICS_GROUP *)pa;
}

// charts call this
void rrdeng_metrics_group_release(STORAGE_INSTANCE *db_instance __maybe_unused, STORAGE_METRICS_GROUP *smg) {
    if(unlikely(!smg)) return;

    struct pg_alignment *pa = (struct pg_alignment *)smg;
    rrdeng_page_alignment_release(pa);
}

// ----------------------------------------------------------------------------
// metric handle for legacy dbs

/* This UUID is not unique across hosts */
void rrdeng_generate_legacy_uuid(const char *dim_id, const char *chart_id, uuid_t *ret_uuid)
{
    EVP_MD_CTX *evpctx;
    unsigned char hash_value[EVP_MAX_MD_SIZE];
    unsigned int hash_len;

    evpctx = EVP_MD_CTX_create();
    EVP_DigestInit_ex(evpctx, EVP_sha256(), NULL);
    EVP_DigestUpdate(evpctx, dim_id, strlen(dim_id));
    EVP_DigestUpdate(evpctx, chart_id, strlen(chart_id));
    EVP_DigestFinal_ex(evpctx, hash_value, &hash_len);
    EVP_MD_CTX_destroy(evpctx);
    fatal_assert(hash_len > sizeof(uuid_t));
    memcpy(ret_uuid, hash_value, sizeof(uuid_t));
}

static METRIC *rrdeng_metric_get_legacy(STORAGE_INSTANCE *db_instance, const char *rd_id, const char *st_id) {
    struct rrdengine_instance *ctx = (struct rrdengine_instance *)db_instance;
    uuid_t legacy_uuid;
    rrdeng_generate_legacy_uuid(rd_id, st_id, &legacy_uuid);
    return mrg_metric_get_and_acquire(main_mrg, &legacy_uuid, (Word_t) ctx);
}

// ----------------------------------------------------------------------------
// metric handle

void rrdeng_metric_release(STORAGE_METRIC_HANDLE *db_metric_handle) {
    METRIC *metric = (METRIC *)db_metric_handle;
    mrg_metric_release(main_mrg, metric);
}

STORAGE_METRIC_HANDLE *rrdeng_metric_dup(STORAGE_METRIC_HANDLE *db_metric_handle) {
    METRIC *metric = (METRIC *)db_metric_handle;
    return (STORAGE_METRIC_HANDLE *) mrg_metric_dup(main_mrg, metric);
}

STORAGE_METRIC_HANDLE *rrdeng_metric_get(STORAGE_INSTANCE *db_instance, uuid_t *uuid) {
    struct rrdengine_instance *ctx = (struct rrdengine_instance *)db_instance;
    return (STORAGE_METRIC_HANDLE *) mrg_metric_get_and_acquire(main_mrg, uuid, (Word_t) ctx);
}

static METRIC *rrdeng_metric_create(STORAGE_INSTANCE *db_instance, uuid_t *uuid) {
    internal_fatal(!db_instance, "DBENGINE: db_instance is NULL");

    struct rrdengine_instance *ctx = (struct rrdengine_instance *)db_instance;
    MRG_ENTRY entry = {
        .uuid = uuid,
        .section = (Word_t)ctx,
        .first_time_s = 0,
        .last_time_s = 0,
        .latest_update_every_s = 0,
    };

    METRIC *metric = mrg_metric_add_and_acquire(main_mrg, entry, NULL);
    return metric;
}

STORAGE_METRIC_HANDLE *rrdeng_metric_get_or_create(RRDDIM *rd, STORAGE_INSTANCE *db_instance) {
    struct rrdengine_instance *ctx = (struct rrdengine_instance *)db_instance;
    METRIC *metric;

    metric = mrg_metric_get_and_acquire(main_mrg, &rd->metric_uuid, (Word_t) ctx);

    if(unlikely(!metric)) {
        if(unlikely(ctx->config.legacy)) {
            // this is a single host database
            // generate uuid from the chart and dimensions ids
            // and overwrite the one supplied by rrddim
            metric = rrdeng_metric_get_legacy(db_instance, rrddim_id(rd), rrdset_id(rd->rrdset));
            if (metric)
                uuid_copy(rd->metric_uuid, *mrg_metric_uuid(main_mrg, metric));
        }

        if(likely(!metric))
            metric = rrdeng_metric_create(db_instance, &rd->metric_uuid);
    }

#ifdef NETDATA_INTERNAL_CHECKS
    if(uuid_memcmp(&rd->metric_uuid, mrg_metric_uuid(main_mrg, metric)) != 0) {
        char uuid1[UUID_STR_LEN + 1];
        char uuid2[UUID_STR_LEN + 1];

        uuid_unparse(rd->metric_uuid, uuid1);
        uuid_unparse(*mrg_metric_uuid(main_mrg, metric), uuid2);
        fatal("DBENGINE: uuids do not match, asked for metric '%s', but got metric '%s'", uuid1, uuid2);
    }

    if(mrg_metric_ctx(metric) != ctx)
        fatal("DBENGINE: mixed up db instances, asked for metric from %p, got from %p",
              ctx, mrg_metric_ctx(metric));
#endif

    return (STORAGE_METRIC_HANDLE *)metric;
}

// ----------------------------------------------------------------------------
// collect ops
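
// Reconciles the collection handle's update_every with the one stored in the
// metrics registry (MRG): if the handle has no update_every yet, it adopts the
// MRG value; otherwise the handle's value is pushed into the MRG.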
static inline void check_and_fix_mrg_update_every(struct rrdeng_collect_handle *handle) {
    if(unlikely((time_t)(handle->update_every_ut / USEC_PER_SEC) != mrg_metric_get_update_every_s(main_mrg, handle->metric))) {
        internal_error(true, "DBENGINE: collection handle has update every %ld, but the metric registry has %ld. Fixing it.",
                       (time_t)(handle->update_every_ut / USEC_PER_SEC), mrg_metric_get_update_every_s(main_mrg, handle->metric));

        if(unlikely(!handle->update_every_ut))
            handle->update_every_ut = (usec_t)mrg_metric_get_update_every_s(main_mrg, handle->metric) * USEC_PER_SEC;
        else
            mrg_metric_set_update_every(main_mrg, handle->metric, (time_t)(handle->update_every_ut / USEC_PER_SEC));
    }
}

static inline bool check_completed_page_consistency(struct rrdeng_collect_handle *handle __maybe_unused) {
#ifdef NETDATA_INTERNAL_CHECKS
    if (unlikely(!handle->page || !handle->page_entries_max || !handle->page_position || !handle->page_end_time_ut))
        return false;

    struct rrdengine_instance *ctx = mrg_metric_ctx(handle->metric);

    uuid_t *uuid = mrg_metric_uuid(main_mrg, handle->metric);
    time_t start_time_s = pgc_page_start_time_s(handle->page);
    time_t end_time_s = pgc_page_end_time_s(handle->page);
    time_t update_every_s = pgc_page_update_every_s(handle->page);
    size_t page_length = handle->page_position * CTX_POINT_SIZE_BYTES(ctx);
    size_t entries = handle->page_position;
    time_t overwrite_zero_update_every_s = (time_t)(handle->update_every_ut / USEC_PER_SEC);

    if(end_time_s > max_acceptable_collected_time())
        handle->page_flags |= RRDENG_PAGE_COMPLETED_IN_FUTURE;

    VALIDATED_PAGE_DESCRIPTOR vd = validate_page(
        uuid,
        start_time_s,
        end_time_s,
        update_every_s,
        page_length,
        ctx->config.page_type,
        entries,
        0, // do not check for future timestamps - we inherit the timestamps of the children
        overwrite_zero_update_every_s,
        false,
        "collected",
        handle->page_flags);

    return vd.is_valid;
#else
    return true;
#endif
}
/*
 * Gets a handle for storing metrics to the database.
 * The handle must be released with rrdeng_store_metric_finalize().
 */
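/*
 * Typical collection flow through this API (a sketch based on the functions
 * defined in this file; the exact caller is the storage engine layer above):
 *
 *   STORAGE_COLLECT_HANDLE *h = rrdeng_store_metric_init(metric_handle, update_every, smg);
 *   rrdeng_store_metric_next(h, point_in_time_ut, value, min, max, count, anomaly_count, flags);
 *   // ... one call per collected point ...
 *   rrdeng_store_metric_finalize(h);
 */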
STORAGE_COLLECT_HANDLE *rrdeng_store_metric_init(STORAGE_METRIC_HANDLE *db_metric_handle, uint32_t update_every, STORAGE_METRICS_GROUP *smg) {
    METRIC *metric = (METRIC *)db_metric_handle;
    struct rrdengine_instance *ctx = mrg_metric_ctx(metric);

    bool is_1st_metric_writer = true;
    if(!mrg_metric_set_writer(main_mrg, metric)) {
        is_1st_metric_writer = false;
        char uuid[UUID_STR_LEN + 1];
        uuid_unparse(*mrg_metric_uuid(main_mrg, metric), uuid);
        netdata_log_error("DBENGINE: metric '%s' is already collected and should not be collected twice - expect gaps on the charts", uuid);
    }

    metric = mrg_metric_dup(main_mrg, metric);

    struct rrdeng_collect_handle *handle;

    handle = callocz(1, sizeof(struct rrdeng_collect_handle));
    handle->common.backend = STORAGE_ENGINE_BACKEND_DBENGINE;
    handle->metric = metric;
    handle->page = NULL;
    handle->data = NULL;
    handle->data_size = 0;
    handle->page_position = 0;
    handle->page_entries_max = 0;
    handle->update_every_ut = (usec_t)update_every * USEC_PER_SEC;
    handle->options = is_1st_metric_writer ? RRDENG_1ST_METRIC_WRITER : 0;

    __atomic_add_fetch(&ctx->atomic.collectors_running, 1, __ATOMIC_RELAXED);
    if(!is_1st_metric_writer)
        __atomic_add_fetch(&ctx->atomic.collectors_running_duplicate, 1, __ATOMIC_RELAXED);

    mrg_metric_set_update_every(main_mrg, metric, update_every);

    handle->alignment = (struct pg_alignment *)smg;
    rrdeng_page_alignment_acquire(handle->alignment);

    // this is important!
    // if we don't set the page_end_time_ut during the first collection
    // data collection may be able to go back in time and during the addition of new pages
    // clean pages may be found matching ours!

    time_t db_first_time_s, db_last_time_s, db_update_every_s;
    mrg_metric_get_retention(main_mrg, metric, &db_first_time_s, &db_last_time_s, &db_update_every_s);
    handle->page_end_time_ut = (usec_t)db_last_time_s * USEC_PER_SEC;

    return (STORAGE_COLLECT_HANDLE *)handle;
}
/* The page must be populated and referenced */
static bool page_has_only_empty_metrics(struct rrdeng_collect_handle *handle) {
    switch(handle->type) {
        case PAGE_METRICS: {
            size_t slots = handle->page_position;
            storage_number *array = (storage_number *)pgc_page_data(handle->page);
            for (size_t i = 0 ; i < slots; ++i) {
                if(does_storage_number_exist(array[i]))
                    return false;
            }
        }
        break;

        case PAGE_TIER: {
            size_t slots = handle->page_position;
            storage_number_tier1_t *array = (storage_number_tier1_t *)pgc_page_data(handle->page);
            for (size_t i = 0 ; i < slots; ++i) {
                if(fpclassify(array[i].sum_value) != FP_NAN)
                    return false;
            }
        }
        break;

        default: {
            static bool logged = false;
            if(!logged) {
                netdata_log_error("DBENGINE: cannot check page for nulls on unknown page type id %d", (mrg_metric_ctx(handle->metric))->config.page_type);
                logged = true;
            }
            return false;
        }
    }

    return true;
}
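
// Completes the page currently being collected: pages that hold no data
// (or only empty slots) are evicted from the main cache, everything else is
// validated and turned from hot to dirty so it can be flushed to disk.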
void rrdeng_store_metric_flush_current_page(STORAGE_COLLECT_HANDLE *collection_handle) {
    struct rrdeng_collect_handle *handle = (struct rrdeng_collect_handle *)collection_handle;

    if (unlikely(!handle->page))
        return;

    if(!handle->page_position || page_has_only_empty_metrics(handle))
        pgc_page_to_clean_evict_or_release(main_cache, handle->page);

    else {
        check_completed_page_consistency(handle);
        mrg_metric_set_clean_latest_time_s(main_mrg, handle->metric, pgc_page_end_time_s(handle->page));
        pgc_page_hot_to_dirty_and_release(main_cache, handle->page);
    }

    mrg_metric_set_hot_latest_time_s(main_mrg, handle->metric, 0);

    handle->page = NULL;
    handle->page_flags = 0;
    handle->page_position = 0;
    handle->page_entries_max = 0;
    handle->data = NULL;
    handle->data_size = 0;

    // important!
    // we should never zero page end time ut, because this will allow
    // collection to go back in time
    // handle->page_end_time_ut = 0;
    // handle->page_start_time_ut;

    check_and_fix_mrg_update_every(handle);

    timing_step(TIMING_STEP_DBENGINE_FLUSH_PAGE);
}
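
// Registers a new hot page in the main cache for the point being stored.
// If another page already covers the same timestamp for this metric (a
// conflict), the start time is moved back by one update_every at a time
// until the page can be added.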
static void rrdeng_store_metric_create_new_page(struct rrdeng_collect_handle *handle,
                                                struct rrdengine_instance *ctx,
                                                usec_t point_in_time_ut,
                                                void *data,
                                                size_t data_size) {
    time_t point_in_time_s = (time_t)(point_in_time_ut / USEC_PER_SEC);
    const time_t update_every_s = (time_t)(handle->update_every_ut / USEC_PER_SEC);

    PGC_ENTRY page_entry = {
        .section = (Word_t) ctx,
        .metric_id = mrg_metric_id(main_mrg, handle->metric),
        .start_time_s = point_in_time_s,
        .end_time_s = point_in_time_s,
        .size = data_size,
        .data = data,
        .update_every_s = (uint32_t) update_every_s,
        .hot = true
    };

    size_t conflicts = 0;
    bool added = true;
    PGC_PAGE *page = pgc_page_add_and_acquire(main_cache, page_entry, &added);
    while (unlikely(!added)) {
        conflicts++;

        char uuid[UUID_STR_LEN + 1];
        uuid_unparse(*mrg_metric_uuid(main_mrg, handle->metric), uuid);

#ifdef NETDATA_INTERNAL_CHECKS
        internal_error(true,
#else
        error_limit_static_global_var(erl, 1, 0);
        error_limit(&erl,
#endif
                    "DBENGINE: metric '%s' new page from %ld to %ld, update every %ld, has a conflict in main cache "
                    "with existing %s%s page from %ld to %ld, update every %ld - "
                    "is it collected more than once?",
                    uuid,
                    page_entry.start_time_s, page_entry.end_time_s, (time_t)page_entry.update_every_s,
                    pgc_is_page_hot(page) ? "hot" : "not-hot",
                    pgc_page_data(page) == DBENGINE_EMPTY_PAGE ? " gap" : "",
                    pgc_page_start_time_s(page), pgc_page_end_time_s(page), pgc_page_update_every_s(page)
                    );

        pgc_page_release(main_cache, page);

        point_in_time_ut -= handle->update_every_ut;
        point_in_time_s = (time_t)(point_in_time_ut / USEC_PER_SEC);
        page_entry.start_time_s = point_in_time_s;
        page_entry.end_time_s = point_in_time_s;
        page = pgc_page_add_and_acquire(main_cache, page_entry, &added);
    }

    handle->page_entries_max = data_size / CTX_POINT_SIZE_BYTES(ctx);
    handle->page_start_time_ut = point_in_time_ut;
    handle->page_end_time_ut = point_in_time_ut;
    handle->page_position = 1; // zero is already in our data
    handle->page = page;
    handle->page_flags = conflicts? RRDENG_PAGE_CONFLICT : 0;

    if(point_in_time_s > max_acceptable_collected_time())
        handle->page_flags |= RRDENG_PAGE_CREATED_IN_FUTURE;

    check_and_fix_mrg_update_every(handle);

    timing_step(TIMING_STEP_DBENGINE_CREATE_NEW_PAGE);
}
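
// Page sizing is aligned to wall-clock time, so metrics that share the same
// pg_alignment get the same target slot and complete their pages at about the
// same second. Illustrative numbers (not from the source): with max_slots=1024
// and target_slot=100, a page started when now_s maps to slot 200 gets
// 100 + 1024 - 200 = 924 slots, so it fills up exactly when the clock reaches
// slot 100 of the next window.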
static size_t aligned_allocation_entries(size_t max_slots, size_t target_slot, time_t now_s) {
    size_t slots = target_slot;
    size_t pos = (now_s % max_slots);

    if(pos > slots)
        slots += max_slots - pos;

    else if(pos < slots)
        slots -= pos;

    else
        slots = max_slots;

    return slots;
}

static void *rrdeng_alloc_new_metric_data(struct rrdeng_collect_handle *handle, size_t *data_size, usec_t point_in_time_ut) {
    struct rrdengine_instance *ctx = mrg_metric_ctx(handle->metric);

    size_t max_size = tier_page_size[ctx->config.tier];
    size_t max_slots = max_size / CTX_POINT_SIZE_BYTES(ctx);

    size_t slots = aligned_allocation_entries(
        max_slots,
        indexing_partition((Word_t) handle->alignment, max_slots),
        (time_t) (point_in_time_ut / USEC_PER_SEC)
    );

    if(slots < max_slots / 3)
        slots = max_slots / 3;

    if(slots < 3)
        slots = 3;

    size_t size = slots * CTX_POINT_SIZE_BYTES(ctx);

    // internal_error(true, "PAGE ALLOC %zu bytes (%zu max)", size, max_size);

    internal_fatal(slots < 3 || slots > max_slots, "ooops! wrong distribution of metrics across time");
    internal_fatal(size > tier_page_size[ctx->config.tier] || size < CTX_POINT_SIZE_BYTES(ctx) * 2, "ooops! wrong page size");

    *data_size = size;
    void *d = dbengine_page_alloc(size);

    timing_step(TIMING_STEP_DBENGINE_PAGE_ALLOC);

    return d;
}
static void rrdeng_store_metric_append_point(STORAGE_COLLECT_HANDLE *collection_handle,
                                             const usec_t point_in_time_ut,
                                             const NETDATA_DOUBLE n,
                                             const NETDATA_DOUBLE min_value,
                                             const NETDATA_DOUBLE max_value,
                                             const uint16_t count,
                                             const uint16_t anomaly_count,
                                             const SN_FLAGS flags)
{
    struct rrdeng_collect_handle *handle = (struct rrdeng_collect_handle *)collection_handle;
    struct rrdengine_instance *ctx = mrg_metric_ctx(handle->metric);

    if(unlikely(!handle->data))
        handle->data = rrdeng_alloc_new_metric_data(handle, &handle->data_size, point_in_time_ut);

    timing_step(TIMING_STEP_DBENGINE_CHECK_DATA);

    if(likely(ctx->config.page_type == PAGE_METRICS)) {
        storage_number *tier0_metric_data = handle->data;
        tier0_metric_data[handle->page_position] = pack_storage_number(n, flags);
    }
    else if(likely(ctx->config.page_type == PAGE_TIER)) {
        storage_number_tier1_t *tier12_metric_data = handle->data;
        storage_number_tier1_t number_tier1;
        number_tier1.sum_value = (float) n;
        number_tier1.min_value = (float) min_value;
        number_tier1.max_value = (float) max_value;
        number_tier1.anomaly_count = anomaly_count;
        number_tier1.count = count;
        tier12_metric_data[handle->page_position] = number_tier1;
    }
    else
        fatal("DBENGINE: cannot store metric on unknown page type id %d", ctx->config.page_type);

    timing_step(TIMING_STEP_DBENGINE_PACK);

    if(unlikely(!handle->page)){
        rrdeng_store_metric_create_new_page(handle, ctx, point_in_time_ut, handle->data, handle->data_size);
        // handle->position is set to 1 already
    }
    else {
        // update an existing page
        pgc_page_hot_set_end_time_s(main_cache, handle->page, (time_t) (point_in_time_ut / USEC_PER_SEC));
        handle->page_end_time_ut = point_in_time_ut;

        if(unlikely(++handle->page_position >= handle->page_entries_max)) {
            internal_fatal(handle->page_position > handle->page_entries_max, "DBENGINE: exceeded page max number of points");
            handle->page_flags |= RRDENG_PAGE_FULL;
            rrdeng_store_metric_flush_current_page(collection_handle);
        }
    }

    timing_step(TIMING_STEP_DBENGINE_PAGE_FIN);

    // update the metric information
    mrg_metric_set_hot_latest_time_s(main_mrg, handle->metric, (time_t) (point_in_time_ut / USEC_PER_SEC));

    timing_step(TIMING_STEP_DBENGINE_MRG_UPDATE);
}
static void store_metric_next_error_log(struct rrdeng_collect_handle *handle __maybe_unused, usec_t point_in_time_ut __maybe_unused, const char *msg __maybe_unused) {
#ifdef NETDATA_INTERNAL_CHECKS
    time_t point_in_time_s = (time_t)(point_in_time_ut / USEC_PER_SEC);
    char uuid[UUID_STR_LEN + 1];
    uuid_unparse(*mrg_metric_uuid(main_mrg, handle->metric), uuid);

    BUFFER *wb = NULL;
    if(handle->page && handle->page_flags) {
        wb = buffer_create(0, NULL);
        collect_page_flags_to_buffer(wb, handle->page_flags);
    }

    error_limit_static_global_var(erl, 1, 0);
    error_limit(&erl,
                "DBENGINE: metric '%s' collected point at %ld, %s last collection at %ld, "
                "update every %ld, %s page from %ld to %ld, position %u (of %u), flags: %s",
                uuid,
                point_in_time_s,
                msg,
                (time_t)(handle->page_end_time_ut / USEC_PER_SEC),
                (time_t)(handle->update_every_ut / USEC_PER_SEC),
                handle->page ? "current" : "*LAST*",
                (time_t)(handle->page_start_time_ut / USEC_PER_SEC),
                (time_t)(handle->page_end_time_ut / USEC_PER_SEC),
                handle->page_position, handle->page_entries_max,
                wb ? buffer_tostring(wb) : ""
    );

    buffer_free(wb);
#else
    ;
#endif
}
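
// Entry point for storing one collected point. The common case is a point
// landing exactly one update_every after the previous one. Forward gaps are
// either filled with SN_EMPTY_SLOT points (small, aligned gaps) or cause the
// current page to be flushed (steps smaller than update_every, unaligned
// steps, or gaps larger than the remaining page space). Points older than, or
// equal to, the last stored timestamp are dropped.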
void rrdeng_store_metric_next(STORAGE_COLLECT_HANDLE *collection_handle,
                              const usec_t point_in_time_ut,
                              const NETDATA_DOUBLE n,
                              const NETDATA_DOUBLE min_value,
                              const NETDATA_DOUBLE max_value,
                              const uint16_t count,
                              const uint16_t anomaly_count,
                              const SN_FLAGS flags)
{
    timing_step(TIMING_STEP_RRDSET_STORE_METRIC);

    struct rrdeng_collect_handle *handle = (struct rrdeng_collect_handle *)collection_handle;

#ifdef NETDATA_INTERNAL_CHECKS
    if(unlikely(point_in_time_ut > (usec_t)max_acceptable_collected_time() * USEC_PER_SEC))
        handle->page_flags |= RRDENG_PAGE_FUTURE_POINT;
#endif

    usec_t delta_ut = point_in_time_ut - handle->page_end_time_ut;

    if(likely(delta_ut == handle->update_every_ut)) {
        // happy path
        ;
    }
    else if(unlikely(point_in_time_ut > handle->page_end_time_ut)) {
        if(handle->page) {
            if (unlikely(delta_ut < handle->update_every_ut)) {
                handle->page_flags |= RRDENG_PAGE_STEP_TOO_SMALL;
                rrdeng_store_metric_flush_current_page(collection_handle);
            }
            else if (unlikely(delta_ut % handle->update_every_ut)) {
                handle->page_flags |= RRDENG_PAGE_STEP_UNALIGNED;
                rrdeng_store_metric_flush_current_page(collection_handle);
            }
            else {
                size_t points_gap = delta_ut / handle->update_every_ut;
                size_t page_remaining_points = handle->page_entries_max - handle->page_position;

                if (points_gap >= page_remaining_points) {
                    handle->page_flags |= RRDENG_PAGE_BIG_GAP;
                    rrdeng_store_metric_flush_current_page(collection_handle);
                }
                else {
                    // loop to fill the gap
                    handle->page_flags |= RRDENG_PAGE_GAP;
                    usec_t stop_ut = point_in_time_ut - handle->update_every_ut;

                    for (usec_t this_ut = handle->page_end_time_ut + handle->update_every_ut;
                         this_ut <= stop_ut;
                         this_ut = handle->page_end_time_ut + handle->update_every_ut) {
                        rrdeng_store_metric_append_point(
                            collection_handle,
                            this_ut,
                            NAN, NAN, NAN,
                            1, 0,
                            SN_EMPTY_SLOT);
                    }
                }
            }
        }
    }
    else if(unlikely(point_in_time_ut < handle->page_end_time_ut)) {
        handle->page_flags |= RRDENG_PAGE_PAST_COLLECTION;
        store_metric_next_error_log(handle, point_in_time_ut, "is older than the");
        return;
    }
    else /* if(unlikely(point_in_time_ut == handle->page_end_time_ut)) */ {
        handle->page_flags |= RRDENG_PAGE_REPEATED_COLLECTION;
        store_metric_next_error_log(handle, point_in_time_ut, "is at the same time as the");
        return;
    }

    timing_step(TIMING_STEP_DBENGINE_FIRST_CHECK);

    rrdeng_store_metric_append_point(collection_handle,
                                     point_in_time_ut,
                                     n, min_value, max_value,
                                     count, anomaly_count,
                                     flags);
}
/*
 * Releases the database reference from the handle for storing metrics.
 * Returns 1 if it's safe to delete the dimension.
 */
int rrdeng_store_metric_finalize(STORAGE_COLLECT_HANDLE *collection_handle) {
    struct rrdeng_collect_handle *handle = (struct rrdeng_collect_handle *)collection_handle;
    struct rrdengine_instance *ctx = mrg_metric_ctx(handle->metric);

    handle->page_flags |= RRDENG_PAGE_COLLECT_FINALIZE;
    rrdeng_store_metric_flush_current_page(collection_handle);
    rrdeng_page_alignment_release(handle->alignment);

    __atomic_sub_fetch(&ctx->atomic.collectors_running, 1, __ATOMIC_RELAXED);
    if(!(handle->options & RRDENG_1ST_METRIC_WRITER))
        __atomic_sub_fetch(&ctx->atomic.collectors_running_duplicate, 1, __ATOMIC_RELAXED);

    if((handle->options & RRDENG_1ST_METRIC_WRITER) && !mrg_metric_clear_writer(main_mrg, handle->metric))
        internal_fatal(true, "DBENGINE: metric is already released");

    time_t first_time_s, last_time_s, update_every_s;
    mrg_metric_get_retention(main_mrg, handle->metric, &first_time_s, &last_time_s, &update_every_s);

    mrg_metric_release(main_mrg, handle->metric);
    freez(handle);

    if(!first_time_s && !last_time_s)
        return 1;

    return 0;
}
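
// Called when the collection interval of a dimension changes: the current
// page is flushed (each page has a single, fixed update_every) and both the
// handle and the metrics registry are updated to the new interval.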
void rrdeng_store_metric_change_collection_frequency(STORAGE_COLLECT_HANDLE *collection_handle, int update_every) {
    struct rrdeng_collect_handle *handle = (struct rrdeng_collect_handle *)collection_handle;
    check_and_fix_mrg_update_every(handle);

    METRIC *metric = handle->metric;
    usec_t update_every_ut = (usec_t)update_every * USEC_PER_SEC;

    if(update_every_ut == handle->update_every_ut)
        return;

    handle->page_flags |= RRDENG_PAGE_UPDATE_EVERY_CHANGE;
    rrdeng_store_metric_flush_current_page(collection_handle);
    mrg_metric_set_update_every(main_mrg, metric, update_every);
    handle->update_every_ut = update_every_ut;
}
// ----------------------------------------------------------------------------
// query ops

#ifdef NETDATA_INTERNAL_CHECKS
SPINLOCK global_query_handle_spinlock = NETDATA_SPINLOCK_INITIALIZER;
static struct rrdeng_query_handle *global_query_handle_ll = NULL;

static void register_query_handle(struct rrdeng_query_handle *handle) {
    handle->query_pid = gettid();
    handle->started_time_s = now_realtime_sec();

    spinlock_lock(&global_query_handle_spinlock);
    DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(global_query_handle_ll, handle, prev, next);
    spinlock_unlock(&global_query_handle_spinlock);
}

static void unregister_query_handle(struct rrdeng_query_handle *handle) {
    spinlock_lock(&global_query_handle_spinlock);
    DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(global_query_handle_ll, handle, prev, next);
    spinlock_unlock(&global_query_handle_spinlock);
}
#else
static void register_query_handle(struct rrdeng_query_handle *handle __maybe_unused) {
    ;
}

static void unregister_query_handle(struct rrdeng_query_handle *handle __maybe_unused) {
    ;
}
#endif
/*
 * Gets a handle for loading metrics from the database.
 * The handle must be released with rrdeng_load_metric_finalize().
 */
void rrdeng_load_metric_init(STORAGE_METRIC_HANDLE *db_metric_handle,
                             struct storage_engine_query_handle *rrddim_handle,
                             time_t start_time_s,
                             time_t end_time_s,
                             STORAGE_PRIORITY priority)
{
    usec_t started_ut = now_monotonic_usec();

    netdata_thread_disable_cancelability();

    METRIC *metric = (METRIC *)db_metric_handle;
    struct rrdengine_instance *ctx = mrg_metric_ctx(metric);

    struct rrdeng_query_handle *handle;
    handle = rrdeng_query_handle_get();
    register_query_handle(handle);

    if (unlikely(priority < STORAGE_PRIORITY_HIGH))
        priority = STORAGE_PRIORITY_HIGH;
    else if (unlikely(priority >= STORAGE_PRIORITY_INTERNAL_MAX_DONT_USE))
        priority = STORAGE_PRIORITY_INTERNAL_MAX_DONT_USE - 1;

    handle->ctx = ctx;
    handle->metric = metric;
    handle->priority = priority;

    // IMPORTANT!
    // It is crucial not to exceed the db boundaries, because dbengine
    // now has gap caching, so when a gap is detected a negative page
    // is inserted into the main cache, to avoid scanning the journals
    // again for pages matching the gap.

    time_t db_first_time_s, db_last_time_s, db_update_every_s;
    mrg_metric_get_retention(main_mrg, metric, &db_first_time_s, &db_last_time_s, &db_update_every_s);

    if(is_page_in_time_range(start_time_s, end_time_s, db_first_time_s, db_last_time_s) == PAGE_IS_IN_RANGE) {
        handle->start_time_s = MAX(start_time_s, db_first_time_s);
        handle->end_time_s = MIN(end_time_s, db_last_time_s);
        handle->now_s = handle->start_time_s;

        handle->dt_s = db_update_every_s;
        if (!handle->dt_s) {
            handle->dt_s = default_rrd_update_every;
            mrg_metric_set_update_every_s_if_zero(main_mrg, metric, default_rrd_update_every);
        }

        rrddim_handle->handle = (STORAGE_QUERY_HANDLE *) handle;
        rrddim_handle->start_time_s = handle->start_time_s;
        rrddim_handle->end_time_s = handle->end_time_s;
        rrddim_handle->priority = priority;
        rrddim_handle->backend = STORAGE_ENGINE_BACKEND_DBENGINE;

        pg_cache_preload(handle);

        __atomic_add_fetch(&rrdeng_cache_efficiency_stats.query_time_init, now_monotonic_usec() - started_ut, __ATOMIC_RELAXED);
    }
    else {
        handle->start_time_s = start_time_s;
        handle->end_time_s = end_time_s;
        handle->now_s = start_time_s;
        handle->dt_s = db_update_every_s;

        rrddim_handle->handle = (STORAGE_QUERY_HANDLE *) handle;
        rrddim_handle->start_time_s = handle->start_time_s;
        rrddim_handle->end_time_s = 0;
        rrddim_handle->priority = priority;
        rrddim_handle->backend = STORAGE_ENGINE_BACKEND_DBENGINE;
    }
}
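
// Advances the query to the next page covering handle->now_s. Returns false
// when the query window is exhausted or no usable page exists. When a page is
// found, handle->position is set to the slot whose end time first reaches
// handle->now_s, and handle->dt_s is switched to the page's own update_every.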
static bool rrdeng_load_page_next(struct storage_engine_query_handle *rrddim_handle, bool debug_this __maybe_unused) {
    struct rrdeng_query_handle *handle = (struct rrdeng_query_handle *)rrddim_handle->handle;
    struct rrdengine_instance *ctx = handle->ctx;

    if (likely(handle->page)) {
        // we have a page to release
        pgc_page_release(main_cache, handle->page);
        handle->page = NULL;
    }

    if (unlikely(handle->now_s > rrddim_handle->end_time_s))
        return false;

    size_t entries = 0;
    handle->page = pg_cache_lookup_next(ctx, handle->pdc, handle->now_s, handle->dt_s, &entries);

    internal_fatal(handle->page && (pgc_page_data(handle->page) == DBENGINE_EMPTY_PAGE || !entries),
                   "A page was returned, but it is empty - pg_cache_lookup_next() should be handling this case");

    if (unlikely(!handle->page || pgc_page_data(handle->page) == DBENGINE_EMPTY_PAGE || !entries))
        return false;

    time_t page_start_time_s = pgc_page_start_time_s(handle->page);
    time_t page_end_time_s = pgc_page_end_time_s(handle->page);
    time_t page_update_every_s = pgc_page_update_every_s(handle->page);

    unsigned position;
    if(likely(handle->now_s >= page_start_time_s && handle->now_s <= page_end_time_s)) {

        if(unlikely(entries == 1 || page_start_time_s == page_end_time_s || !page_update_every_s)) {
            position = 0;
            handle->now_s = page_start_time_s;
        }
        else {
            position = (handle->now_s - page_start_time_s) * (entries - 1) / (page_end_time_s - page_start_time_s);
            time_t point_end_time_s = page_start_time_s + position * page_update_every_s;
            while(point_end_time_s < handle->now_s && position + 1 < entries) {
                // https://github.com/netdata/netdata/issues/14411
                // we really need a while() here, because the delta may be
                // 2 points at higher tiers
                position++;
                point_end_time_s = page_start_time_s + position * page_update_every_s;
            }
            handle->now_s = point_end_time_s;
        }

        internal_fatal(position >= entries, "DBENGINE: wrong page position calculation");
    }
    else if(handle->now_s < page_start_time_s) {
        handle->now_s = page_start_time_s;
        position = 0;
    }
    else {
        internal_fatal(true, "DBENGINE: this page is entirely in our past and should not be accepted for this query in the first place");
        handle->now_s = page_end_time_s;
        position = entries - 1;
    }

    handle->entries = entries;
    handle->position = position;
    handle->metric_data = pgc_page_data((PGC_PAGE *)handle->page);
    handle->dt_s = page_update_every_s;
    return true;
}
// Returns the metric and sets its timestamp into current_time
// IT IS REQUIRED TO **ALWAYS** SET ALL RETURN VALUES (current_time, end_time, flags)
// IT IS REQUIRED TO **ALWAYS** KEEP TRACK OF TIME, EVEN OUTSIDE THE DATABASE BOUNDARIES
STORAGE_POINT rrdeng_load_metric_next(struct storage_engine_query_handle *rrddim_handle) {
    struct rrdeng_query_handle *handle = (struct rrdeng_query_handle *)rrddim_handle->handle;
    STORAGE_POINT sp;

    if (unlikely(handle->now_s > rrddim_handle->end_time_s)) {
        storage_point_empty(sp, handle->now_s - handle->dt_s, handle->now_s);
        goto prepare_for_next_iteration;
    }

    if (unlikely(!handle->page || handle->position >= handle->entries)) {
        // We need to get a new page
        if (!rrdeng_load_page_next(rrddim_handle, false)) {
            handle->now_s = rrddim_handle->end_time_s;
            storage_point_empty(sp, handle->now_s - handle->dt_s, handle->now_s);
            goto prepare_for_next_iteration;
        }
    }

    sp.start_time_s = handle->now_s - handle->dt_s;
    sp.end_time_s = handle->now_s;

    switch(handle->ctx->config.page_type) {
        case PAGE_METRICS: {
            storage_number n = handle->metric_data[handle->position];
            sp.min = sp.max = sp.sum = unpack_storage_number(n);
            sp.flags = n & SN_USER_FLAGS;
            sp.count = 1;
            sp.anomaly_count = is_storage_number_anomalous(n) ? 1 : 0;
        }
        break;

        case PAGE_TIER: {
            storage_number_tier1_t tier1_value = ((storage_number_tier1_t *)handle->metric_data)[handle->position];
            sp.flags = tier1_value.anomaly_count ? SN_FLAG_NONE : SN_FLAG_NOT_ANOMALOUS;
            sp.count = tier1_value.count;
            sp.anomaly_count = tier1_value.anomaly_count;
            sp.min = tier1_value.min_value;
            sp.max = tier1_value.max_value;
            sp.sum = tier1_value.sum_value;
        }
        break;

        // we don't know this page type
        default: {
            static bool logged = false;
            if(!logged) {
                netdata_log_error("DBENGINE: unknown page type %d found. Cannot decode it. Ignoring its metrics.", handle->ctx->config.page_type);
                logged = true;
            }
            storage_point_empty(sp, sp.start_time_s, sp.end_time_s);
        }
        break;
    }

prepare_for_next_iteration:
    internal_fatal(sp.end_time_s < rrddim_handle->start_time_s, "DBENGINE: this point is too old for this query");
    internal_fatal(sp.end_time_s < handle->now_s, "DBENGINE: this point is too old for this point in time");

    handle->now_s += handle->dt_s;
    handle->position++;

    return sp;
}
int rrdeng_load_metric_is_finished(struct storage_engine_query_handle *rrddim_handle) {
    struct rrdeng_query_handle *handle = (struct rrdeng_query_handle *)rrddim_handle->handle;
    return (handle->now_s > rrddim_handle->end_time_s);
}

/*
 * Releases the database reference from the handle for loading metrics.
 */
void rrdeng_load_metric_finalize(struct storage_engine_query_handle *rrddim_handle)
{
    struct rrdeng_query_handle *handle = (struct rrdeng_query_handle *)rrddim_handle->handle;

    if (handle->page)
        pgc_page_release(main_cache, handle->page);

    if(!pdc_release_and_destroy_if_unreferenced(handle->pdc, false, false))
        __atomic_store_n(&handle->pdc->workers_should_stop, true, __ATOMIC_RELAXED);

    unregister_query_handle(handle);
    rrdeng_query_handle_release(handle);
    rrddim_handle->handle = NULL;
    netdata_thread_enable_cancelability();
}

time_t rrdeng_load_align_to_optimal_before(struct storage_engine_query_handle *rrddim_handle) {
    struct rrdeng_query_handle *handle = (struct rrdeng_query_handle *)rrddim_handle->handle;

    if(handle->pdc) {
        rrdeng_prep_wait(handle->pdc);
        if (handle->pdc->optimal_end_time_s > rrddim_handle->end_time_s)
            rrddim_handle->end_time_s = handle->pdc->optimal_end_time_s;
    }

    return rrddim_handle->end_time_s;
}

time_t rrdeng_metric_latest_time(STORAGE_METRIC_HANDLE *db_metric_handle) {
    METRIC *metric = (METRIC *)db_metric_handle;
    time_t latest_time_s = 0;

    if (metric)
        latest_time_s = mrg_metric_get_latest_time_s(main_mrg, metric);

    return latest_time_s;
}

time_t rrdeng_metric_oldest_time(STORAGE_METRIC_HANDLE *db_metric_handle) {
    METRIC *metric = (METRIC *)db_metric_handle;

    time_t oldest_time_s = 0;
    if (metric)
        oldest_time_s = mrg_metric_get_first_time_s(main_mrg, metric);

    return oldest_time_s;
}
bool rrdeng_metric_retention_by_uuid(STORAGE_INSTANCE *db_instance, uuid_t *dim_uuid, time_t *first_entry_s, time_t *last_entry_s)
{
    struct rrdengine_instance *ctx = (struct rrdengine_instance *)db_instance;
    if (unlikely(!ctx)) {
        netdata_log_error("DBENGINE: invalid STORAGE INSTANCE to %s()", __FUNCTION__);
        return false;
    }

    METRIC *metric = mrg_metric_get_and_acquire(main_mrg, dim_uuid, (Word_t) ctx);
    if (unlikely(!metric))
        return false;

    time_t update_every_s;
    mrg_metric_get_retention(main_mrg, metric, first_entry_s, last_entry_s, &update_every_s);

    mrg_metric_release(main_mrg, metric);

    return true;
}

uint64_t rrdeng_disk_space_max(STORAGE_INSTANCE *db_instance) {
    struct rrdengine_instance *ctx = (struct rrdengine_instance *)db_instance;
    return ctx->config.max_disk_space;
}

uint64_t rrdeng_disk_space_used(STORAGE_INSTANCE *db_instance) {
    struct rrdengine_instance *ctx = (struct rrdengine_instance *)db_instance;
    return __atomic_load_n(&ctx->atomic.current_disk_space, __ATOMIC_RELAXED);
}

time_t rrdeng_global_first_time_s(STORAGE_INSTANCE *db_instance) {
    struct rrdengine_instance *ctx = (struct rrdengine_instance *)db_instance;

    time_t t = __atomic_load_n(&ctx->atomic.first_time_s, __ATOMIC_RELAXED);
    if(t == LONG_MAX || t < 0)
        t = 0;

    return t;
}

size_t rrdeng_currently_collected_metrics(STORAGE_INSTANCE *db_instance) {
    struct rrdengine_instance *ctx = (struct rrdengine_instance *)db_instance;
    return __atomic_load_n(&ctx->atomic.collectors_running, __ATOMIC_RELAXED);
}

/*
 * Gathers Database Engine statistics.
 * Careful when modifying this function.
 * You must not change the indices of the statistics or user code will break.
 * You must not exceed RRDENG_NR_STATS or it will crash.
 */
void rrdeng_get_37_statistics(struct rrdengine_instance *ctx, unsigned long long *array)
{
    if (ctx == NULL)
        return;

    array[0] = (uint64_t)__atomic_load_n(&ctx->atomic.collectors_running, __ATOMIC_RELAXED); // API producers
    array[1] = (uint64_t)__atomic_load_n(&ctx->atomic.inflight_queries, __ATOMIC_RELAXED);   // API consumers
    array[2] = 0;
    array[3] = 0;
    array[4] = 0;
    array[5] = 0; // (uint64_t)ctx->stats.pg_cache_insertions;
    array[6] = 0; // (uint64_t)ctx->stats.pg_cache_deletions;
    array[7] = 0; // (uint64_t)ctx->stats.pg_cache_hits;
    array[8] = 0; // (uint64_t)ctx->stats.pg_cache_misses;
    array[9] = 0; // (uint64_t)ctx->stats.pg_cache_backfills;
    array[10] = 0; // (uint64_t)ctx->stats.pg_cache_evictions;
    array[11] = (uint64_t)__atomic_load_n(&ctx->stats.before_compress_bytes, __ATOMIC_RELAXED); // used
    array[12] = (uint64_t)__atomic_load_n(&ctx->stats.after_compress_bytes, __ATOMIC_RELAXED); // used
    array[13] = (uint64_t)__atomic_load_n(&ctx->stats.before_decompress_bytes, __ATOMIC_RELAXED);
    array[14] = (uint64_t)__atomic_load_n(&ctx->stats.after_decompress_bytes, __ATOMIC_RELAXED);
    array[15] = (uint64_t)__atomic_load_n(&ctx->stats.io_write_bytes, __ATOMIC_RELAXED); // used
    array[16] = (uint64_t)__atomic_load_n(&ctx->stats.io_write_requests, __ATOMIC_RELAXED); // used
    array[17] = (uint64_t)__atomic_load_n(&ctx->stats.io_read_bytes, __ATOMIC_RELAXED);
    array[18] = (uint64_t)__atomic_load_n(&ctx->stats.io_read_requests, __ATOMIC_RELAXED); // used
    array[19] = 0; // (uint64_t)__atomic_load_n(&ctx->stats.io_write_extent_bytes, __ATOMIC_RELAXED);
    array[20] = 0; // (uint64_t)__atomic_load_n(&ctx->stats.io_write_extents, __ATOMIC_RELAXED);
    array[21] = 0; // (uint64_t)__atomic_load_n(&ctx->stats.io_read_extent_bytes, __ATOMIC_RELAXED);
    array[22] = 0; // (uint64_t)__atomic_load_n(&ctx->stats.io_read_extents, __ATOMIC_RELAXED);
    array[23] = (uint64_t)__atomic_load_n(&ctx->stats.datafile_creations, __ATOMIC_RELAXED);
    array[24] = (uint64_t)__atomic_load_n(&ctx->stats.datafile_deletions, __ATOMIC_RELAXED);
    array[25] = (uint64_t)__atomic_load_n(&ctx->stats.journalfile_creations, __ATOMIC_RELAXED);
    array[26] = (uint64_t)__atomic_load_n(&ctx->stats.journalfile_deletions, __ATOMIC_RELAXED);
    array[27] = 0; // (uint64_t)__atomic_load_n(&ctx->stats.page_cache_descriptors, __ATOMIC_RELAXED);
    array[28] = (uint64_t)__atomic_load_n(&ctx->stats.io_errors, __ATOMIC_RELAXED);
    array[29] = (uint64_t)__atomic_load_n(&ctx->stats.fs_errors, __ATOMIC_RELAXED);
    array[30] = (uint64_t)__atomic_load_n(&global_io_errors, __ATOMIC_RELAXED); // used
    array[31] = (uint64_t)__atomic_load_n(&global_fs_errors, __ATOMIC_RELAXED); // used
    array[32] = (uint64_t)__atomic_load_n(&rrdeng_reserved_file_descriptors, __ATOMIC_RELAXED); // used
    array[33] = 0; // (uint64_t)__atomic_load_n(&ctx->stats.pg_cache_over_half_dirty_events, __ATOMIC_RELAXED);
    array[34] = (uint64_t)__atomic_load_n(&global_pg_cache_over_half_dirty_events, __ATOMIC_RELAXED); // used
    array[35] = 0; // (uint64_t)__atomic_load_n(&ctx->stats.flushing_pressure_page_deletions, __ATOMIC_RELAXED);
    array[36] = (uint64_t)__atomic_load_n(&global_flushing_pressure_page_deletions, __ATOMIC_RELAXED); // used
    array[37] = 0; //(uint64_t)pg_cache->active_descriptors;

    fatal_assert(RRDENG_NR_STATS == 38);
}
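
// Loads metric retention from the journal v2 files into the metrics registry
// (MRG). When more than two datafiles exist, the last and the first datafiles
// in the list are populated synchronously here; the rest are handled by
// RRDENG_OPCODE_CTX_POPULATE_MRG commands queued to the event loop. The
// number of worker completions is derived from the available CPUs, capped by
// the number of datafiles and the libuv worker pool size.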
static void rrdeng_populate_mrg(struct rrdengine_instance *ctx) {
    uv_rwlock_rdlock(&ctx->datafiles.rwlock);
    size_t datafiles = 0;
    for(struct rrdengine_datafile *df = ctx->datafiles.first; df ;df = df->next)
        datafiles++;
    uv_rwlock_rdunlock(&ctx->datafiles.rwlock);

    ssize_t cpus = (ssize_t)get_netdata_cpus() / (ssize_t)storage_tiers;
    if(cpus > (ssize_t)datafiles)
        cpus = (ssize_t)datafiles;

    if(cpus > (ssize_t)libuv_worker_threads)
        cpus = (ssize_t)libuv_worker_threads;

    if(cpus >= (ssize_t)get_netdata_cpus() / 2)
        cpus = (ssize_t)(get_netdata_cpus() / 2 - 1);

    if(cpus < 1)
        cpus = 1;

    netdata_log_info("DBENGINE: populating retention to MRG from %zu journal files of tier %d, using %zd threads...", datafiles, ctx->config.tier, cpus);

    if(datafiles > 2) {
        struct rrdengine_datafile *datafile;

        datafile = ctx->datafiles.first->prev;
        if(!(datafile->journalfile->v2.flags & JOURNALFILE_FLAG_IS_AVAILABLE))
            datafile = datafile->prev;

        if(datafile->journalfile->v2.flags & JOURNALFILE_FLAG_IS_AVAILABLE) {
            journalfile_v2_populate_retention_to_mrg(ctx, datafile->journalfile);
            datafile->populate_mrg.populated = true;
        }

        datafile = ctx->datafiles.first;
        if(datafile->journalfile->v2.flags & JOURNALFILE_FLAG_IS_AVAILABLE) {
            journalfile_v2_populate_retention_to_mrg(ctx, datafile->journalfile);
            datafile->populate_mrg.populated = true;
        }
    }

    ctx->loading.populate_mrg.size = cpus;
    ctx->loading.populate_mrg.array = callocz(ctx->loading.populate_mrg.size, sizeof(struct completion));

    for (size_t i = 0; i < ctx->loading.populate_mrg.size; i++) {
        completion_init(&ctx->loading.populate_mrg.array[i]);
        rrdeng_enq_cmd(ctx, RRDENG_OPCODE_CTX_POPULATE_MRG, NULL, &ctx->loading.populate_mrg.array[i],
                       STORAGE_PRIORITY_INTERNAL_DBENGINE, NULL, NULL);
    }
}
void rrdeng_readiness_wait(struct rrdengine_instance *ctx) {
    for (size_t i = 0; i < ctx->loading.populate_mrg.size; i++) {
        completion_wait_for(&ctx->loading.populate_mrg.array[i]);
        completion_destroy(&ctx->loading.populate_mrg.array[i]);
    }

    freez(ctx->loading.populate_mrg.array);
    ctx->loading.populate_mrg.array = NULL;
    ctx->loading.populate_mrg.size = 0;

    netdata_log_info("DBENGINE: tier %d is ready for data collection and queries", ctx->config.tier);
}

bool rrdeng_is_legacy(STORAGE_INSTANCE *db_instance) {
    struct rrdengine_instance *ctx = (struct rrdengine_instance *)db_instance;
    return ctx->config.legacy;
}
void rrdeng_exit_mode(struct rrdengine_instance *ctx) {
    __atomic_store_n(&ctx->quiesce.exit_mode, true, __ATOMIC_RELAXED);
}

/*
 * Returns 0 on success, negative on error
 */
int rrdeng_init(struct rrdengine_instance **ctxp, const char *dbfiles_path,
                unsigned disk_space_mb, size_t tier) {
    struct rrdengine_instance *ctx;
    uint32_t max_open_files;

    max_open_files = rlimit_nofile.rlim_cur / 4;

    /* reserve RRDENG_FD_BUDGET_PER_INSTANCE file descriptors for this instance */
    rrd_stat_atomic_add(&rrdeng_reserved_file_descriptors, RRDENG_FD_BUDGET_PER_INSTANCE);
    if (rrdeng_reserved_file_descriptors > max_open_files) {
        netdata_log_error(
            "Exceeded the budget of available file descriptors (%u/%u), cannot create new dbengine instance.",
            (unsigned)rrdeng_reserved_file_descriptors,
            (unsigned)max_open_files);

        rrd_stat_atomic_add(&global_fs_errors, 1);
        rrd_stat_atomic_add(&rrdeng_reserved_file_descriptors, -RRDENG_FD_BUDGET_PER_INSTANCE);
        return UV_EMFILE;
    }

    if(NULL == ctxp) {
        ctx = multidb_ctx[tier];
        memset(ctx, 0, sizeof(*ctx));
        ctx->config.legacy = false;
    }
    else {
        *ctxp = ctx = callocz(1, sizeof(*ctx));
        ctx->config.legacy = true;
    }

    ctx->config.tier = (int)tier;
    ctx->config.page_type = tier_page_type[tier];
    ctx->config.global_compress_alg = RRD_LZ4;
    if (disk_space_mb < RRDENG_MIN_DISK_SPACE_MB)
        disk_space_mb = RRDENG_MIN_DISK_SPACE_MB;
    ctx->config.max_disk_space = disk_space_mb * 1048576LLU;
    strncpyz(ctx->config.dbfiles_path, dbfiles_path, sizeof(ctx->config.dbfiles_path) - 1);
    ctx->config.dbfiles_path[sizeof(ctx->config.dbfiles_path) - 1] = '\0';

    ctx->atomic.transaction_id = 1;
    ctx->quiesce.enabled = false;

    rw_spinlock_init(&ctx->njfv2idx.spinlock);
    ctx->atomic.first_time_s = LONG_MAX;

    if (rrdeng_dbengine_spawn(ctx) && !init_rrd_files(ctx)) {
        // success - we run this ctx too
        rrdeng_populate_mrg(ctx);
        return 0;
    }

    if (ctx->config.legacy) {
        freez(ctx);
        if (ctxp)
            *ctxp = NULL;
    }

    rrd_stat_atomic_add(&rrdeng_reserved_file_descriptors, -RRDENG_FD_BUDGET_PER_INSTANCE);
    return UV_EIO;
}
size_t rrdeng_collectors_running(struct rrdengine_instance *ctx) {
    return __atomic_load_n(&ctx->atomic.collectors_running, __ATOMIC_RELAXED);
}

/*
 * Returns 0 on success, 1 on error
 */
int rrdeng_exit(struct rrdengine_instance *ctx) {
    if (NULL == ctx)
        return 1;

    // FIXME - ktsaou - properly cleanup ctx
    // 1. make sure all collectors are stopped
    // 2. make sure new queries will not be accepted (this is quiesce that has already run)
    // 3. flush this section of the main cache
    // 4. then wait for completion

    bool logged = false;
    while(__atomic_load_n(&ctx->atomic.collectors_running, __ATOMIC_RELAXED) && !unittest_running) {
        if(!logged) {
            netdata_log_info("DBENGINE: waiting for collectors to finish on tier %d...", (ctx->config.legacy) ? -1 : ctx->config.tier);
            logged = true;
        }
        sleep_usec(100 * USEC_PER_MS);
    }

    netdata_log_info("DBENGINE: flushing main cache for tier %d", (ctx->config.legacy) ? -1 : ctx->config.tier);
    pgc_flush_all_hot_and_dirty_pages(main_cache, (Word_t)ctx);

    netdata_log_info("DBENGINE: shutting down tier %d", (ctx->config.legacy) ? -1 : ctx->config.tier);
    struct completion completion = {};
    completion_init(&completion);
    rrdeng_enq_cmd(ctx, RRDENG_OPCODE_CTX_SHUTDOWN, NULL, &completion, STORAGE_PRIORITY_BEST_EFFORT, NULL, NULL);
    completion_wait_for(&completion);
    completion_destroy(&completion);

    finalize_rrd_files(ctx);

    if(ctx->config.legacy)
        freez(ctx);

    rrd_stat_atomic_add(&rrdeng_reserved_file_descriptors, -RRDENG_FD_BUDGET_PER_INSTANCE);
    return 0;
}

void rrdeng_prepare_exit(struct rrdengine_instance *ctx) {
    if (NULL == ctx)
        return;

    // FIXME - ktsaou - properly cleanup ctx
    // 1. make sure all collectors are stopped

    completion_init(&ctx->quiesce.completion);
    rrdeng_enq_cmd(ctx, RRDENG_OPCODE_CTX_QUIESCE, NULL, NULL, STORAGE_PRIORITY_INTERNAL_DBENGINE, NULL, NULL);
}
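
// Accumulates size/retention statistics for a single datafile by walking its
// memory-mapped journal v2 header: the extent list for on-disk (compressed)
// sizes and the per-metric page lists for points, durations and time ranges.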
static void populate_v2_statistics(struct rrdengine_datafile *datafile, RRDENG_SIZE_STATS *stats)
{
    struct journal_v2_header *j2_header = journalfile_v2_data_acquire(datafile->journalfile, NULL, 0, 0);
    void *data_start = (void *)j2_header;

    if(unlikely(!j2_header))
        return;

    stats->extents += j2_header->extent_count;

    unsigned entries;
    struct journal_extent_list *extent_list = (void *) (data_start + j2_header->extent_offset);
    for (entries = 0; entries < j2_header->extent_count; entries++) {
        stats->extents_compressed_bytes += extent_list->datafile_size;
        stats->extents_pages += extent_list->pages;
        extent_list++;
    }

    struct journal_metric_list *metric = (void *) (data_start + j2_header->metric_offset);
    time_t journal_start_time_s = (time_t) (j2_header->start_time_ut / USEC_PER_SEC);

    stats->metrics += j2_header->metric_count;
    for (entries = 0; entries < j2_header->metric_count; entries++) {

        struct journal_page_header *metric_list_header = (void *) (data_start + metric->page_offset);
        stats->metrics_pages += metric_list_header->entries;
        struct journal_page_list *descr = (void *) (data_start + metric->page_offset + sizeof(struct journal_page_header));
        for (uint32_t idx=0; idx < metric_list_header->entries; idx++) {

            time_t update_every_s;

            size_t points = descr->page_length / CTX_POINT_SIZE_BYTES(datafile->ctx);

            time_t start_time_s = journal_start_time_s + descr->delta_start_s;
            time_t end_time_s = journal_start_time_s + descr->delta_end_s;

            if(likely(points > 1))
                update_every_s = (time_t) ((end_time_s - start_time_s) / (points - 1));
            else {
                update_every_s = (time_t) (default_rrd_update_every * get_tier_grouping(datafile->ctx->config.tier));
                stats->single_point_pages++;
            }

            time_t duration_s = (time_t)((end_time_s - start_time_s + update_every_s));

            stats->pages_uncompressed_bytes += descr->page_length;
            stats->pages_duration_secs += duration_s;
            stats->points += points;

            stats->page_types[descr->type].pages++;
            stats->page_types[descr->type].pages_uncompressed_bytes += descr->page_length;
            stats->page_types[descr->type].pages_duration_secs += duration_s;
            stats->page_types[descr->type].points += points;

            if(!stats->first_time_s || (start_time_s - update_every_s) < stats->first_time_s)
                stats->first_time_s = (start_time_s - update_every_s);

            if(!stats->last_time_s || end_time_s > stats->last_time_s)
                stats->last_time_s = end_time_s;

            descr++;
        }
        metric++;
    }

    journalfile_v2_data_release(datafile->journalfile);
}
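
// Aggregates the per-datafile statistics into a single RRDENG_SIZE_STATS
// snapshot for the instance and derives averages (page size, compression
// savings, point duration, metric retention) from the accumulated totals.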
RRDENG_SIZE_STATS rrdeng_size_statistics(struct rrdengine_instance *ctx) {
    RRDENG_SIZE_STATS stats = { 0 };

    uv_rwlock_rdlock(&ctx->datafiles.rwlock);
    for(struct rrdengine_datafile *df = ctx->datafiles.first; df ;df = df->next) {
        stats.datafiles++;
        populate_v2_statistics(df, &stats);
    }
    uv_rwlock_rdunlock(&ctx->datafiles.rwlock);

    stats.currently_collected_metrics = __atomic_load_n(&ctx->atomic.collectors_running, __ATOMIC_RELAXED);

    internal_error(stats.metrics_pages != stats.extents_pages + stats.currently_collected_metrics,
                   "DBENGINE: metrics pages is %zu, but extents pages is %zu and API consumers is %zu",
                   stats.metrics_pages, stats.extents_pages, stats.currently_collected_metrics);

    stats.disk_space = ctx_current_disk_space_get(ctx);
    stats.max_disk_space = ctx->config.max_disk_space;

    stats.database_retention_secs = (time_t)(stats.last_time_s - stats.first_time_s);

    if(stats.extents_pages)
        stats.average_page_size_bytes = (double)stats.pages_uncompressed_bytes / (double)stats.extents_pages;

    if(stats.pages_uncompressed_bytes > 0)
        stats.average_compression_savings = 100.0 - ((double)stats.extents_compressed_bytes * 100.0 / (double)stats.pages_uncompressed_bytes);

    if(stats.points)
        stats.average_point_duration_secs = (double)stats.pages_duration_secs / (double)stats.points;

    if(stats.metrics) {
        stats.average_metric_retention_secs = (double)stats.pages_duration_secs / (double)stats.metrics;

        if(stats.database_retention_secs) {
            double metric_coverage = stats.average_metric_retention_secs / (double)stats.database_retention_secs;
            double db_retention_days = (double)stats.database_retention_secs / 86400.0;

            stats.estimated_concurrently_collected_metrics = stats.metrics * metric_coverage;
            stats.ephemeral_metrics_per_day_percent = ((double)stats.metrics * 100.0 / (double)stats.estimated_concurrently_collected_metrics - 100.0) / (double)db_retention_days;
        }
    }

    // stats.sizeof_metric = 0;
    stats.sizeof_datafile = struct_natural_alignment(sizeof(struct rrdengine_datafile)) + struct_natural_alignment(sizeof(struct rrdengine_journalfile));
    stats.sizeof_page_in_cache = 0; // struct_natural_alignment(sizeof(struct page_cache_descr));
    stats.sizeof_point_data = page_type_size[ctx->config.page_type];
    stats.sizeof_page_data = tier_page_size[ctx->config.tier];
    stats.pages_per_extent = rrdeng_pages_per_extent;

    // stats.sizeof_metric_in_index = 40;
    // stats.sizeof_page_in_index = 24;

    stats.default_granularity_secs = (size_t)default_rrd_update_every * get_tier_grouping(ctx->config.tier);

    return stats;
}

struct rrdeng_cache_efficiency_stats rrdeng_get_cache_efficiency_stats(void) {
    // FIXME - make cache efficiency stats atomic
    return rrdeng_cache_efficiency_stats;
}