// rrdengineapi.c

// SPDX-License-Identifier: GPL-3.0-or-later
#include "database/engine/rrddiskprotocol.h"
#include "rrdengine.h"

/* Default global database instance */
struct rrdengine_instance multidb_ctx_storage_tier0;
struct rrdengine_instance multidb_ctx_storage_tier1;
struct rrdengine_instance multidb_ctx_storage_tier2;
struct rrdengine_instance multidb_ctx_storage_tier3;
struct rrdengine_instance multidb_ctx_storage_tier4;

#define mrg_metric_ctx(metric) (struct rrdengine_instance *)mrg_metric_section(main_mrg, metric)

#if RRD_STORAGE_TIERS != 5
#error RRD_STORAGE_TIERS is not 5 - you need to add allocations here
#endif

struct rrdengine_instance *multidb_ctx[RRD_STORAGE_TIERS];
uint8_t tier_page_type[RRD_STORAGE_TIERS] = {PAGE_METRICS, PAGE_TIER, PAGE_TIER, PAGE_TIER, PAGE_TIER};

#if defined(ENV32BIT)
size_t tier_page_size[RRD_STORAGE_TIERS] = {2048, 1024, 192, 192, 192};
#else
size_t tier_page_size[RRD_STORAGE_TIERS] = {4096, 2048, 384, 384, 384};
#endif

#if PAGE_TYPE_MAX != 2
#error PAGE_TYPE_MAX is not 2 - you need to add allocations here
#endif

size_t page_type_size[256] = {
    [PAGE_METRICS] = sizeof(storage_number),
    [PAGE_TIER] = sizeof(storage_number_tier1_t),
    [PAGE_GORILLA_METRICS] = sizeof(storage_number)
};

__attribute__((constructor)) void initialize_multidb_ctx(void) {
    multidb_ctx[0] = &multidb_ctx_storage_tier0;
    multidb_ctx[1] = &multidb_ctx_storage_tier1;
    multidb_ctx[2] = &multidb_ctx_storage_tier2;
    multidb_ctx[3] = &multidb_ctx_storage_tier3;
    multidb_ctx[4] = &multidb_ctx_storage_tier4;
}

int db_engine_journal_check = 0;
int default_rrdeng_disk_quota_mb = 256;
int default_multidb_disk_quota_mb = 256;

#if defined(ENV32BIT)
int default_rrdeng_page_cache_mb = 16;
int default_rrdeng_extent_cache_mb = 0;
#else
int default_rrdeng_page_cache_mb = 32;
int default_rrdeng_extent_cache_mb = 0;
#endif

// ----------------------------------------------------------------------------
// metrics groups

static inline void rrdeng_page_alignment_acquire(struct pg_alignment *pa) {
    if(unlikely(!pa)) return;
    __atomic_add_fetch(&pa->refcount, 1, __ATOMIC_SEQ_CST);
}

static inline bool rrdeng_page_alignment_release(struct pg_alignment *pa) {
    if(unlikely(!pa)) return true;

    if(__atomic_sub_fetch(&pa->refcount, 1, __ATOMIC_SEQ_CST) == 0) {
        freez(pa);
        return true;
    }

    return false;
}
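// Note on the refcount pairing above: rrdeng_metrics_group_get() below
// allocates a pg_alignment and takes the first reference; every collector
// that shares the group takes one more via rrdeng_store_metric_init(), and
// whichever release drops the count to zero frees the structure.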
// charts call this
STORAGE_METRICS_GROUP *rrdeng_metrics_group_get(STORAGE_INSTANCE *db_instance __maybe_unused, uuid_t *uuid __maybe_unused) {
    struct pg_alignment *pa = callocz(1, sizeof(struct pg_alignment));
    rrdeng_page_alignment_acquire(pa);
    return (STORAGE_METRICS_GROUP *)pa;
}

// charts call this
void rrdeng_metrics_group_release(STORAGE_INSTANCE *db_instance __maybe_unused, STORAGE_METRICS_GROUP *smg) {
    if(unlikely(!smg)) return;

    struct pg_alignment *pa = (struct pg_alignment *)smg;
    rrdeng_page_alignment_release(pa);
}

// ----------------------------------------------------------------------------
// metric handle for legacy dbs

/* This UUID is not unique across hosts */
void rrdeng_generate_legacy_uuid(const char *dim_id, const char *chart_id, uuid_t *ret_uuid)
{
    EVP_MD_CTX *evpctx;
    unsigned char hash_value[EVP_MAX_MD_SIZE];
    unsigned int hash_len;

    evpctx = EVP_MD_CTX_create();
    EVP_DigestInit_ex(evpctx, EVP_sha256(), NULL);
    EVP_DigestUpdate(evpctx, dim_id, strlen(dim_id));
    EVP_DigestUpdate(evpctx, chart_id, strlen(chart_id));
    EVP_DigestFinal_ex(evpctx, hash_value, &hash_len);
    EVP_MD_CTX_destroy(evpctx);
    fatal_assert(hash_len > sizeof(uuid_t));
    memcpy(ret_uuid, hash_value, sizeof(uuid_t));
}
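// The digest above is SHA-256 over the concatenation dim_id || chart_id,
// truncated to the first 16 bytes (the size of a uuid_t). For example, the
// legacy UUID for dimension "user" of chart "system.cpu" is the first 16
// bytes of SHA256("usersystem.cpu") - deterministic on a given agent, but
// two hosts with the same chart/dimension ids produce the same UUID, hence
// the warning that it is not unique across hosts.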
static METRIC *rrdeng_metric_get_legacy(STORAGE_INSTANCE *db_instance, const char *rd_id, const char *st_id) {
    struct rrdengine_instance *ctx = (struct rrdengine_instance *)db_instance;
    uuid_t legacy_uuid;
    rrdeng_generate_legacy_uuid(rd_id, st_id, &legacy_uuid);
    return mrg_metric_get_and_acquire(main_mrg, &legacy_uuid, (Word_t) ctx);
}

// ----------------------------------------------------------------------------
// metric handle

void rrdeng_metric_release(STORAGE_METRIC_HANDLE *db_metric_handle) {
    METRIC *metric = (METRIC *)db_metric_handle;
    mrg_metric_release(main_mrg, metric);
}

STORAGE_METRIC_HANDLE *rrdeng_metric_dup(STORAGE_METRIC_HANDLE *db_metric_handle) {
    METRIC *metric = (METRIC *)db_metric_handle;
    return (STORAGE_METRIC_HANDLE *) mrg_metric_dup(main_mrg, metric);
}

STORAGE_METRIC_HANDLE *rrdeng_metric_get(STORAGE_INSTANCE *db_instance, uuid_t *uuid) {
    struct rrdengine_instance *ctx = (struct rrdengine_instance *)db_instance;
    return (STORAGE_METRIC_HANDLE *) mrg_metric_get_and_acquire(main_mrg, uuid, (Word_t) ctx);
}

static METRIC *rrdeng_metric_create(STORAGE_INSTANCE *db_instance, uuid_t *uuid) {
    internal_fatal(!db_instance, "DBENGINE: db_instance is NULL");

    struct rrdengine_instance *ctx = (struct rrdengine_instance *)db_instance;
    MRG_ENTRY entry = {
        .uuid = uuid,
        .section = (Word_t)ctx,
        .first_time_s = 0,
        .last_time_s = 0,
        .latest_update_every_s = 0,
    };

    METRIC *metric = mrg_metric_add_and_acquire(main_mrg, entry, NULL);
    return metric;
}

STORAGE_METRIC_HANDLE *rrdeng_metric_get_or_create(RRDDIM *rd, STORAGE_INSTANCE *db_instance) {
    struct rrdengine_instance *ctx = (struct rrdengine_instance *)db_instance;
    METRIC *metric;

    metric = mrg_metric_get_and_acquire(main_mrg, &rd->metric_uuid, (Word_t) ctx);

    if(unlikely(!metric)) {
        if(unlikely(ctx->config.legacy)) {
            // this is a single-host database
            // generate a uuid from the chart and dimension ids
            // and overwrite the one supplied by rrddim
            metric = rrdeng_metric_get_legacy(db_instance, rrddim_id(rd), rrdset_id(rd->rrdset));
            if (metric)
                uuid_copy(rd->metric_uuid, *mrg_metric_uuid(main_mrg, metric));
        }

        if(likely(!metric))
            metric = rrdeng_metric_create(db_instance, &rd->metric_uuid);
    }

#ifdef NETDATA_INTERNAL_CHECKS
    if(uuid_memcmp(&rd->metric_uuid, mrg_metric_uuid(main_mrg, metric)) != 0) {
        char uuid1[UUID_STR_LEN + 1];
        char uuid2[UUID_STR_LEN + 1];

        uuid_unparse(rd->metric_uuid, uuid1);
        uuid_unparse(*mrg_metric_uuid(main_mrg, metric), uuid2);
        fatal("DBENGINE: uuids do not match, asked for metric '%s', but got metric '%s'", uuid1, uuid2);
    }

    if(mrg_metric_ctx(metric) != ctx)
        fatal("DBENGINE: mixed up db instances, asked for metric from %p, got from %p",
              ctx, mrg_metric_ctx(metric));
#endif

    return (STORAGE_METRIC_HANDLE *)metric;
}

// ----------------------------------------------------------------------------
// collect ops

static inline void check_and_fix_mrg_update_every(struct rrdeng_collect_handle *handle) {
    if(unlikely((time_t)(handle->update_every_ut / USEC_PER_SEC) != mrg_metric_get_update_every_s(main_mrg, handle->metric))) {
        internal_error(true, "DBENGINE: collection handle has update every %ld, but the metric registry has %ld. Fixing it.",
                       (time_t)(handle->update_every_ut / USEC_PER_SEC), mrg_metric_get_update_every_s(main_mrg, handle->metric));

        if(unlikely(!handle->update_every_ut))
            handle->update_every_ut = (usec_t)mrg_metric_get_update_every_s(main_mrg, handle->metric) * USEC_PER_SEC;
        else
            mrg_metric_set_update_every(main_mrg, handle->metric, (time_t)(handle->update_every_ut / USEC_PER_SEC));
    }
}

static inline bool check_completed_page_consistency(struct rrdeng_collect_handle *handle __maybe_unused) {
#ifdef NETDATA_INTERNAL_CHECKS
    if (unlikely(!handle->pgc_page || !handle->page_entries_max || !handle->page_position || !handle->page_end_time_ut))
        return false;

    struct rrdengine_instance *ctx = mrg_metric_ctx(handle->metric);

    uuid_t *uuid = mrg_metric_uuid(main_mrg, handle->metric);
    time_t start_time_s = pgc_page_start_time_s(handle->pgc_page);
    time_t end_time_s = pgc_page_end_time_s(handle->pgc_page);
    time_t update_every_s = pgc_page_update_every_s(handle->pgc_page);
    size_t page_length = handle->page_position * CTX_POINT_SIZE_BYTES(ctx);
    size_t entries = handle->page_position;
    time_t overwrite_zero_update_every_s = (time_t)(handle->update_every_ut / USEC_PER_SEC);

    if(end_time_s > max_acceptable_collected_time())
        handle->page_flags |= RRDENG_PAGE_COMPLETED_IN_FUTURE;

    VALIDATED_PAGE_DESCRIPTOR vd = validate_page(
        uuid,
        start_time_s,
        end_time_s,
        update_every_s,
        page_length,
        ctx->config.page_type,
        entries,
        0, // do not check for future timestamps - we inherit the timestamps of the children
        overwrite_zero_update_every_s,
        false,
        "collected",
        handle->page_flags);

    return vd.is_valid;
#else
    return true;
#endif
}

/*
 * Gets a handle for storing metrics to the database.
 * The handle must be released with rrdeng_store_metric_finalize().
 */
STORAGE_COLLECT_HANDLE *rrdeng_store_metric_init(STORAGE_METRIC_HANDLE *db_metric_handle, uint32_t update_every, STORAGE_METRICS_GROUP *smg) {
    METRIC *metric = (METRIC *)db_metric_handle;
    struct rrdengine_instance *ctx = mrg_metric_ctx(metric);

    bool is_1st_metric_writer = true;
    if(!mrg_metric_set_writer(main_mrg, metric)) {
        is_1st_metric_writer = false;
        char uuid[UUID_STR_LEN + 1];
        uuid_unparse(*mrg_metric_uuid(main_mrg, metric), uuid);
        netdata_log_error("DBENGINE: metric '%s' is already collected and should not be collected twice - expect gaps on the charts", uuid);
    }

    metric = mrg_metric_dup(main_mrg, metric);

    struct rrdeng_collect_handle *handle;
    handle = callocz(1, sizeof(struct rrdeng_collect_handle));
    handle->common.backend = STORAGE_ENGINE_BACKEND_DBENGINE;
    handle->metric = metric;
    handle->pgc_page = NULL;
    handle->page_data = NULL;
    handle->page_data_size = 0;
    handle->page_position = 0;
    handle->page_entries_max = 0;
    handle->update_every_ut = (usec_t)update_every * USEC_PER_SEC;
    handle->options = is_1st_metric_writer ? RRDENG_1ST_METRIC_WRITER : 0;

    __atomic_add_fetch(&ctx->atomic.collectors_running, 1, __ATOMIC_RELAXED);
    if(!is_1st_metric_writer)
        __atomic_add_fetch(&ctx->atomic.collectors_running_duplicate, 1, __ATOMIC_RELAXED);

    mrg_metric_set_update_every(main_mrg, metric, update_every);

    handle->alignment = (struct pg_alignment *)smg;
    rrdeng_page_alignment_acquire(handle->alignment);

    // this is important!
    // if we don't set the page_end_time_ut during the first collection,
    // data collection may go back in time and, while new pages are added,
    // clean pages may be found matching ours!

    time_t db_first_time_s, db_last_time_s, db_update_every_s;
    mrg_metric_get_retention(main_mrg, metric, &db_first_time_s, &db_last_time_s, &db_update_every_s);
    handle->page_end_time_ut = (usec_t)db_last_time_s * USEC_PER_SEC;

    return (STORAGE_COLLECT_HANDLE *)handle;
}
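// Collector lifecycle sketch (hypothetical caller, for illustration only;
// `mh`, `smg` and the flag value are assumptions - SN_DEFAULT_FLAGS comes
// from storage_number.h):
//
//     STORAGE_COLLECT_HANDLE *ch = rrdeng_store_metric_init(mh, 1 /* sec */, smg);
//     rrdeng_store_metric_next(ch, now_ut, value, value, value, 1, 0, SN_DEFAULT_FLAGS);
//     // ... one call per collected point ...
//     rrdeng_store_metric_finalize(ch); // flushes the hot page, drops references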
void rrdeng_store_metric_flush_current_page(STORAGE_COLLECT_HANDLE *collection_handle) {
    struct rrdeng_collect_handle *handle = (struct rrdeng_collect_handle *)collection_handle;

    if (unlikely(!handle->pgc_page))
        return;

    if(pgd_is_empty(handle->page_data))
        pgc_page_to_clean_evict_or_release(main_cache, handle->pgc_page);

    else {
        check_completed_page_consistency(handle);
        mrg_metric_set_clean_latest_time_s(main_mrg, handle->metric, pgc_page_end_time_s(handle->pgc_page));
        pgc_page_hot_to_dirty_and_release(main_cache, handle->pgc_page);
    }

    mrg_metric_set_hot_latest_time_s(main_mrg, handle->metric, 0);

    handle->pgc_page = NULL;
    handle->page_flags = 0;
    handle->page_position = 0;
    handle->page_entries_max = 0;
    handle->page_data = NULL;
    handle->page_data_size = 0;

    // important!
    // we should never zero page end time ut, because this will allow
    // collection to go back in time
    // handle->page_end_time_ut = 0;
    // handle->page_start_time_ut;

    check_and_fix_mrg_update_every(handle);

    timing_step(TIMING_STEP_DBENGINE_FLUSH_PAGE);
}

static void rrdeng_store_metric_create_new_page(struct rrdeng_collect_handle *handle,
                                                struct rrdengine_instance *ctx,
                                                usec_t point_in_time_ut,
                                                PGD *data,
                                                size_t data_size) {
    time_t point_in_time_s = (time_t)(point_in_time_ut / USEC_PER_SEC);
    const time_t update_every_s = (time_t)(handle->update_every_ut / USEC_PER_SEC);

    PGC_ENTRY page_entry = {
        .section = (Word_t) ctx,
        .metric_id = mrg_metric_id(main_mrg, handle->metric),
        .start_time_s = point_in_time_s,
        .end_time_s = point_in_time_s,
        .size = data_size,
        .data = data,
        .update_every_s = (uint32_t) update_every_s,
        .hot = true
    };

    size_t conflicts = 0;
    bool added = true;
    PGC_PAGE *pgc_page = pgc_page_add_and_acquire(main_cache, page_entry, &added);
    while (unlikely(!added)) {
        conflicts++;

        char uuid[UUID_STR_LEN + 1];
        uuid_unparse(*mrg_metric_uuid(main_mrg, handle->metric), uuid);

#ifdef NETDATA_INTERNAL_CHECKS
        internal_error(true,
#else
        nd_log_limit_static_global_var(erl, 1, 0);
        nd_log_limit(&erl, NDLS_DAEMON, NDLP_WARNING,
#endif
                      "DBENGINE: metric '%s' new page from %ld to %ld, update every %ld, has a conflict in main cache "
                      "with existing %s%s page from %ld to %ld, update every %ld - "
                      "is it collected more than once?",
                      uuid,
                      page_entry.start_time_s, page_entry.end_time_s, (time_t)page_entry.update_every_s,
                      pgc_is_page_hot(pgc_page) ? "hot" : "not-hot",
                      pgc_page_data(pgc_page) == PGD_EMPTY ? " gap" : "",
                      pgc_page_start_time_s(pgc_page), pgc_page_end_time_s(pgc_page), pgc_page_update_every_s(pgc_page)
        );

        pgc_page_release(main_cache, pgc_page);

        point_in_time_ut -= handle->update_every_ut;
        point_in_time_s = (time_t)(point_in_time_ut / USEC_PER_SEC);
        page_entry.start_time_s = point_in_time_s;
        page_entry.end_time_s = point_in_time_s;
        pgc_page = pgc_page_add_and_acquire(main_cache, page_entry, &added);
    }

    handle->page_entries_max = data_size / CTX_POINT_SIZE_BYTES(ctx);
    handle->page_start_time_ut = point_in_time_ut;
    handle->page_end_time_ut = point_in_time_ut;
    handle->page_position = 1; // zero is already in our data
    handle->pgc_page = pgc_page;
    handle->page_flags = conflicts ? RRDENG_PAGE_CONFLICT : 0;

    if(point_in_time_s > max_acceptable_collected_time())
        handle->page_flags |= RRDENG_PAGE_CREATED_IN_FUTURE;

    check_and_fix_mrg_update_every(handle);

    timing_step(TIMING_STEP_DBENGINE_CREATE_NEW_PAGE);
}

static size_t aligned_allocation_entries(size_t max_slots, size_t target_slot, time_t now_s) {
    size_t slots = target_slot;
    size_t pos = (now_s % max_slots);

    if(pos > slots)
        slots += max_slots - pos;

    else if(pos < slots)
        slots -= pos;

    else
        slots = max_slots;

    return slots;
}
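// Worked example of the alignment math above (illustrative numbers): with
// max_slots = 1024 and target_slot = 100,
//   now_s % 1024 ==  40  ->  pos < slots  ->  100 - 40           =  60 entries
//   now_s % 1024 == 200  ->  pos > slots  ->  100 + (1024 - 200) = 924 entries
//   now_s % 1024 == 100  ->  pos == slots ->  a full page of 1024 entries
// i.e. the page is sized so that it fills up exactly when wall-clock time
// reaches the alignment group's target slot, spreading the page flushes of
// different groups across time.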
static PGD *rrdeng_alloc_new_page_data(struct rrdeng_collect_handle *handle, size_t *data_size, usec_t point_in_time_ut) {
    struct rrdengine_instance *ctx = mrg_metric_ctx(handle->metric);

    PGD *d = NULL;

    size_t max_size = tier_page_size[ctx->config.tier];
    size_t max_slots = max_size / CTX_POINT_SIZE_BYTES(ctx);

    size_t slots = aligned_allocation_entries(
        max_slots,
        indexing_partition((Word_t) handle->alignment, max_slots),
        (time_t) (point_in_time_ut / USEC_PER_SEC)
    );

    if(slots < max_slots / 3)
        slots = max_slots / 3;

    if(slots < 3)
        slots = 3;

    size_t size = slots * CTX_POINT_SIZE_BYTES(ctx);

    // internal_error(true, "PAGE ALLOC %zu bytes (%zu max)", size, max_size);

    internal_fatal(slots < 3 || slots > max_slots, "ooops! wrong distribution of metrics across time");
    internal_fatal(size > tier_page_size[ctx->config.tier] || size < CTX_POINT_SIZE_BYTES(ctx) * 2, "ooops! wrong page size");

    *data_size = size;
    switch (ctx->config.page_type) {
        case PAGE_METRICS:
        case PAGE_TIER:
            d = pgd_create(ctx->config.page_type, slots);
            break;
        case PAGE_GORILLA_METRICS:
            // ignore slots, and use the fixed number of slots per gorilla buffer.
            // gorilla will automatically add more buffers if needed.
            d = pgd_create(ctx->config.page_type, GORILLA_BUFFER_SLOTS);
            break;
        default:
            fatal("Unknown page type: %u\n", ctx->config.page_type);
    }

    timing_step(TIMING_STEP_DBENGINE_PAGE_ALLOC);
    return d;
}

static void rrdeng_store_metric_append_point(STORAGE_COLLECT_HANDLE *collection_handle,
                                             const usec_t point_in_time_ut,
                                             const NETDATA_DOUBLE n,
                                             const NETDATA_DOUBLE min_value,
                                             const NETDATA_DOUBLE max_value,
                                             const uint16_t count,
                                             const uint16_t anomaly_count,
                                             const SN_FLAGS flags)
{
    struct rrdeng_collect_handle *handle = (struct rrdeng_collect_handle *)collection_handle;
    struct rrdengine_instance *ctx = mrg_metric_ctx(handle->metric);

    if(unlikely(!handle->page_data))
        handle->page_data = rrdeng_alloc_new_page_data(handle, &handle->page_data_size, point_in_time_ut);

    timing_step(TIMING_STEP_DBENGINE_CHECK_DATA);

    pgd_append_point(handle->page_data,
                     point_in_time_ut,
                     n, min_value, max_value, count, anomaly_count, flags,
                     handle->page_position);

    timing_step(TIMING_STEP_DBENGINE_PACK);

    if(unlikely(!handle->pgc_page)) {
        rrdeng_store_metric_create_new_page(handle, ctx, point_in_time_ut, handle->page_data, handle->page_data_size);
        // handle->position is set to 1 already
    }
    else {
        // update an existing page
        pgc_page_hot_set_end_time_s(main_cache, handle->pgc_page, (time_t) (point_in_time_ut / USEC_PER_SEC));
        handle->page_end_time_ut = point_in_time_ut;

        if(unlikely(++handle->page_position >= handle->page_entries_max)) {
            internal_fatal(handle->page_position > handle->page_entries_max, "DBENGINE: exceeded page max number of points");
            handle->page_flags |= RRDENG_PAGE_FULL;
            rrdeng_store_metric_flush_current_page(collection_handle);
        }
    }

    timing_step(TIMING_STEP_DBENGINE_PAGE_FIN);

    // update the metric information
    mrg_metric_set_hot_latest_time_s(main_mrg, handle->metric, (time_t) (point_in_time_ut / USEC_PER_SEC));

    timing_step(TIMING_STEP_DBENGINE_MRG_UPDATE);
}

static void store_metric_next_error_log(struct rrdeng_collect_handle *handle __maybe_unused, usec_t point_in_time_ut __maybe_unused, const char *msg __maybe_unused) {
#ifdef NETDATA_INTERNAL_CHECKS
    time_t point_in_time_s = (time_t)(point_in_time_ut / USEC_PER_SEC);
    char uuid[UUID_STR_LEN + 1];
    uuid_unparse(*mrg_metric_uuid(main_mrg, handle->metric), uuid);

    BUFFER *wb = NULL;
    if(handle->pgc_page && handle->page_flags) {
        wb = buffer_create(0, NULL);
        collect_page_flags_to_buffer(wb, handle->page_flags);
    }

    nd_log_limit_static_global_var(erl, 1, 0);
    nd_log_limit(&erl, NDLS_DAEMON, NDLP_NOTICE,
                 "DBENGINE: metric '%s' collected point at %ld, %s last collection at %ld, "
                 "update every %ld, %s page from %ld to %ld, position %u (of %u), flags: %s",
                 uuid,
                 point_in_time_s,
                 msg,
                 (time_t)(handle->page_end_time_ut / USEC_PER_SEC),
                 (time_t)(handle->update_every_ut / USEC_PER_SEC),
                 handle->pgc_page ? "current" : "*LAST*",
                 (time_t)(handle->page_start_time_ut / USEC_PER_SEC),
                 (time_t)(handle->page_end_time_ut / USEC_PER_SEC),
                 handle->page_position, handle->page_entries_max,
                 wb ? buffer_tostring(wb) : ""
    );

    buffer_free(wb);
#else
    ;
#endif
}

void rrdeng_store_metric_next(STORAGE_COLLECT_HANDLE *collection_handle,
                              const usec_t point_in_time_ut,
                              const NETDATA_DOUBLE n,
                              const NETDATA_DOUBLE min_value,
                              const NETDATA_DOUBLE max_value,
                              const uint16_t count,
                              const uint16_t anomaly_count,
                              const SN_FLAGS flags)
{
    timing_step(TIMING_STEP_RRDSET_STORE_METRIC);

    struct rrdeng_collect_handle *handle = (struct rrdeng_collect_handle *)collection_handle;

#ifdef NETDATA_INTERNAL_CHECKS
    if(unlikely(point_in_time_ut > (usec_t)max_acceptable_collected_time() * USEC_PER_SEC))
        handle->page_flags |= RRDENG_PAGE_FUTURE_POINT;
#endif

    usec_t delta_ut = point_in_time_ut - handle->page_end_time_ut;

    if(likely(delta_ut == handle->update_every_ut)) {
        // happy path
        ;
    }
    else if(unlikely(point_in_time_ut > handle->page_end_time_ut)) {
        if(handle->pgc_page) {
            if (unlikely(delta_ut < handle->update_every_ut)) {
                handle->page_flags |= RRDENG_PAGE_STEP_TOO_SMALL;
                rrdeng_store_metric_flush_current_page(collection_handle);
            }
            else if (unlikely(delta_ut % handle->update_every_ut)) {
                handle->page_flags |= RRDENG_PAGE_STEP_UNALIGNED;
                rrdeng_store_metric_flush_current_page(collection_handle);
            }
            else {
                size_t points_gap = delta_ut / handle->update_every_ut;
                size_t page_remaining_points = handle->page_entries_max - handle->page_position;

                if (points_gap >= page_remaining_points) {
                    handle->page_flags |= RRDENG_PAGE_BIG_GAP;
                    rrdeng_store_metric_flush_current_page(collection_handle);
                }
                else {
                    // loop to fill the gap
                    handle->page_flags |= RRDENG_PAGE_GAP;

                    usec_t stop_ut = point_in_time_ut - handle->update_every_ut;
                    for (usec_t this_ut = handle->page_end_time_ut + handle->update_every_ut;
                         this_ut <= stop_ut;
                         this_ut = handle->page_end_time_ut + handle->update_every_ut) {
                        rrdeng_store_metric_append_point(
                            collection_handle,
                            this_ut,
                            NAN, NAN, NAN,
                            1, 0,
                            SN_EMPTY_SLOT);
                    }
                }
            }
        }
    }
    else if(unlikely(point_in_time_ut < handle->page_end_time_ut)) {
        handle->page_flags |= RRDENG_PAGE_PAST_COLLECTION;
        store_metric_next_error_log(handle, point_in_time_ut, "is older than the");
        return;
    }
    else /* if(unlikely(point_in_time_ut == handle->page_end_time_ut)) */ {
        handle->page_flags |= RRDENG_PAGE_REPEATED_COLLECTION;
        store_metric_next_error_log(handle, point_in_time_ut, "is at the same time as the");
        return;
    }

    timing_step(TIMING_STEP_DBENGINE_FIRST_CHECK);

    rrdeng_store_metric_append_point(collection_handle,
                                     point_in_time_ut,
                                     n, min_value, max_value,
                                     count, anomaly_count,
                                     flags);
}
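// Worked example of the delta handling above (illustrative numbers): with
// update_every = 10s and the current page ending at t = 100s,
//   a point at t = 110s -> delta == update_every -> happy path, stored as-is
//   a point at t = 130s -> aligned gap; slots for t = 110s and t = 120s are
//                          filled with SN_EMPTY_SLOT, then 130s is stored
//   a point at t = 115s -> step unaligned -> page flushed, new page started
//   a point at t = 100s -> repeated collection -> point dropped
//   a point at t =  90s -> past collection -> point dropped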
/*
 * Releases the database reference from the handle for storing metrics.
 * Returns 1 if it's safe to delete the dimension.
 */
int rrdeng_store_metric_finalize(STORAGE_COLLECT_HANDLE *collection_handle) {
    struct rrdeng_collect_handle *handle = (struct rrdeng_collect_handle *)collection_handle;
    struct rrdengine_instance *ctx = mrg_metric_ctx(handle->metric);

    handle->page_flags |= RRDENG_PAGE_COLLECT_FINALIZE;
    rrdeng_store_metric_flush_current_page(collection_handle);
    rrdeng_page_alignment_release(handle->alignment);

    __atomic_sub_fetch(&ctx->atomic.collectors_running, 1, __ATOMIC_RELAXED);
    if(!(handle->options & RRDENG_1ST_METRIC_WRITER))
        __atomic_sub_fetch(&ctx->atomic.collectors_running_duplicate, 1, __ATOMIC_RELAXED);

    if((handle->options & RRDENG_1ST_METRIC_WRITER) && !mrg_metric_clear_writer(main_mrg, handle->metric))
        internal_fatal(true, "DBENGINE: metric is already released");

    time_t first_time_s, last_time_s, update_every_s;
    mrg_metric_get_retention(main_mrg, handle->metric, &first_time_s, &last_time_s, &update_every_s);

    mrg_metric_release(main_mrg, handle->metric);
    freez(handle);

    if(!first_time_s && !last_time_s)
        return 1;

    return 0;
}

void rrdeng_store_metric_change_collection_frequency(STORAGE_COLLECT_HANDLE *collection_handle, int update_every) {
    struct rrdeng_collect_handle *handle = (struct rrdeng_collect_handle *)collection_handle;
    check_and_fix_mrg_update_every(handle);

    METRIC *metric = handle->metric;
    usec_t update_every_ut = (usec_t)update_every * USEC_PER_SEC;

    if(update_every_ut == handle->update_every_ut)
        return;

    handle->page_flags |= RRDENG_PAGE_UPDATE_EVERY_CHANGE;
    rrdeng_store_metric_flush_current_page(collection_handle);
    mrg_metric_set_update_every(main_mrg, metric, update_every);
    handle->update_every_ut = update_every_ut;
}

// ----------------------------------------------------------------------------
// query ops

#ifdef NETDATA_INTERNAL_CHECKS
SPINLOCK global_query_handle_spinlock = NETDATA_SPINLOCK_INITIALIZER;
static struct rrdeng_query_handle *global_query_handle_ll = NULL;

static void register_query_handle(struct rrdeng_query_handle *handle) {
    handle->query_pid = gettid();
    handle->started_time_s = now_realtime_sec();

    spinlock_lock(&global_query_handle_spinlock);
    DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(global_query_handle_ll, handle, prev, next);
    spinlock_unlock(&global_query_handle_spinlock);
}

static void unregister_query_handle(struct rrdeng_query_handle *handle) {
    spinlock_lock(&global_query_handle_spinlock);
    DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(global_query_handle_ll, handle, prev, next);
    spinlock_unlock(&global_query_handle_spinlock);
}
#else
static void register_query_handle(struct rrdeng_query_handle *handle __maybe_unused) {
    ;
}

static void unregister_query_handle(struct rrdeng_query_handle *handle __maybe_unused) {
    ;
}
#endif

/*
 * Gets a handle for loading metrics from the database.
 * The handle must be released with rrdeng_load_metric_finalize().
 */
void rrdeng_load_metric_init(STORAGE_METRIC_HANDLE *db_metric_handle,
                             struct storage_engine_query_handle *rrddim_handle,
                             time_t start_time_s,
                             time_t end_time_s,
                             STORAGE_PRIORITY priority)
{
    usec_t started_ut = now_monotonic_usec();
    netdata_thread_disable_cancelability();

    METRIC *metric = (METRIC *)db_metric_handle;
    struct rrdengine_instance *ctx = mrg_metric_ctx(metric);

    struct rrdeng_query_handle *handle;
    handle = rrdeng_query_handle_get();
    register_query_handle(handle);

    if (unlikely(priority < STORAGE_PRIORITY_HIGH))
        priority = STORAGE_PRIORITY_HIGH;
    else if (unlikely(priority >= STORAGE_PRIORITY_INTERNAL_MAX_DONT_USE))
        priority = STORAGE_PRIORITY_INTERNAL_MAX_DONT_USE - 1;

    handle->ctx = ctx;
    handle->metric = metric;
    handle->priority = priority;

    // IMPORTANT!
    // It is crucial not to exceed the db boundaries, because dbengine
    // now has gap caching, so when a gap is detected a negative page
    // is inserted into the main cache, to avoid scanning the journals
    // again for pages matching the gap.

    time_t db_first_time_s, db_last_time_s, db_update_every_s;
    mrg_metric_get_retention(main_mrg, metric, &db_first_time_s, &db_last_time_s, &db_update_every_s);

    if(is_page_in_time_range(start_time_s, end_time_s, db_first_time_s, db_last_time_s) == PAGE_IS_IN_RANGE) {
        handle->start_time_s = MAX(start_time_s, db_first_time_s);
        handle->end_time_s = MIN(end_time_s, db_last_time_s);
        handle->now_s = handle->start_time_s;

        handle->dt_s = db_update_every_s;
        if (!handle->dt_s) {
            handle->dt_s = default_rrd_update_every;
            mrg_metric_set_update_every_s_if_zero(main_mrg, metric, default_rrd_update_every);
        }

        rrddim_handle->handle = (STORAGE_QUERY_HANDLE *) handle;
        rrddim_handle->start_time_s = handle->start_time_s;
        rrddim_handle->end_time_s = handle->end_time_s;
        rrddim_handle->priority = priority;
        rrddim_handle->backend = STORAGE_ENGINE_BACKEND_DBENGINE;

        pg_cache_preload(handle);

        __atomic_add_fetch(&rrdeng_cache_efficiency_stats.query_time_init, now_monotonic_usec() - started_ut, __ATOMIC_RELAXED);
    }
    else {
        handle->start_time_s = start_time_s;
        handle->end_time_s = end_time_s;
        handle->now_s = start_time_s;
        handle->dt_s = db_update_every_s;

        rrddim_handle->handle = (STORAGE_QUERY_HANDLE *) handle;
        rrddim_handle->start_time_s = handle->start_time_s;
        rrddim_handle->end_time_s = 0;
        rrddim_handle->priority = priority;
        rrddim_handle->backend = STORAGE_ENGINE_BACKEND_DBENGINE;
    }
}

static bool rrdeng_load_page_next(struct storage_engine_query_handle *rrddim_handle, bool debug_this __maybe_unused) {
    struct rrdeng_query_handle *handle = (struct rrdeng_query_handle *)rrddim_handle->handle;
    struct rrdengine_instance *ctx = mrg_metric_ctx(handle->metric);

    if (likely(handle->page)) {
        // we have a page to release
        pgc_page_release(main_cache, handle->page);
        handle->page = NULL;
        pgdc_reset(&handle->pgdc, NULL, UINT32_MAX);
    }

    if (unlikely(handle->now_s > rrddim_handle->end_time_s))
        return false;

    size_t entries = 0;
    handle->page = pg_cache_lookup_next(ctx, handle->pdc, handle->now_s, handle->dt_s, &entries);

    internal_fatal(handle->page && (pgc_page_data(handle->page) == PGD_EMPTY || !entries),
                   "A page was returned, but it is empty - pg_cache_lookup_next() should be handling this case");

    if (unlikely(!handle->page || pgc_page_data(handle->page) == PGD_EMPTY || !entries))
        return false;

    time_t page_start_time_s = pgc_page_start_time_s(handle->page);
    time_t page_end_time_s = pgc_page_end_time_s(handle->page);
    time_t page_update_every_s = pgc_page_update_every_s(handle->page);

    unsigned position;
    if(likely(handle->now_s >= page_start_time_s && handle->now_s <= page_end_time_s)) {
        if(unlikely(entries == 1 || page_start_time_s == page_end_time_s || !page_update_every_s)) {
            position = 0;
            handle->now_s = page_start_time_s;
        }
        else {
            position = (handle->now_s - page_start_time_s) * (entries - 1) / (page_end_time_s - page_start_time_s);
            time_t point_end_time_s = page_start_time_s + position * page_update_every_s;
            while(point_end_time_s < handle->now_s && position + 1 < entries) {
                // https://github.com/netdata/netdata/issues/14411
                // we really need a while() here, because the delta may be
                // 2 points at higher tiers
                position++;
                point_end_time_s = page_start_time_s + position * page_update_every_s;
            }
            handle->now_s = point_end_time_s;
        }

        internal_fatal(position >= entries, "DBENGINE: wrong page position calculation");
    }
    else if(handle->now_s < page_start_time_s) {
        handle->now_s = page_start_time_s;
        position = 0;
    }
    else {
        internal_fatal(true, "DBENGINE: this page is entirely in our past and should not be accepted for this query in the first place");
        handle->now_s = page_end_time_s;
        position = entries - 1;
    }

    handle->entries = entries;
    handle->position = position;
    handle->dt_s = page_update_every_s;

    pgdc_reset(&handle->pgdc, pgc_page_data(handle->page), handle->position);

    return true;
}
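// Worked example of the position interpolation above (illustrative numbers):
// for a page with start = 100s, end = 190s, update_every = 10s, entries = 10
// and handle->now_s = 135s:
//   position = (135 - 100) * (10 - 1) / (190 - 100) = 315 / 90 = 3
//   point_end_time = 100 + 3 * 10 = 130 < 135 -> advance to position 4 (140s)
// so the query resumes at the first point whose end time is not before now_s,
// and handle->now_s snaps to 140s.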
// Returns the metric and sets its timestamp into current_time
// IT IS REQUIRED TO **ALWAYS** SET ALL RETURN VALUES (current_time, end_time, flags)
// IT IS REQUIRED TO **ALWAYS** KEEP TRACK OF TIME, EVEN OUTSIDE THE DATABASE BOUNDARIES
STORAGE_POINT rrdeng_load_metric_next(struct storage_engine_query_handle *rrddim_handle) {
    struct rrdeng_query_handle *handle = (struct rrdeng_query_handle *)rrddim_handle->handle;
    STORAGE_POINT sp;

    if (unlikely(handle->now_s > rrddim_handle->end_time_s)) {
        storage_point_empty(sp, handle->now_s - handle->dt_s, handle->now_s);
        goto prepare_for_next_iteration;
    }

    if (unlikely(!handle->page || handle->position >= handle->entries)) {
        // We need to get a new page
        if (!rrdeng_load_page_next(rrddim_handle, false)) {
            handle->now_s = rrddim_handle->end_time_s;
            storage_point_empty(sp, handle->now_s - handle->dt_s, handle->now_s);
            goto prepare_for_next_iteration;
        }
    }

    sp.start_time_s = handle->now_s - handle->dt_s;
    sp.end_time_s = handle->now_s;

    pgdc_get_next_point(&handle->pgdc, handle->position, &sp);

prepare_for_next_iteration:
    internal_fatal(sp.end_time_s < rrddim_handle->start_time_s, "DBENGINE: this point is too old for this query");
    internal_fatal(sp.end_time_s < handle->now_s, "DBENGINE: this point is too old for this point in time");

    handle->now_s += handle->dt_s;
    handle->position++;

    return sp;
}

int rrdeng_load_metric_is_finished(struct storage_engine_query_handle *rrddim_handle) {
    struct rrdeng_query_handle *handle = (struct rrdeng_query_handle *)rrddim_handle->handle;
    return (handle->now_s > rrddim_handle->end_time_s);
}

/*
 * Releases the database reference from the handle for loading metrics.
 */
void rrdeng_load_metric_finalize(struct storage_engine_query_handle *rrddim_handle)
{
    struct rrdeng_query_handle *handle = (struct rrdeng_query_handle *)rrddim_handle->handle;

    if (handle->page) {
        pgc_page_release(main_cache, handle->page);
        pgdc_reset(&handle->pgdc, NULL, UINT32_MAX);
    }

    if(!pdc_release_and_destroy_if_unreferenced(handle->pdc, false, false))
        __atomic_store_n(&handle->pdc->workers_should_stop, true, __ATOMIC_RELAXED);

    unregister_query_handle(handle);
    rrdeng_query_handle_release(handle);
    rrddim_handle->handle = NULL;
    netdata_thread_enable_cancelability();
}
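// Query lifecycle sketch (hypothetical caller; `mh`, the time window and the
// priority value are illustrative assumptions):
//
//     struct storage_engine_query_handle seqh;
//     rrdeng_load_metric_init(mh, &seqh, after_s, before_s, STORAGE_PRIORITY_NORMAL);
//     while (!rrdeng_load_metric_is_finished(&seqh)) {
//         STORAGE_POINT sp = rrdeng_load_metric_next(&seqh);
//         // consume sp here - empty points are returned for gaps
//     }
//     rrdeng_load_metric_finalize(&seqh);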
time_t rrdeng_load_align_to_optimal_before(struct storage_engine_query_handle *rrddim_handle) {
    struct rrdeng_query_handle *handle = (struct rrdeng_query_handle *)rrddim_handle->handle;

    if(handle->pdc) {
        rrdeng_prep_wait(handle->pdc);
        if (handle->pdc->optimal_end_time_s > rrddim_handle->end_time_s)
            rrddim_handle->end_time_s = handle->pdc->optimal_end_time_s;
    }

    return rrddim_handle->end_time_s;
}

time_t rrdeng_metric_latest_time(STORAGE_METRIC_HANDLE *db_metric_handle) {
    METRIC *metric = (METRIC *)db_metric_handle;
    time_t latest_time_s = 0;

    if (metric)
        latest_time_s = mrg_metric_get_latest_time_s(main_mrg, metric);

    return latest_time_s;
}

time_t rrdeng_metric_oldest_time(STORAGE_METRIC_HANDLE *db_metric_handle) {
    METRIC *metric = (METRIC *)db_metric_handle;

    time_t oldest_time_s = 0;
    if (metric)
        oldest_time_s = mrg_metric_get_first_time_s(main_mrg, metric);

    return oldest_time_s;
}

bool rrdeng_metric_retention_by_uuid(STORAGE_INSTANCE *db_instance, uuid_t *dim_uuid, time_t *first_entry_s, time_t *last_entry_s)
{
    struct rrdengine_instance *ctx = (struct rrdengine_instance *)db_instance;
    if (unlikely(!ctx)) {
        netdata_log_error("DBENGINE: invalid STORAGE INSTANCE to %s()", __FUNCTION__);
        return false;
    }

    METRIC *metric = mrg_metric_get_and_acquire(main_mrg, dim_uuid, (Word_t) ctx);
    if (unlikely(!metric))
        return false;

    time_t update_every_s;
    mrg_metric_get_retention(main_mrg, metric, first_entry_s, last_entry_s, &update_every_s);

    mrg_metric_release(main_mrg, metric);

    return true;
}

uint64_t rrdeng_disk_space_max(STORAGE_INSTANCE *db_instance) {
    struct rrdengine_instance *ctx = (struct rrdengine_instance *)db_instance;
    return ctx->config.max_disk_space;
}

uint64_t rrdeng_disk_space_used(STORAGE_INSTANCE *db_instance) {
    struct rrdengine_instance *ctx = (struct rrdengine_instance *)db_instance;
    return __atomic_load_n(&ctx->atomic.current_disk_space, __ATOMIC_RELAXED);
}

time_t rrdeng_global_first_time_s(STORAGE_INSTANCE *db_instance) {
    struct rrdengine_instance *ctx = (struct rrdengine_instance *)db_instance;

    time_t t = __atomic_load_n(&ctx->atomic.first_time_s, __ATOMIC_RELAXED);
    if(t == LONG_MAX || t < 0)
        t = 0;

    return t;
}

size_t rrdeng_currently_collected_metrics(STORAGE_INSTANCE *db_instance) {
    struct rrdengine_instance *ctx = (struct rrdengine_instance *)db_instance;
    return __atomic_load_n(&ctx->atomic.collectors_running, __ATOMIC_RELAXED);
}

/*
 * Gathers Database Engine statistics.
 * Careful when modifying this function.
 * You must not change the indices of the statistics or user code will break.
 * You must not exceed RRDENG_NR_STATS or it will crash.
 */
void rrdeng_get_37_statistics(struct rrdengine_instance *ctx, unsigned long long *array)
{
    if (ctx == NULL)
        return;

    array[0] = (uint64_t)__atomic_load_n(&ctx->atomic.collectors_running, __ATOMIC_RELAXED); // API producers
    array[1] = (uint64_t)__atomic_load_n(&ctx->atomic.inflight_queries, __ATOMIC_RELAXED);   // API consumers
    array[2] = 0;
    array[3] = 0;
    array[4] = 0;
    array[5] = 0;  // (uint64_t)ctx->stats.pg_cache_insertions;
    array[6] = 0;  // (uint64_t)ctx->stats.pg_cache_deletions;
    array[7] = 0;  // (uint64_t)ctx->stats.pg_cache_hits;
    array[8] = 0;  // (uint64_t)ctx->stats.pg_cache_misses;
    array[9] = 0;  // (uint64_t)ctx->stats.pg_cache_backfills;
    array[10] = 0; // (uint64_t)ctx->stats.pg_cache_evictions;
    array[11] = (uint64_t)__atomic_load_n(&ctx->stats.before_compress_bytes, __ATOMIC_RELAXED); // used
    array[12] = (uint64_t)__atomic_load_n(&ctx->stats.after_compress_bytes, __ATOMIC_RELAXED);  // used
    array[13] = (uint64_t)__atomic_load_n(&ctx->stats.before_decompress_bytes, __ATOMIC_RELAXED);
    array[14] = (uint64_t)__atomic_load_n(&ctx->stats.after_decompress_bytes, __ATOMIC_RELAXED);
    array[15] = (uint64_t)__atomic_load_n(&ctx->stats.io_write_bytes, __ATOMIC_RELAXED);    // used
    array[16] = (uint64_t)__atomic_load_n(&ctx->stats.io_write_requests, __ATOMIC_RELAXED); // used
    array[17] = (uint64_t)__atomic_load_n(&ctx->stats.io_read_bytes, __ATOMIC_RELAXED);
    array[18] = (uint64_t)__atomic_load_n(&ctx->stats.io_read_requests, __ATOMIC_RELAXED);  // used
    array[19] = 0; // (uint64_t)__atomic_load_n(&ctx->stats.io_write_extent_bytes, __ATOMIC_RELAXED);
    array[20] = 0; // (uint64_t)__atomic_load_n(&ctx->stats.io_write_extents, __ATOMIC_RELAXED);
    array[21] = 0; // (uint64_t)__atomic_load_n(&ctx->stats.io_read_extent_bytes, __ATOMIC_RELAXED);
    array[22] = 0; // (uint64_t)__atomic_load_n(&ctx->stats.io_read_extents, __ATOMIC_RELAXED);
    array[23] = (uint64_t)__atomic_load_n(&ctx->stats.datafile_creations, __ATOMIC_RELAXED);
    array[24] = (uint64_t)__atomic_load_n(&ctx->stats.datafile_deletions, __ATOMIC_RELAXED);
    array[25] = (uint64_t)__atomic_load_n(&ctx->stats.journalfile_creations, __ATOMIC_RELAXED);
    array[26] = (uint64_t)__atomic_load_n(&ctx->stats.journalfile_deletions, __ATOMIC_RELAXED);
    array[27] = 0; // (uint64_t)__atomic_load_n(&ctx->stats.page_cache_descriptors, __ATOMIC_RELAXED);
    array[28] = (uint64_t)__atomic_load_n(&ctx->stats.io_errors, __ATOMIC_RELAXED);
    array[29] = (uint64_t)__atomic_load_n(&ctx->stats.fs_errors, __ATOMIC_RELAXED);
    array[30] = (uint64_t)__atomic_load_n(&global_io_errors, __ATOMIC_RELAXED); // used
    array[31] = (uint64_t)__atomic_load_n(&global_fs_errors, __ATOMIC_RELAXED); // used
    array[32] = (uint64_t)__atomic_load_n(&rrdeng_reserved_file_descriptors, __ATOMIC_RELAXED); // used
    array[33] = 0; // (uint64_t)__atomic_load_n(&ctx->stats.pg_cache_over_half_dirty_events, __ATOMIC_RELAXED);
    array[34] = (uint64_t)__atomic_load_n(&global_pg_cache_over_half_dirty_events, __ATOMIC_RELAXED); // used
    array[35] = 0; // (uint64_t)__atomic_load_n(&ctx->stats.flushing_pressure_page_deletions, __ATOMIC_RELAXED);
    array[36] = (uint64_t)__atomic_load_n(&global_flushing_pressure_page_deletions, __ATOMIC_RELAXED); // used
    array[37] = 0; // (uint64_t)pg_cache->active_descriptors;

    fatal_assert(RRDENG_NR_STATS == 38);
}

static void rrdeng_populate_mrg(struct rrdengine_instance *ctx) {
    uv_rwlock_rdlock(&ctx->datafiles.rwlock);
    size_t datafiles = 0;
    for(struct rrdengine_datafile *df = ctx->datafiles.first; df ; df = df->next)
        datafiles++;
    uv_rwlock_rdunlock(&ctx->datafiles.rwlock);

    ssize_t cpus = (ssize_t)get_netdata_cpus() / (ssize_t)storage_tiers;
    if(cpus > (ssize_t)datafiles)
        cpus = (ssize_t)datafiles;

    if(cpus > (ssize_t)libuv_worker_threads)
        cpus = (ssize_t)libuv_worker_threads;

    if(cpus >= (ssize_t)get_netdata_cpus() / 2)
        cpus = (ssize_t)(get_netdata_cpus() / 2 - 1);

    if(cpus < 1)
        cpus = 1;

    netdata_log_info("DBENGINE: populating retention to MRG from %zu journal files of tier %d, using %zd threads...", datafiles, ctx->config.tier, cpus);

    if(datafiles > 2) {
        struct rrdengine_datafile *datafile;

        datafile = ctx->datafiles.first->prev;
        if(!(datafile->journalfile->v2.flags & JOURNALFILE_FLAG_IS_AVAILABLE))
            datafile = datafile->prev;

        if(datafile->journalfile->v2.flags & JOURNALFILE_FLAG_IS_AVAILABLE) {
            journalfile_v2_populate_retention_to_mrg(ctx, datafile->journalfile);
            datafile->populate_mrg.populated = true;
        }

        datafile = ctx->datafiles.first;
        if(datafile->journalfile->v2.flags & JOURNALFILE_FLAG_IS_AVAILABLE) {
            journalfile_v2_populate_retention_to_mrg(ctx, datafile->journalfile);
            datafile->populate_mrg.populated = true;
        }
    }

    ctx->loading.populate_mrg.size = cpus;
    ctx->loading.populate_mrg.array = callocz(ctx->loading.populate_mrg.size, sizeof(struct completion));

    for (size_t i = 0; i < ctx->loading.populate_mrg.size; i++) {
        completion_init(&ctx->loading.populate_mrg.array[i]);
        rrdeng_enq_cmd(ctx, RRDENG_OPCODE_CTX_POPULATE_MRG, NULL, &ctx->loading.populate_mrg.array[i],
                       STORAGE_PRIORITY_INTERNAL_DBENGINE, NULL, NULL);
    }
}

void rrdeng_readiness_wait(struct rrdengine_instance *ctx) {
    for (size_t i = 0; i < ctx->loading.populate_mrg.size; i++) {
        completion_wait_for(&ctx->loading.populate_mrg.array[i]);
        completion_destroy(&ctx->loading.populate_mrg.array[i]);
    }

    freez(ctx->loading.populate_mrg.array);
    ctx->loading.populate_mrg.array = NULL;
    ctx->loading.populate_mrg.size = 0;

    netdata_log_info("DBENGINE: tier %d is ready for data collection and queries", ctx->config.tier);
}

bool rrdeng_is_legacy(STORAGE_INSTANCE *db_instance) {
    struct rrdengine_instance *ctx = (struct rrdengine_instance *)db_instance;
    return ctx->config.legacy;
}

void rrdeng_exit_mode(struct rrdengine_instance *ctx) {
    __atomic_store_n(&ctx->quiesce.exit_mode, true, __ATOMIC_RELAXED);
}

/*
 * Returns 0 on success, negative on error
 */
int rrdeng_init(struct rrdengine_instance **ctxp, const char *dbfiles_path,
                unsigned disk_space_mb, size_t tier) {
    struct rrdengine_instance *ctx;
    uint32_t max_open_files;

    max_open_files = rlimit_nofile.rlim_cur / 4;

    /* reserve RRDENG_FD_BUDGET_PER_INSTANCE file descriptors for this instance */
    rrd_stat_atomic_add(&rrdeng_reserved_file_descriptors, RRDENG_FD_BUDGET_PER_INSTANCE);
    if (rrdeng_reserved_file_descriptors > max_open_files) {
        netdata_log_error(
            "Exceeded the budget of available file descriptors (%u/%u), cannot create new dbengine instance.",
            (unsigned)rrdeng_reserved_file_descriptors,
            (unsigned)max_open_files);

        rrd_stat_atomic_add(&global_fs_errors, 1);
        rrd_stat_atomic_add(&rrdeng_reserved_file_descriptors, -RRDENG_FD_BUDGET_PER_INSTANCE);
        return UV_EMFILE;
    }

    if(NULL == ctxp) {
        ctx = multidb_ctx[tier];
        memset(ctx, 0, sizeof(*ctx));
        ctx->config.legacy = false;
    }
    else {
        *ctxp = ctx = callocz(1, sizeof(*ctx));
        ctx->config.legacy = true;
    }

    ctx->config.tier = (int)tier;
    ctx->config.page_type = tier_page_type[tier];
    ctx->config.global_compress_alg = RRD_LZ4;
    if (disk_space_mb < RRDENG_MIN_DISK_SPACE_MB)
        disk_space_mb = RRDENG_MIN_DISK_SPACE_MB;
    ctx->config.max_disk_space = disk_space_mb * 1048576LLU;
    strncpyz(ctx->config.dbfiles_path, dbfiles_path, sizeof(ctx->config.dbfiles_path) - 1);
    ctx->config.dbfiles_path[sizeof(ctx->config.dbfiles_path) - 1] = '\0';

    ctx->atomic.transaction_id = 1;
    ctx->quiesce.enabled = false;

    rw_spinlock_init(&ctx->njfv2idx.spinlock);
    ctx->atomic.first_time_s = LONG_MAX;

    if (rrdeng_dbengine_spawn(ctx) && !init_rrd_files(ctx)) {
        // success - we run this ctx too
        rrdeng_populate_mrg(ctx);
        return 0;
    }

    if (ctx->config.legacy) {
        freez(ctx);
        if (ctxp)
            *ctxp = NULL;
    }

    rrd_stat_atomic_add(&rrdeng_reserved_file_descriptors, -RRDENG_FD_BUDGET_PER_INSTANCE);
    return UV_EIO;
}
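// Initialization sketch (hypothetical caller; the paths and quota below are
// illustrative assumptions, not values mandated by this file):
//
//     // multihost instance: ctxp == NULL selects multidb_ctx[tier]
//     int rc = rrdeng_init(NULL, "/var/cache/netdata/dbengine", 256, 0);
//
//     // legacy (per-host) instance: a fresh context is allocated into *ctxp
//     struct rrdengine_instance *ctx = NULL;
//     rc = rrdeng_init(&ctx, "/some/host/dbengine", 256, 0);
//
// After a successful return, rrdeng_readiness_wait(ctx) blocks until the MRG
// retention population queued by rrdeng_populate_mrg() has completed.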
size_t rrdeng_collectors_running(struct rrdengine_instance *ctx) {
    return __atomic_load_n(&ctx->atomic.collectors_running, __ATOMIC_RELAXED);
}

/*
 * Returns 0 on success, 1 on error
 */
int rrdeng_exit(struct rrdengine_instance *ctx) {
    if (NULL == ctx)
        return 1;

    // FIXME - ktsaou - properly cleanup ctx
    // 1. make sure all collectors are stopped
    // 2. make sure new queries will not be accepted (the quiesce step has already done this)
    // 3. flush this section of the main cache
    // 4. then wait for completion

    bool logged = false;
    size_t count = 10;
    while(__atomic_load_n(&ctx->atomic.collectors_running, __ATOMIC_RELAXED) && count && !unittest_running) {
        if(!logged) {
            netdata_log_info("DBENGINE: waiting for collectors to finish on tier %d...", (ctx->config.legacy) ? -1 : ctx->config.tier);
            logged = true;
        }
        sleep_usec(100 * USEC_PER_MS);
        count--;
    }

    netdata_log_info("DBENGINE: flushing main cache for tier %d", (ctx->config.legacy) ? -1 : ctx->config.tier);
    pgc_flush_all_hot_and_dirty_pages(main_cache, (Word_t)ctx);

    netdata_log_info("DBENGINE: shutting down tier %d", (ctx->config.legacy) ? -1 : ctx->config.tier);
    struct completion completion = {};
    completion_init(&completion);
    rrdeng_enq_cmd(ctx, RRDENG_OPCODE_CTX_SHUTDOWN, NULL, &completion, STORAGE_PRIORITY_BEST_EFFORT, NULL, NULL);
    completion_wait_for(&completion);
    completion_destroy(&completion);

    finalize_rrd_files(ctx);

    if(ctx->config.legacy)
        freez(ctx);

    rrd_stat_atomic_add(&rrdeng_reserved_file_descriptors, -RRDENG_FD_BUDGET_PER_INSTANCE);
    return 0;
}

void rrdeng_prepare_exit(struct rrdengine_instance *ctx) {
    if (NULL == ctx)
        return;

    // FIXME - ktsaou - properly cleanup ctx
    // 1. make sure all collectors are stopped

    completion_init(&ctx->quiesce.completion);
    rrdeng_enq_cmd(ctx, RRDENG_OPCODE_CTX_QUIESCE, NULL, NULL, STORAGE_PRIORITY_INTERNAL_DBENGINE, NULL, NULL);
}

static void populate_v2_statistics(struct rrdengine_datafile *datafile, RRDENG_SIZE_STATS *stats)
{
    struct journal_v2_header *j2_header = journalfile_v2_data_acquire(datafile->journalfile, NULL, 0, 0);
    void *data_start = (void *)j2_header;

    if(unlikely(!j2_header))
        return;

    stats->extents += j2_header->extent_count;

    unsigned entries;
    struct journal_extent_list *extent_list = (void *) (data_start + j2_header->extent_offset);
    for (entries = 0; entries < j2_header->extent_count; entries++) {
        stats->extents_compressed_bytes += extent_list->datafile_size;
        stats->extents_pages += extent_list->pages;
        extent_list++;
    }

    struct journal_metric_list *metric = (void *) (data_start + j2_header->metric_offset);
    time_t journal_start_time_s = (time_t) (j2_header->start_time_ut / USEC_PER_SEC);

    stats->metrics += j2_header->metric_count;
    for (entries = 0; entries < j2_header->metric_count; entries++) {

        struct journal_page_header *metric_list_header = (void *) (data_start + metric->page_offset);
        stats->metrics_pages += metric_list_header->entries;

        struct journal_page_list *descr = (void *) (data_start + metric->page_offset + sizeof(struct journal_page_header));
        for (uint32_t idx = 0; idx < metric_list_header->entries; idx++) {

            time_t update_every_s;

            size_t points = descr->page_length / CTX_POINT_SIZE_BYTES(datafile->ctx);

            time_t start_time_s = journal_start_time_s + descr->delta_start_s;
            time_t end_time_s = journal_start_time_s + descr->delta_end_s;

            if(likely(points > 1))
                update_every_s = (time_t) ((end_time_s - start_time_s) / (points - 1));
            else {
                update_every_s = (time_t) (default_rrd_update_every * get_tier_grouping(datafile->ctx->config.tier));
                stats->single_point_pages++;
            }

            time_t duration_s = (time_t)(end_time_s - start_time_s + update_every_s);

            stats->pages_uncompressed_bytes += descr->page_length;
            stats->pages_duration_secs += duration_s;
            stats->points += points;

            stats->page_types[descr->type].pages++;
            stats->page_types[descr->type].pages_uncompressed_bytes += descr->page_length;
            stats->page_types[descr->type].pages_duration_secs += duration_s;
            stats->page_types[descr->type].points += points;

            if(!stats->first_time_s || (start_time_s - update_every_s) < stats->first_time_s)
                stats->first_time_s = (start_time_s - update_every_s);

            if(!stats->last_time_s || end_time_s > stats->last_time_s)
                stats->last_time_s = end_time_s;

            descr++;
        }

        metric++;
    }

    journalfile_v2_data_release(datafile->journalfile);
}
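// Worked example of the update-every derivation above (illustrative numbers):
// a journal page holding 10 points with start = 1000s and end = 1090s yields
// update_every = (1090 - 1000) / (10 - 1) = 10s, and contributes
// duration = 1090 - 1000 + 10 = 100s to pages_duration_secs; a single-point
// page falls back to the tier's default granularity instead.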
RRDENG_SIZE_STATS rrdeng_size_statistics(struct rrdengine_instance *ctx) {
    RRDENG_SIZE_STATS stats = { 0 };

    uv_rwlock_rdlock(&ctx->datafiles.rwlock);
    for(struct rrdengine_datafile *df = ctx->datafiles.first; df ; df = df->next) {
        stats.datafiles++;
        populate_v2_statistics(df, &stats);
    }
    uv_rwlock_rdunlock(&ctx->datafiles.rwlock);

    stats.currently_collected_metrics = __atomic_load_n(&ctx->atomic.collectors_running, __ATOMIC_RELAXED);

    internal_error(stats.metrics_pages != stats.extents_pages + stats.currently_collected_metrics,
                   "DBENGINE: metrics pages is %zu, but extents pages is %zu and API consumers is %zu",
                   stats.metrics_pages, stats.extents_pages, stats.currently_collected_metrics);

    stats.disk_space = ctx_current_disk_space_get(ctx);
    stats.max_disk_space = ctx->config.max_disk_space;

    stats.database_retention_secs = (time_t)(stats.last_time_s - stats.first_time_s);

    if(stats.extents_pages)
        stats.average_page_size_bytes = (double)stats.pages_uncompressed_bytes / (double)stats.extents_pages;

    if(stats.pages_uncompressed_bytes > 0)
        stats.average_compression_savings = 100.0 - ((double)stats.extents_compressed_bytes * 100.0 / (double)stats.pages_uncompressed_bytes);

    if(stats.points)
        stats.average_point_duration_secs = (double)stats.pages_duration_secs / (double)stats.points;

    if(stats.metrics) {
        stats.average_metric_retention_secs = (double)stats.pages_duration_secs / (double)stats.metrics;

        if(stats.database_retention_secs) {
            double metric_coverage = stats.average_metric_retention_secs / (double)stats.database_retention_secs;
            double db_retention_days = (double)stats.database_retention_secs / 86400.0;

            stats.estimated_concurrently_collected_metrics = stats.metrics * metric_coverage;

            stats.ephemeral_metrics_per_day_percent = ((double)stats.metrics * 100.0 / (double)stats.estimated_concurrently_collected_metrics - 100.0) / (double)db_retention_days;
        }
    }

    // stats.sizeof_metric = 0;
    stats.sizeof_datafile = struct_natural_alignment(sizeof(struct rrdengine_datafile)) + struct_natural_alignment(sizeof(struct rrdengine_journalfile));
    stats.sizeof_page_in_cache = 0; // struct_natural_alignment(sizeof(struct page_cache_descr));
    stats.sizeof_point_data = page_type_size[ctx->config.page_type];
    stats.sizeof_page_data = tier_page_size[ctx->config.tier];
    stats.pages_per_extent = rrdeng_pages_per_extent;

    // stats.sizeof_metric_in_index = 40;
    // stats.sizeof_page_in_index = 24;

    stats.default_granularity_secs = (size_t)default_rrd_update_every * get_tier_grouping(ctx->config.tier);

    return stats;
}
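// Example of the savings formula above (illustrative numbers): with
// pages_uncompressed_bytes = 1,000,000 and extents_compressed_bytes = 400,000,
// average_compression_savings = 100 - (400,000 * 100 / 1,000,000) = 60%.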
struct rrdeng_cache_efficiency_stats rrdeng_get_cache_efficiency_stats(void) {
    // FIXME - make cache efficiency stats atomic
    return rrdeng_cache_efficiency_stats;
}