pagecache.c 43 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117
  1. // SPDX-License-Identifier: GPL-3.0-or-later
  2. #define NETDATA_RRD_INTERNALS
  3. #include "rrdengine.h"
  4. MRG *main_mrg = NULL;
  5. PGC *main_cache = NULL;
  6. PGC *open_cache = NULL;
  7. PGC *extent_cache = NULL;
  8. struct rrdeng_cache_efficiency_stats rrdeng_cache_efficiency_stats = {};
  9. static void main_cache_free_clean_page_callback(PGC *cache __maybe_unused, PGC_ENTRY entry __maybe_unused)
  10. {
  11. // Release storage associated with the page
  12. pgd_free(entry.data);
  13. }
  14. static void main_cache_flush_dirty_page_init_callback(PGC *cache __maybe_unused, Word_t section) {
  15. struct rrdengine_instance *ctx = (struct rrdengine_instance *) section;
  16. // mark ctx as having flushing in progress
  17. __atomic_add_fetch(&ctx->atomic.extents_currently_being_flushed, 1, __ATOMIC_RELAXED);
  18. }
  19. static void main_cache_flush_dirty_page_callback(PGC *cache __maybe_unused, PGC_ENTRY *entries_array __maybe_unused, PGC_PAGE **pages_array __maybe_unused, size_t entries __maybe_unused)
  20. {
  21. if(!entries)
  22. return;
  23. struct rrdengine_instance *ctx = (struct rrdengine_instance *) entries_array[0].section;
  24. struct page_descr_with_data *base = NULL;
  25. for (size_t Index = 0 ; Index < entries; Index++) {
  26. time_t start_time_s = entries_array[Index].start_time_s;
  27. time_t end_time_s = entries_array[Index].end_time_s;
  28. struct page_descr_with_data *descr = page_descriptor_get();
  29. descr->id = mrg_metric_uuid(main_mrg, (METRIC *) entries_array[Index].metric_id);
  30. descr->metric_id = entries_array[Index].metric_id;
  31. descr->start_time_ut = start_time_s * USEC_PER_SEC;
  32. descr->end_time_ut = end_time_s * USEC_PER_SEC;
  33. descr->update_every_s = entries_array[Index].update_every_s;
  34. descr->pgd = pgc_page_data(pages_array[Index]);
  35. descr->type = pgd_type(descr->pgd);
  36. descr->page_length = pgd_disk_footprint(descr->pgd);
  37. DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(base, descr, link.prev, link.next);
  38. // TODO: ask @stelfrag/@ktsaou about this.
  39. // internal_fatal(descr->page_length > RRDENG_BLOCK_SIZE, "DBENGINE: faulty page length calculation");
  40. }
  41. struct completion completion;
  42. completion_init(&completion);
  43. rrdeng_enq_cmd(ctx, RRDENG_OPCODE_EXTENT_WRITE, base, &completion, STORAGE_PRIORITY_INTERNAL_DBENGINE, NULL, NULL);
  44. completion_wait_for(&completion);
  45. completion_destroy(&completion);
  46. }
  47. static void open_cache_free_clean_page_callback(PGC *cache __maybe_unused, PGC_ENTRY entry __maybe_unused)
  48. {
  49. struct rrdengine_datafile *datafile = entry.data;
  50. datafile_release(datafile, DATAFILE_ACQUIRE_OPEN_CACHE);
  51. }
  52. static void open_cache_flush_dirty_page_callback(PGC *cache __maybe_unused, PGC_ENTRY *entries_array __maybe_unused, PGC_PAGE **pages_array __maybe_unused, size_t entries __maybe_unused)
  53. {
  54. ;
  55. }
  56. static void extent_cache_free_clean_page_callback(PGC *cache __maybe_unused, PGC_ENTRY entry __maybe_unused)
  57. {
  58. dbengine_extent_free(entry.data, entry.size);
  59. }
  60. static void extent_cache_flush_dirty_page_callback(PGC *cache __maybe_unused, PGC_ENTRY *entries_array __maybe_unused, PGC_PAGE **pages_array __maybe_unused, size_t entries __maybe_unused)
  61. {
  62. ;
  63. }
  64. inline TIME_RANGE_COMPARE is_page_in_time_range(time_t page_first_time_s, time_t page_last_time_s, time_t wanted_start_time_s, time_t wanted_end_time_s) {
  65. // page_first_time_s <= wanted_end_time_s && page_last_time_s >= wanted_start_time_s
  66. if(page_last_time_s < wanted_start_time_s)
  67. return PAGE_IS_IN_THE_PAST;
  68. if(page_first_time_s > wanted_end_time_s)
  69. return PAGE_IS_IN_THE_FUTURE;
  70. return PAGE_IS_IN_RANGE;
  71. }
  72. static inline struct page_details *pdc_find_page_for_time(
  73. Pcvoid_t PArray,
  74. time_t wanted_time_s,
  75. size_t *gaps,
  76. PDC_PAGE_STATUS mode,
  77. PDC_PAGE_STATUS skip_list
  78. ) {
  79. Word_t PIndexF = wanted_time_s, PIndexL = wanted_time_s;
  80. Pvoid_t *PValueF, *PValueL;
  81. struct page_details *pdF = NULL, *pdL = NULL;
  82. bool firstF = true, firstL = true;
  83. PDC_PAGE_STATUS ignore_list = PDC_PAGE_QUERY_GLOBAL_SKIP_LIST | skip_list;
  84. while ((PValueF = PDCJudyLFirstThenNext(PArray, &PIndexF, &firstF))) {
  85. pdF = *PValueF;
  86. PDC_PAGE_STATUS status = __atomic_load_n(&pdF->status, __ATOMIC_ACQUIRE);
  87. if (!(status & (ignore_list | mode)))
  88. break;
  89. pdF = NULL;
  90. }
  91. while ((PValueL = PDCJudyLLastThenPrev(PArray, &PIndexL, &firstL))) {
  92. pdL = *PValueL;
  93. PDC_PAGE_STATUS status = __atomic_load_n(&pdL->status, __ATOMIC_ACQUIRE);
  94. if(status & mode) {
  95. // don't go all the way back to the beginning
  96. // stop at the last processed
  97. pdL = NULL;
  98. break;
  99. }
  100. if (!(status & ignore_list))
  101. break;
  102. pdL = NULL;
  103. }
  104. TIME_RANGE_COMPARE rcF = (pdF) ? is_page_in_time_range(pdF->first_time_s, pdF->last_time_s, wanted_time_s, wanted_time_s) : PAGE_IS_IN_THE_FUTURE;
  105. TIME_RANGE_COMPARE rcL = (pdL) ? is_page_in_time_range(pdL->first_time_s, pdL->last_time_s, wanted_time_s, wanted_time_s) : PAGE_IS_IN_THE_PAST;
  106. if (!pdF || pdF == pdL) {
  107. // F is missing, or they are the same
  108. // return L
  109. (*gaps) += (rcL == PAGE_IS_IN_RANGE) ? 0 : 1;
  110. return pdL;
  111. }
  112. if (!pdL) {
  113. // L is missing
  114. // return F
  115. (*gaps) += (rcF == PAGE_IS_IN_RANGE) ? 0 : 1;
  116. return pdF;
  117. }
  118. if (rcF == rcL) {
  119. // both are on the same side,
  120. // but they are different pages
  121. switch (rcF) {
  122. case PAGE_IS_IN_RANGE:
  123. // pick the higher resolution
  124. if (pdF->update_every_s && pdF->update_every_s < pdL->update_every_s)
  125. return pdF;
  126. if (pdL->update_every_s && pdL->update_every_s < pdF->update_every_s)
  127. return pdL;
  128. // same resolution - pick the one that starts earlier
  129. if (pdL->first_time_s < pdF->first_time_s)
  130. return pdL;
  131. return pdF;
  132. break;
  133. case PAGE_IS_IN_THE_FUTURE:
  134. (*gaps)++;
  135. // pick the one that starts earlier
  136. if (pdL->first_time_s < pdF->first_time_s)
  137. return pdL;
  138. return pdF;
  139. break;
  140. default:
  141. case PAGE_IS_IN_THE_PAST:
  142. (*gaps)++;
  143. return NULL;
  144. break;
  145. }
  146. }
  147. if(rcF == PAGE_IS_IN_RANGE) {
  148. // (*gaps) += 0;
  149. return pdF;
  150. }
  151. if(rcL == PAGE_IS_IN_RANGE) {
  152. // (*gaps) += 0;
  153. return pdL;
  154. }
  155. if(rcF == PAGE_IS_IN_THE_FUTURE) {
  156. (*gaps)++;
  157. return pdF;
  158. }
  159. if(rcL == PAGE_IS_IN_THE_FUTURE) {
  160. (*gaps)++;
  161. return pdL;
  162. }
  163. // impossible case
  164. (*gaps)++;
  165. return NULL;
  166. }
  167. static size_t get_page_list_from_pgc(PGC *cache, METRIC *metric, struct rrdengine_instance *ctx,
  168. time_t wanted_start_time_s, time_t wanted_end_time_s,
  169. Pvoid_t *JudyL_page_array, size_t *cache_gaps,
  170. bool open_cache_mode, PDC_PAGE_STATUS tags) {
  171. size_t pages_found_in_cache = 0;
  172. Word_t metric_id = mrg_metric_id(main_mrg, metric);
  173. time_t now_s = wanted_start_time_s;
  174. uint32_t dt_s = mrg_metric_get_update_every_s(main_mrg, metric);
  175. if(!dt_s)
  176. dt_s = default_rrd_update_every;
  177. time_t previous_page_end_time_s = now_s - dt_s;
  178. bool first = true;
  179. do {
  180. PGC_PAGE *page = pgc_page_get_and_acquire(
  181. cache, (Word_t)ctx, (Word_t)metric_id, now_s,
  182. (first) ? PGC_SEARCH_CLOSEST : PGC_SEARCH_NEXT);
  183. first = false;
  184. if(!page) {
  185. if(previous_page_end_time_s < wanted_end_time_s)
  186. (*cache_gaps)++;
  187. break;
  188. }
  189. time_t page_start_time_s = pgc_page_start_time_s(page);
  190. time_t page_end_time_s = pgc_page_end_time_s(page);
  191. uint32_t page_update_every_s = pgc_page_update_every_s(page);
  192. if(!page_update_every_s)
  193. page_update_every_s = dt_s;
  194. if(is_page_in_time_range(page_start_time_s, page_end_time_s, wanted_start_time_s, wanted_end_time_s) != PAGE_IS_IN_RANGE) {
  195. // not a useful page for this query
  196. pgc_page_release(cache, page);
  197. page = NULL;
  198. if(previous_page_end_time_s < wanted_end_time_s)
  199. (*cache_gaps)++;
  200. break;
  201. }
  202. if (page_start_time_s - previous_page_end_time_s > dt_s)
  203. (*cache_gaps)++;
  204. Pvoid_t *PValue = PDCJudyLIns(JudyL_page_array, (Word_t) page_start_time_s, PJE0);
  205. if (!PValue || PValue == PJERR)
  206. fatal("DBENGINE: corrupted judy array in %s()", __FUNCTION__ );
  207. if (unlikely(*PValue))
  208. // already exists in our list
  209. pgc_page_release(cache, page);
  210. else {
  211. internal_fatal(pgc_page_metric(page) != metric_id, "Wrong metric id in page found in cache");
  212. internal_fatal(pgc_page_section(page) != (Word_t)ctx, "Wrong section in page found in cache");
  213. struct page_details *pd = page_details_get();
  214. pd->metric_id = metric_id;
  215. pd->first_time_s = page_start_time_s;
  216. pd->last_time_s = page_end_time_s;
  217. pd->update_every_s = page_update_every_s;
  218. pd->page = (open_cache_mode) ? NULL : page;
  219. pd->status |= tags;
  220. if((pd->page)) {
  221. pd->status |= PDC_PAGE_READY | PDC_PAGE_PRELOADED;
  222. if(pgd_is_empty(pgc_page_data(page)))
  223. pd->status |= PDC_PAGE_EMPTY;
  224. }
  225. if(open_cache_mode) {
  226. struct rrdengine_datafile *datafile = pgc_page_data(page);
  227. if(datafile_acquire(datafile, DATAFILE_ACQUIRE_PAGE_DETAILS)) { // for pd
  228. struct extent_io_data *xio = (struct extent_io_data *) pgc_page_custom_data(cache, page);
  229. pd->datafile.ptr = pgc_page_data(page);
  230. pd->datafile.file = xio->file;
  231. pd->datafile.extent.pos = xio->pos;
  232. pd->datafile.extent.bytes = xio->bytes;
  233. pd->datafile.fileno = pd->datafile.ptr->fileno;
  234. pd->status |= PDC_PAGE_DATAFILE_ACQUIRED | PDC_PAGE_DISK_PENDING;
  235. }
  236. else {
  237. pd->status |= PDC_PAGE_FAILED | PDC_PAGE_FAILED_TO_ACQUIRE_DATAFILE;
  238. }
  239. pgc_page_release(cache, page);
  240. }
  241. *PValue = pd;
  242. pages_found_in_cache++;
  243. }
  244. // prepare for the next iteration
  245. previous_page_end_time_s = page_end_time_s;
  246. if(page_update_every_s > 0)
  247. dt_s = page_update_every_s;
  248. // we are going to as for the NEXT page
  249. // so, set this to our first time
  250. now_s = page_start_time_s;
  251. } while(now_s <= wanted_end_time_s);
  252. return pages_found_in_cache;
  253. }
  254. static void pgc_inject_gap(struct rrdengine_instance *ctx, METRIC *metric, time_t start_time_s, time_t end_time_s) {
  255. time_t db_first_time_s, db_last_time_s;
  256. mrg_metric_get_retention(main_mrg, metric, &db_first_time_s, &db_last_time_s, NULL);
  257. if(is_page_in_time_range(start_time_s, end_time_s, db_first_time_s, db_last_time_s) != PAGE_IS_IN_RANGE)
  258. return;
  259. PGC_ENTRY page_entry = {
  260. .hot = false,
  261. .section = (Word_t)ctx,
  262. .metric_id = (Word_t)metric,
  263. .start_time_s = MAX(start_time_s, db_first_time_s),
  264. .end_time_s = MIN(end_time_s, db_last_time_s),
  265. .update_every_s = 0,
  266. .size = 0,
  267. .data = PGD_EMPTY,
  268. };
  269. if(page_entry.start_time_s >= page_entry.end_time_s)
  270. return;
  271. PGC_PAGE *page = pgc_page_add_and_acquire(main_cache, page_entry, NULL);
  272. pgc_page_release(main_cache, page);
  273. }
  274. static size_t list_has_time_gaps(
  275. struct rrdengine_instance *ctx,
  276. METRIC *metric,
  277. Pvoid_t JudyL_page_array,
  278. time_t wanted_start_time_s,
  279. time_t wanted_end_time_s,
  280. size_t *pages_total,
  281. size_t *pages_found_pass4,
  282. size_t *pages_to_load_from_disk,
  283. size_t *pages_overlapping,
  284. time_t *optimal_end_time_s,
  285. bool populate_gaps,
  286. PDC_PAGE_STATUS *common_status
  287. ) {
  288. // we will recalculate these, so zero them
  289. *pages_to_load_from_disk = 0;
  290. *pages_overlapping = 0;
  291. *optimal_end_time_s = 0;
  292. *common_status = 0;
  293. bool first;
  294. Pvoid_t *PValue;
  295. Word_t this_page_start_time;
  296. struct page_details *pd;
  297. size_t gaps = 0;
  298. Word_t metric_id = mrg_metric_id(main_mrg, metric);
  299. // ------------------------------------------------------------------------
  300. // PASS 1: remove the preprocessing flags from the pages in PDC
  301. first = true;
  302. this_page_start_time = 0;
  303. while((PValue = PDCJudyLFirstThenNext(JudyL_page_array, &this_page_start_time, &first))) {
  304. pd = *PValue;
  305. pd->status &= ~(PDC_PAGE_SKIP|PDC_PAGE_PREPROCESSED);
  306. }
  307. // ------------------------------------------------------------------------
  308. // PASS 2: emulate processing to find the useful pages
  309. time_t now_s = wanted_start_time_s;
  310. time_t dt_s = mrg_metric_get_update_every_s(main_mrg, metric);
  311. if(!dt_s)
  312. dt_s = default_rrd_update_every;
  313. size_t pages_pass2 = 0, pages_pass3 = 0;
  314. while((pd = pdc_find_page_for_time(
  315. JudyL_page_array, now_s, &gaps,
  316. PDC_PAGE_PREPROCESSED, 0))) {
  317. pd->status |= PDC_PAGE_PREPROCESSED;
  318. pages_pass2++;
  319. if(pd->update_every_s)
  320. dt_s = pd->update_every_s;
  321. if(populate_gaps && pd->first_time_s > now_s)
  322. pgc_inject_gap(ctx, metric, now_s, pd->first_time_s);
  323. now_s = pd->last_time_s + dt_s;
  324. if(now_s > wanted_end_time_s) {
  325. *optimal_end_time_s = pd->last_time_s;
  326. break;
  327. }
  328. }
  329. if(populate_gaps && now_s < wanted_end_time_s)
  330. pgc_inject_gap(ctx, metric, now_s, wanted_end_time_s);
  331. // ------------------------------------------------------------------------
  332. // PASS 3: mark as skipped all the pages not useful
  333. first = true;
  334. this_page_start_time = 0;
  335. while((PValue = PDCJudyLFirstThenNext(JudyL_page_array, &this_page_start_time, &first))) {
  336. pd = *PValue;
  337. internal_fatal(pd->metric_id != metric_id, "pd has wrong metric_id");
  338. if(!(pd->status & PDC_PAGE_PREPROCESSED)) {
  339. (*pages_overlapping)++;
  340. pd->status |= PDC_PAGE_SKIP;
  341. pd->status &= ~(PDC_PAGE_READY | PDC_PAGE_DISK_PENDING);
  342. *common_status |= pd->status;
  343. continue;
  344. }
  345. pages_pass3++;
  346. if(!pd->page) {
  347. pd->page = pgc_page_get_and_acquire(main_cache, (Word_t) ctx, (Word_t) metric_id, pd->first_time_s, PGC_SEARCH_EXACT);
  348. if(pd->page) {
  349. (*pages_found_pass4)++;
  350. pd->status &= ~PDC_PAGE_DISK_PENDING;
  351. pd->status |= PDC_PAGE_READY | PDC_PAGE_PRELOADED | PDC_PAGE_PRELOADED_PASS4;
  352. if(pgd_is_empty(pgc_page_data(pd->page)))
  353. pd->status |= PDC_PAGE_EMPTY;
  354. }
  355. else if(!(pd->status & PDC_PAGE_FAILED) && (pd->status & PDC_PAGE_DATAFILE_ACQUIRED)) {
  356. (*pages_to_load_from_disk)++;
  357. pd->status |= PDC_PAGE_DISK_PENDING;
  358. internal_fatal(pd->status & PDC_PAGE_SKIP, "page is disk pending and skipped");
  359. internal_fatal(!pd->datafile.ptr, "datafile is NULL");
  360. internal_fatal(!pd->datafile.extent.bytes, "datafile.extent.bytes zero");
  361. internal_fatal(!pd->datafile.extent.pos, "datafile.extent.pos is zero");
  362. internal_fatal(!pd->datafile.fileno, "datafile.fileno is zero");
  363. }
  364. }
  365. else {
  366. pd->status &= ~PDC_PAGE_DISK_PENDING;
  367. pd->status |= (PDC_PAGE_READY | PDC_PAGE_PRELOADED);
  368. }
  369. *common_status |= pd->status;
  370. }
  371. internal_fatal(pages_pass2 != pages_pass3,
  372. "DBENGINE: page count does not match");
  373. *pages_total = pages_pass2;
  374. return gaps;
  375. }
  376. // ----------------------------------------------------------------------------
  377. typedef void (*page_found_callback_t)(PGC_PAGE *page, void *data);
  378. static size_t get_page_list_from_journal_v2(struct rrdengine_instance *ctx, METRIC *metric, usec_t start_time_ut, usec_t end_time_ut, page_found_callback_t callback, void *callback_data) {
  379. uuid_t *uuid = mrg_metric_uuid(main_mrg, metric);
  380. Word_t metric_id = mrg_metric_id(main_mrg, metric);
  381. time_t wanted_start_time_s = (time_t)(start_time_ut / USEC_PER_SEC);
  382. time_t wanted_end_time_s = (time_t)(end_time_ut / USEC_PER_SEC);
  383. size_t pages_found = 0;
  384. NJFV2IDX_FIND_STATE state = {
  385. .init = false,
  386. .last = 0,
  387. .ctx = ctx,
  388. .wanted_start_time_s = wanted_start_time_s,
  389. .wanted_end_time_s = wanted_end_time_s,
  390. .j2_header_acquired = NULL,
  391. };
  392. struct rrdengine_datafile *datafile;
  393. while((datafile = njfv2idx_find_and_acquire_j2_header(&state))) {
  394. struct journal_v2_header *j2_header = state.j2_header_acquired;
  395. if (unlikely(!j2_header))
  396. continue;
  397. time_t journal_start_time_s = (time_t)(j2_header->start_time_ut / USEC_PER_SEC);
  398. // the datafile possibly contains useful data for this query
  399. size_t journal_metric_count = (size_t)j2_header->metric_count;
  400. struct journal_metric_list *uuid_list = (struct journal_metric_list *)((uint8_t *) j2_header + j2_header->metric_offset);
  401. struct journal_metric_list *uuid_entry = bsearch(uuid,uuid_list,journal_metric_count,sizeof(*uuid_list), journal_metric_uuid_compare);
  402. if (unlikely(!uuid_entry)) {
  403. // our UUID is not in this datafile
  404. journalfile_v2_data_release(datafile->journalfile);
  405. continue;
  406. }
  407. struct journal_page_header *page_list_header = (struct journal_page_header *) ((uint8_t *) j2_header + uuid_entry->page_offset);
  408. struct journal_page_list *page_list = (struct journal_page_list *)((uint8_t *) page_list_header + sizeof(*page_list_header));
  409. struct journal_extent_list *extent_list = (void *)((uint8_t *)j2_header + j2_header->extent_offset);
  410. uint32_t uuid_page_entries = page_list_header->entries;
  411. for (uint32_t index = 0; index < uuid_page_entries; index++) {
  412. struct journal_page_list *page_entry_in_journal = &page_list[index];
  413. time_t page_first_time_s = page_entry_in_journal->delta_start_s + journal_start_time_s;
  414. time_t page_last_time_s = page_entry_in_journal->delta_end_s + journal_start_time_s;
  415. TIME_RANGE_COMPARE prc = is_page_in_time_range(page_first_time_s, page_last_time_s, wanted_start_time_s, wanted_end_time_s);
  416. if(prc == PAGE_IS_IN_THE_PAST)
  417. continue;
  418. if(prc == PAGE_IS_IN_THE_FUTURE)
  419. break;
  420. uint32_t page_update_every_s = page_entry_in_journal->update_every_s;
  421. size_t page_length = page_entry_in_journal->page_length;
  422. if(datafile_acquire(datafile, DATAFILE_ACQUIRE_OPEN_CACHE)) { //for open cache item
  423. // add this page to open cache
  424. bool added = false;
  425. struct extent_io_data ei = {
  426. .pos = extent_list[page_entry_in_journal->extent_index].datafile_offset,
  427. .bytes = extent_list[page_entry_in_journal->extent_index].datafile_size,
  428. .page_length = page_length,
  429. .file = datafile->file,
  430. .fileno = datafile->fileno,
  431. };
  432. PGC_PAGE *page = pgc_page_add_and_acquire(open_cache, (PGC_ENTRY) {
  433. .hot = false,
  434. .section = (Word_t) ctx,
  435. .metric_id = metric_id,
  436. .start_time_s = page_first_time_s,
  437. .end_time_s = page_last_time_s,
  438. .update_every_s = page_update_every_s,
  439. .data = datafile,
  440. .size = 0,
  441. .custom_data = (uint8_t *) &ei,
  442. }, &added);
  443. if(!added)
  444. datafile_release(datafile, DATAFILE_ACQUIRE_OPEN_CACHE);
  445. callback(page, callback_data);
  446. pgc_page_release(open_cache, page);
  447. pages_found++;
  448. }
  449. }
  450. journalfile_v2_data_release(datafile->journalfile);
  451. }
  452. return pages_found;
  453. }
  454. void add_page_details_from_journal_v2(PGC_PAGE *page, void *JudyL_pptr) {
  455. struct rrdengine_datafile *datafile = pgc_page_data(page);
  456. if(!datafile_acquire(datafile, DATAFILE_ACQUIRE_PAGE_DETAILS)) // for pd
  457. return;
  458. Pvoid_t *PValue = PDCJudyLIns(JudyL_pptr, pgc_page_start_time_s(page), PJE0);
  459. if (!PValue || PValue == PJERR)
  460. fatal("DBENGINE: corrupted judy array");
  461. if (unlikely(*PValue)) {
  462. datafile_release(datafile, DATAFILE_ACQUIRE_PAGE_DETAILS);
  463. return;
  464. }
  465. Word_t metric_id = pgc_page_metric(page);
  466. // let's add it to the judy
  467. struct extent_io_data *ei = pgc_page_custom_data(open_cache, page);
  468. struct page_details *pd = page_details_get();
  469. *PValue = pd;
  470. pd->datafile.extent.pos = ei->pos;
  471. pd->datafile.extent.bytes = ei->bytes;
  472. pd->datafile.file = ei->file;
  473. pd->datafile.fileno = ei->fileno;
  474. pd->first_time_s = pgc_page_start_time_s(page);
  475. pd->last_time_s = pgc_page_end_time_s(page);
  476. pd->datafile.ptr = datafile;
  477. pd->update_every_s = (uint32_t) pgc_page_update_every_s(page);
  478. pd->metric_id = metric_id;
  479. pd->status |= PDC_PAGE_DISK_PENDING | PDC_PAGE_SOURCE_JOURNAL_V2 | PDC_PAGE_DATAFILE_ACQUIRED;
  480. }
  481. // Return a judyL will all pages that have start_time_ut and end_time_ut
  482. // Pvalue of the judy will be the end time for that page
  483. // DBENGINE2:
  484. #define time_delta(finish, pass) do { if(pass) { usec_t t = pass; (pass) = (finish) - (pass); (finish) = t; } } while(0)
  485. static Pvoid_t get_page_list(
  486. struct rrdengine_instance *ctx,
  487. METRIC *metric,
  488. usec_t start_time_ut,
  489. usec_t end_time_ut,
  490. time_t *optimal_end_time_s,
  491. size_t *pages_to_load_from_disk,
  492. PDC_PAGE_STATUS *common_status
  493. ) {
  494. *optimal_end_time_s = 0;
  495. *pages_to_load_from_disk = 0;
  496. *common_status = 0;
  497. Pvoid_t JudyL_page_array = (Pvoid_t) NULL;
  498. time_t wanted_start_time_s = (time_t)(start_time_ut / USEC_PER_SEC);
  499. time_t wanted_end_time_s = (time_t)(end_time_ut / USEC_PER_SEC);
  500. size_t pages_found_in_main_cache = 0,
  501. pages_found_in_open_cache = 0,
  502. pages_found_in_journals_v2 = 0,
  503. pages_found_pass4 = 0,
  504. pages_overlapping = 0,
  505. pages_total = 0;
  506. size_t cache_gaps = 0, query_gaps = 0;
  507. bool done_v2 = false, done_open = false;
  508. usec_t pass1_ut = 0, pass2_ut = 0, pass3_ut = 0, pass4_ut = 0, finish_ut = 0;
  509. // --------------------------------------------------------------
  510. // PASS 1: Check what the main page cache has available
  511. pass1_ut = now_monotonic_usec();
  512. size_t pages_pass1 = get_page_list_from_pgc(main_cache, metric, ctx, wanted_start_time_s, wanted_end_time_s,
  513. &JudyL_page_array, &cache_gaps,
  514. false, PDC_PAGE_SOURCE_MAIN_CACHE);
  515. query_gaps += cache_gaps;
  516. pages_found_in_main_cache += pages_pass1;
  517. pages_total += pages_pass1;
  518. if(pages_found_in_main_cache && !cache_gaps) {
  519. query_gaps = list_has_time_gaps(ctx, metric, JudyL_page_array, wanted_start_time_s, wanted_end_time_s,
  520. &pages_total, &pages_found_pass4, pages_to_load_from_disk, &pages_overlapping,
  521. optimal_end_time_s, false, common_status);
  522. if (pages_total && !query_gaps)
  523. goto we_are_done;
  524. }
  525. // --------------------------------------------------------------
  526. // PASS 2: Check what the open journal page cache has available
  527. // these will be loaded from disk
  528. pass2_ut = now_monotonic_usec();
  529. size_t pages_pass2 = get_page_list_from_pgc(open_cache, metric, ctx, wanted_start_time_s, wanted_end_time_s,
  530. &JudyL_page_array, &cache_gaps,
  531. true, PDC_PAGE_SOURCE_OPEN_CACHE);
  532. query_gaps += cache_gaps;
  533. pages_found_in_open_cache += pages_pass2;
  534. pages_total += pages_pass2;
  535. done_open = true;
  536. if(pages_found_in_open_cache) {
  537. query_gaps = list_has_time_gaps(ctx, metric, JudyL_page_array, wanted_start_time_s, wanted_end_time_s,
  538. &pages_total, &pages_found_pass4, pages_to_load_from_disk, &pages_overlapping,
  539. optimal_end_time_s, false, common_status);
  540. if (pages_total && !query_gaps)
  541. goto we_are_done;
  542. }
  543. // --------------------------------------------------------------
  544. // PASS 3: Check Journal v2 to fill the gaps
  545. pass3_ut = now_monotonic_usec();
  546. size_t pages_pass3 = get_page_list_from_journal_v2(ctx, metric, start_time_ut, end_time_ut,
  547. add_page_details_from_journal_v2, &JudyL_page_array);
  548. pages_found_in_journals_v2 += pages_pass3;
  549. pages_total += pages_pass3;
  550. done_v2 = true;
  551. // --------------------------------------------------------------
  552. // PASS 4: Check the cache again
  553. // and calculate the time gaps in the query
  554. // THIS IS REQUIRED AFTER JOURNAL V2 LOOKUP
  555. pass4_ut = now_monotonic_usec();
  556. query_gaps = list_has_time_gaps(ctx, metric, JudyL_page_array, wanted_start_time_s, wanted_end_time_s,
  557. &pages_total, &pages_found_pass4, pages_to_load_from_disk, &pages_overlapping,
  558. optimal_end_time_s, true, common_status);
  559. we_are_done:
  560. finish_ut = now_monotonic_usec();
  561. time_delta(finish_ut, pass4_ut);
  562. time_delta(finish_ut, pass3_ut);
  563. time_delta(finish_ut, pass2_ut);
  564. time_delta(finish_ut, pass1_ut);
  565. __atomic_add_fetch(&rrdeng_cache_efficiency_stats.prep_time_in_main_cache_lookup, pass1_ut, __ATOMIC_RELAXED);
  566. __atomic_add_fetch(&rrdeng_cache_efficiency_stats.prep_time_in_open_cache_lookup, pass2_ut, __ATOMIC_RELAXED);
  567. __atomic_add_fetch(&rrdeng_cache_efficiency_stats.prep_time_in_journal_v2_lookup, pass3_ut, __ATOMIC_RELAXED);
  568. __atomic_add_fetch(&rrdeng_cache_efficiency_stats.prep_time_in_pass4_lookup, pass4_ut, __ATOMIC_RELAXED);
  569. __atomic_add_fetch(&rrdeng_cache_efficiency_stats.queries, 1, __ATOMIC_RELAXED);
  570. __atomic_add_fetch(&rrdeng_cache_efficiency_stats.queries_planned_with_gaps, (query_gaps) ? 1 : 0, __ATOMIC_RELAXED);
  571. __atomic_add_fetch(&rrdeng_cache_efficiency_stats.queries_open, done_open ? 1 : 0, __ATOMIC_RELAXED);
  572. __atomic_add_fetch(&rrdeng_cache_efficiency_stats.queries_journal_v2, done_v2 ? 1 : 0, __ATOMIC_RELAXED);
  573. __atomic_add_fetch(&rrdeng_cache_efficiency_stats.pages_total, pages_total, __ATOMIC_RELAXED);
  574. __atomic_add_fetch(&rrdeng_cache_efficiency_stats.pages_meta_source_main_cache, pages_found_in_main_cache, __ATOMIC_RELAXED);
  575. __atomic_add_fetch(&rrdeng_cache_efficiency_stats.pages_meta_source_open_cache, pages_found_in_open_cache, __ATOMIC_RELAXED);
  576. __atomic_add_fetch(&rrdeng_cache_efficiency_stats.pages_meta_source_journal_v2, pages_found_in_journals_v2, __ATOMIC_RELAXED);
  577. __atomic_add_fetch(&rrdeng_cache_efficiency_stats.pages_data_source_main_cache, pages_found_in_main_cache, __ATOMIC_RELAXED);
  578. __atomic_add_fetch(&rrdeng_cache_efficiency_stats.pages_data_source_main_cache_at_pass4, pages_found_pass4, __ATOMIC_RELAXED);
  579. __atomic_add_fetch(&rrdeng_cache_efficiency_stats.pages_to_load_from_disk, *pages_to_load_from_disk, __ATOMIC_RELAXED);
  580. __atomic_add_fetch(&rrdeng_cache_efficiency_stats.pages_overlapping_skipped, pages_overlapping, __ATOMIC_RELAXED);
  581. return JudyL_page_array;
  582. }
  583. inline void rrdeng_prep_wait(PDC *pdc) {
  584. if (unlikely(pdc && !pdc->prep_done)) {
  585. usec_t started_ut = now_monotonic_usec();
  586. completion_wait_for(&pdc->prep_completion);
  587. pdc->prep_done = true;
  588. __atomic_add_fetch(&rrdeng_cache_efficiency_stats.query_time_wait_for_prep, now_monotonic_usec() - started_ut, __ATOMIC_RELAXED);
  589. }
  590. }
  591. void rrdeng_prep_query(struct page_details_control *pdc, bool worker) {
  592. if(worker)
  593. worker_is_busy(UV_EVENT_DBENGINE_QUERY);
  594. pdc->page_list_JudyL = get_page_list(pdc->ctx, pdc->metric,
  595. pdc->start_time_s * USEC_PER_SEC,
  596. pdc->end_time_s * USEC_PER_SEC,
  597. &pdc->optimal_end_time_s,
  598. &pdc->pages_to_load_from_disk,
  599. &pdc->common_status);
  600. internal_fatal(pdc->pages_to_load_from_disk && !(pdc->common_status & PDC_PAGE_DISK_PENDING),
  601. "DBENGINE: PDC reports there are %zu pages to load from disk, "
  602. "but none of the pages has the PDC_PAGE_DISK_PENDING flag",
  603. pdc->pages_to_load_from_disk);
  604. internal_fatal(!pdc->pages_to_load_from_disk && (pdc->common_status & PDC_PAGE_DISK_PENDING),
  605. "DBENGINE: PDC reports there are no pages to load from disk, "
  606. "but one or more pages have the PDC_PAGE_DISK_PENDING flag");
  607. if (pdc->pages_to_load_from_disk && pdc->page_list_JudyL) {
  608. pdc_acquire(pdc); // we get 1 for the 1st worker in the chain: do_read_page_list_work()
  609. usec_t start_ut = now_monotonic_usec();
  610. if(likely(pdc->priority == STORAGE_PRIORITY_SYNCHRONOUS))
  611. pdc_route_synchronously(pdc->ctx, pdc);
  612. else
  613. pdc_route_asynchronously(pdc->ctx, pdc);
  614. __atomic_add_fetch(&rrdeng_cache_efficiency_stats.prep_time_to_route, now_monotonic_usec() - start_ut, __ATOMIC_RELAXED);
  615. }
  616. else
  617. completion_mark_complete(&pdc->page_completion);
  618. completion_mark_complete(&pdc->prep_completion);
  619. pdc_release_and_destroy_if_unreferenced(pdc, true, true);
  620. if(worker)
  621. worker_is_idle();
  622. }
  623. /**
  624. * Searches for pages in a time range and triggers disk I/O if necessary and possible.
  625. * @param ctx DB context
  626. * @param handle query handle as initialized
  627. * @param start_time_ut inclusive starting time in usec
  628. * @param end_time_ut inclusive ending time in usec
  629. * @return 1 / 0 (pages found or not found)
  630. */
  631. void pg_cache_preload(struct rrdeng_query_handle *handle) {
  632. if (unlikely(!handle || !handle->metric))
  633. return;
  634. __atomic_add_fetch(&handle->ctx->atomic.inflight_queries, 1, __ATOMIC_RELAXED);
  635. __atomic_add_fetch(&rrdeng_cache_efficiency_stats.currently_running_queries, 1, __ATOMIC_RELAXED);
  636. handle->pdc = pdc_get();
  637. handle->pdc->metric = mrg_metric_dup(main_mrg, handle->metric);
  638. handle->pdc->start_time_s = handle->start_time_s;
  639. handle->pdc->end_time_s = handle->end_time_s;
  640. handle->pdc->priority = handle->priority;
  641. handle->pdc->optimal_end_time_s = handle->end_time_s;
  642. handle->pdc->ctx = handle->ctx;
  643. handle->pdc->refcount = 1;
  644. spinlock_init(&handle->pdc->refcount_spinlock);
  645. completion_init(&handle->pdc->prep_completion);
  646. completion_init(&handle->pdc->page_completion);
  647. if(ctx_is_available_for_queries(handle->ctx)) {
  648. handle->pdc->refcount++; // we get 1 for the query thread and 1 for the prep thread
  649. if(unlikely(handle->pdc->priority == STORAGE_PRIORITY_SYNCHRONOUS))
  650. rrdeng_prep_query(handle->pdc, false);
  651. else
  652. rrdeng_enq_cmd(handle->ctx, RRDENG_OPCODE_QUERY, handle->pdc, NULL, handle->priority, NULL, NULL);
  653. }
  654. else {
  655. completion_mark_complete(&handle->pdc->prep_completion);
  656. completion_mark_complete(&handle->pdc->page_completion);
  657. }
  658. }
  659. /*
  660. * Searches for the first page between start_time and end_time and gets a reference.
  661. * start_time and end_time are inclusive.
  662. * If index is NULL lookup by UUID (id).
  663. */
  664. struct pgc_page *pg_cache_lookup_next(
  665. struct rrdengine_instance *ctx,
  666. PDC *pdc,
  667. time_t now_s,
  668. uint32_t last_update_every_s,
  669. size_t *entries
  670. ) {
  671. if (unlikely(!pdc))
  672. return NULL;
  673. rrdeng_prep_wait(pdc);
  674. if (unlikely(!pdc->page_list_JudyL))
  675. return NULL;
  676. usec_t start_ut = now_monotonic_usec();
  677. size_t gaps = 0;
  678. bool waited = false, preloaded;
  679. PGC_PAGE *page = NULL;
  680. while(!page) {
  681. bool page_from_pd = false;
  682. preloaded = false;
  683. struct page_details *pd = pdc_find_page_for_time(
  684. pdc->page_list_JudyL, now_s, &gaps,
  685. PDC_PAGE_PROCESSED, PDC_PAGE_EMPTY);
  686. if (!pd)
  687. break;
  688. page = pd->page;
  689. page_from_pd = true;
  690. preloaded = pdc_page_status_check(pd, PDC_PAGE_PRELOADED);
  691. if(!page) {
  692. if(!completion_is_done(&pdc->page_completion)) {
  693. page = pgc_page_get_and_acquire(main_cache, (Word_t)ctx,
  694. pd->metric_id, pd->first_time_s, PGC_SEARCH_EXACT);
  695. page_from_pd = false;
  696. preloaded = pdc_page_status_check(pd, PDC_PAGE_PRELOADED);
  697. }
  698. if(!page) {
  699. pdc->completed_jobs =
  700. completion_wait_for_a_job(&pdc->page_completion, pdc->completed_jobs);
  701. page = pd->page;
  702. page_from_pd = true;
  703. preloaded = pdc_page_status_check(pd, PDC_PAGE_PRELOADED);
  704. waited = true;
  705. }
  706. }
  707. if(page && pgd_is_empty(pgc_page_data(page)))
  708. pdc_page_status_set(pd, PDC_PAGE_EMPTY);
  709. if(!page || pdc_page_status_check(pd, PDC_PAGE_QUERY_GLOBAL_SKIP_LIST | PDC_PAGE_EMPTY)) {
  710. page = NULL;
  711. continue;
  712. }
  713. // we now have page and is not empty
  714. time_t page_start_time_s = pgc_page_start_time_s(page);
  715. time_t page_end_time_s = pgc_page_end_time_s(page);
  716. uint32_t page_update_every_s = pgc_page_update_every_s(page);
  717. if(unlikely(page_start_time_s == INVALID_TIME || page_end_time_s == INVALID_TIME)) {
  718. __atomic_add_fetch(&rrdeng_cache_efficiency_stats.pages_zero_time_skipped, 1, __ATOMIC_RELAXED);
  719. pgc_page_to_clean_evict_or_release(main_cache, page);
  720. pdc_page_status_set(pd, PDC_PAGE_INVALID | PDC_PAGE_RELEASED);
  721. pd->page = page = NULL;
  722. continue;
  723. }
  724. else {
  725. if (unlikely(page_update_every_s <= 0 || page_update_every_s > 86400)) {
  726. __atomic_add_fetch(&rrdeng_cache_efficiency_stats.pages_invalid_update_every_fixed, 1, __ATOMIC_RELAXED);
  727. page_update_every_s = pgc_page_fix_update_every(page, last_update_every_s);
  728. pd->update_every_s = page_update_every_s;
  729. }
  730. size_t entries_by_size = pgd_slots_used(pgc_page_data(page));
  731. size_t entries_by_time = page_entries_by_time(page_start_time_s, page_end_time_s, page_update_every_s);
  732. if(unlikely(entries_by_size < entries_by_time)) {
  733. time_t fixed_page_end_time_s = (time_t)(page_start_time_s + (entries_by_size - 1) * page_update_every_s);
  734. pd->last_time_s = page_end_time_s = pgc_page_fix_end_time_s(page, fixed_page_end_time_s);
  735. entries_by_time = (page_end_time_s - (page_start_time_s - page_update_every_s)) / page_update_every_s;
  736. internal_fatal(entries_by_size != entries_by_time, "DBENGINE: wrong entries by time again!");
  737. __atomic_add_fetch(&rrdeng_cache_efficiency_stats.pages_invalid_entries_fixed, 1, __ATOMIC_RELAXED);
  738. }
  739. *entries = entries_by_time;
  740. }
  741. if(unlikely(page_end_time_s < now_s)) {
  742. __atomic_add_fetch(&rrdeng_cache_efficiency_stats.pages_past_time_skipped, 1, __ATOMIC_RELAXED);
  743. pgc_page_release(main_cache, page);
  744. pdc_page_status_set(pd, PDC_PAGE_SKIP | PDC_PAGE_RELEASED);
  745. pd->page = page = NULL;
  746. continue;
  747. }
  748. if(page_from_pd)
  749. // PDC_PAGE_RELEASED is for pdc_destroy() to not release the page twice - the caller will release it
  750. pdc_page_status_set(pd, PDC_PAGE_RELEASED | PDC_PAGE_PROCESSED);
  751. else
  752. pdc_page_status_set(pd, PDC_PAGE_PROCESSED);
  753. }
  754. if(gaps && !pdc->executed_with_gaps)
  755. __atomic_add_fetch(&rrdeng_cache_efficiency_stats.queries_executed_with_gaps, 1, __ATOMIC_RELAXED);
  756. pdc->executed_with_gaps = +gaps;
  757. if(page) {
  758. if(waited)
  759. __atomic_add_fetch(&rrdeng_cache_efficiency_stats.page_next_wait_loaded, 1, __ATOMIC_RELAXED);
  760. else
  761. __atomic_add_fetch(&rrdeng_cache_efficiency_stats.page_next_nowait_loaded, 1, __ATOMIC_RELAXED);
  762. }
  763. else {
  764. if(waited)
  765. __atomic_add_fetch(&rrdeng_cache_efficiency_stats.page_next_wait_failed, 1, __ATOMIC_RELAXED);
  766. else
  767. __atomic_add_fetch(&rrdeng_cache_efficiency_stats.page_next_nowait_failed, 1, __ATOMIC_RELAXED);
  768. }
  769. if(waited) {
  770. if(preloaded)
  771. __atomic_add_fetch(&rrdeng_cache_efficiency_stats.query_time_to_slow_preload_next_page, now_monotonic_usec() - start_ut, __ATOMIC_RELAXED);
  772. else
  773. __atomic_add_fetch(&rrdeng_cache_efficiency_stats.query_time_to_slow_disk_next_page, now_monotonic_usec() - start_ut, __ATOMIC_RELAXED);
  774. }
  775. else {
  776. if(preloaded)
  777. __atomic_add_fetch(&rrdeng_cache_efficiency_stats.query_time_to_fast_preload_next_page, now_monotonic_usec() - start_ut, __ATOMIC_RELAXED);
  778. else
  779. __atomic_add_fetch(&rrdeng_cache_efficiency_stats.query_time_to_fast_disk_next_page, now_monotonic_usec() - start_ut, __ATOMIC_RELAXED);
  780. }
  781. return page;
  782. }
  783. void pgc_open_add_hot_page(Word_t section, Word_t metric_id, time_t start_time_s, time_t end_time_s, uint32_t update_every_s,
  784. struct rrdengine_datafile *datafile, uint64_t extent_offset, unsigned extent_size, uint32_t page_length) {
  785. if(!datafile_acquire(datafile, DATAFILE_ACQUIRE_OPEN_CACHE)) // for open cache item
  786. fatal("DBENGINE: cannot acquire datafile to put page in open cache");
  787. struct extent_io_data ext_io_data = {
  788. .file = datafile->file,
  789. .fileno = datafile->fileno,
  790. .pos = extent_offset,
  791. .bytes = extent_size,
  792. .page_length = page_length
  793. };
  794. PGC_ENTRY page_entry = {
  795. .hot = true,
  796. .section = section,
  797. .metric_id = metric_id,
  798. .start_time_s = start_time_s,
  799. .end_time_s = end_time_s,
  800. .update_every_s = update_every_s,
  801. .size = 0,
  802. .data = datafile,
  803. .custom_data = (uint8_t *) &ext_io_data,
  804. };
  805. internal_fatal(!datafile->fileno, "DBENGINE: datafile supplied does not have a number");
  806. bool added = true;
  807. PGC_PAGE *page = pgc_page_add_and_acquire(open_cache, page_entry, &added);
  808. int tries = 100;
  809. while(!added && page_entry.end_time_s > pgc_page_end_time_s(page) && tries--) {
  810. pgc_page_to_clean_evict_or_release(open_cache, page);
  811. page = pgc_page_add_and_acquire(open_cache, page_entry, &added);
  812. }
  813. if(!added) {
  814. datafile_release(datafile, DATAFILE_ACQUIRE_OPEN_CACHE);
  815. internal_fatal(page_entry.end_time_s > pgc_page_end_time_s(page),
  816. "DBENGINE: cannot add longer page to open cache");
  817. }
  818. pgc_page_release(open_cache, (PGC_PAGE *)page);
  819. }
  820. size_t dynamic_open_cache_size(void) {
  821. size_t main_cache_size = pgc_get_wanted_cache_size(main_cache);
  822. size_t target_size = main_cache_size / 100 * 5;
  823. if(target_size < 2 * 1024 * 1024)
  824. target_size = 2 * 1024 * 1024;
  825. return target_size;
  826. }
  827. size_t dynamic_extent_cache_size(void) {
  828. size_t main_cache_size = pgc_get_wanted_cache_size(main_cache);
  829. size_t target_size = main_cache_size / 100 * 5;
  830. if(target_size < 3 * 1024 * 1024)
  831. target_size = 3 * 1024 * 1024;
  832. return target_size;
  833. }
  834. void pgc_and_mrg_initialize(void)
  835. {
  836. main_mrg = mrg_create(0);
  837. size_t target_cache_size = (size_t)default_rrdeng_page_cache_mb * 1024ULL * 1024ULL;
  838. size_t main_cache_size = (target_cache_size / 100) * 95;
  839. size_t open_cache_size = 0;
  840. size_t extent_cache_size = (target_cache_size / 100) * 5;
  841. if(extent_cache_size < 3 * 1024 * 1024) {
  842. extent_cache_size = 3 * 1024 * 1024;
  843. main_cache_size = target_cache_size - extent_cache_size;
  844. }
  845. extent_cache_size += (size_t)(default_rrdeng_extent_cache_mb * 1024ULL * 1024ULL);
  846. main_cache = pgc_create(
  847. "main_cache",
  848. main_cache_size,
  849. main_cache_free_clean_page_callback,
  850. (size_t) rrdeng_pages_per_extent,
  851. main_cache_flush_dirty_page_init_callback,
  852. main_cache_flush_dirty_page_callback,
  853. 10,
  854. 10240, // if there are that many threads, evict so many at once!
  855. 1000, //
  856. 5, // don't delay too much other threads
  857. PGC_OPTIONS_AUTOSCALE, // AUTOSCALE = 2x max hot pages
  858. 0, // 0 = as many as the system cpus
  859. 0
  860. );
  861. open_cache = pgc_create(
  862. "open_cache",
  863. open_cache_size, // the default is 1MB
  864. open_cache_free_clean_page_callback,
  865. 1,
  866. NULL,
  867. open_cache_flush_dirty_page_callback,
  868. 10,
  869. 10240, // if there are that many threads, evict that many at once!
  870. 1000, //
  871. 3, // don't delay too much other threads
  872. PGC_OPTIONS_AUTOSCALE | PGC_OPTIONS_EVICT_PAGES_INLINE | PGC_OPTIONS_FLUSH_PAGES_INLINE,
  873. 0, // 0 = as many as the system cpus
  874. sizeof(struct extent_io_data)
  875. );
  876. pgc_set_dynamic_target_cache_size_callback(open_cache, dynamic_open_cache_size);
  877. extent_cache = pgc_create(
  878. "extent_cache",
  879. extent_cache_size,
  880. extent_cache_free_clean_page_callback,
  881. 1,
  882. NULL,
  883. extent_cache_flush_dirty_page_callback,
  884. 5,
  885. 10, // it will lose up to that extents at once!
  886. 100, //
  887. 2, // don't delay too much other threads
  888. PGC_OPTIONS_AUTOSCALE | PGC_OPTIONS_EVICT_PAGES_INLINE | PGC_OPTIONS_FLUSH_PAGES_INLINE,
  889. 0, // 0 = as many as the system cpus
  890. 0
  891. );
  892. pgc_set_dynamic_target_cache_size_callback(extent_cache, dynamic_extent_cache_size);
  893. }