pagecache.c

// SPDX-License-Identifier: GPL-3.0-or-later
#define NETDATA_RRD_INTERNALS

#include "rrdengine.h"

/* Forward declarations */
static int pg_cache_try_evict_one_page_unsafe(struct rrdengine_instance *ctx);

/* always inserts into tail */
static inline void pg_cache_replaceQ_insert_unsafe(struct rrdengine_instance *ctx,
                                                   struct rrdeng_page_descr *descr)
{
    struct page_cache *pg_cache = &ctx->pg_cache;
    struct page_cache_descr *pg_cache_descr = descr->pg_cache_descr;

    if (likely(NULL != pg_cache->replaceQ.tail)) {
        pg_cache_descr->prev = pg_cache->replaceQ.tail;
        pg_cache->replaceQ.tail->next = pg_cache_descr;
    }
    if (unlikely(NULL == pg_cache->replaceQ.head)) {
        pg_cache->replaceQ.head = pg_cache_descr;
    }
    pg_cache->replaceQ.tail = pg_cache_descr;
}

static inline void pg_cache_replaceQ_delete_unsafe(struct rrdengine_instance *ctx,
                                                   struct rrdeng_page_descr *descr)
{
    struct page_cache *pg_cache = &ctx->pg_cache;
    struct page_cache_descr *pg_cache_descr = descr->pg_cache_descr, *prev, *next;

    prev = pg_cache_descr->prev;
    next = pg_cache_descr->next;

    if (likely(NULL != prev)) {
        prev->next = next;
    }
    if (likely(NULL != next)) {
        next->prev = prev;
    }
    if (unlikely(pg_cache_descr == pg_cache->replaceQ.head)) {
        pg_cache->replaceQ.head = next;
    }
    if (unlikely(pg_cache_descr == pg_cache->replaceQ.tail)) {
        pg_cache->replaceQ.tail = prev;
    }
    pg_cache_descr->prev = pg_cache_descr->next = NULL;
}

void pg_cache_replaceQ_insert(struct rrdengine_instance *ctx,
                              struct rrdeng_page_descr *descr)
{
    struct page_cache *pg_cache = &ctx->pg_cache;

    uv_rwlock_wrlock(&pg_cache->replaceQ.lock);
    pg_cache_replaceQ_insert_unsafe(ctx, descr);
    uv_rwlock_wrunlock(&pg_cache->replaceQ.lock);
}

void pg_cache_replaceQ_delete(struct rrdengine_instance *ctx,
                              struct rrdeng_page_descr *descr)
{
    struct page_cache *pg_cache = &ctx->pg_cache;

    uv_rwlock_wrlock(&pg_cache->replaceQ.lock);
    pg_cache_replaceQ_delete_unsafe(ctx, descr);
    uv_rwlock_wrunlock(&pg_cache->replaceQ.lock);
}
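
/*
 * Moves a page descriptor to the tail (most-recently-used end) of the replacement queue,
 * so it is considered last for eviction.
 */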
void pg_cache_replaceQ_set_hot(struct rrdengine_instance *ctx,
                               struct rrdeng_page_descr *descr)
{
    struct page_cache *pg_cache = &ctx->pg_cache;

    uv_rwlock_wrlock(&pg_cache->replaceQ.lock);
    pg_cache_replaceQ_delete_unsafe(ctx, descr);
    pg_cache_replaceQ_insert_unsafe(ctx, descr);
    uv_rwlock_wrunlock(&pg_cache->replaceQ.lock);
}
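
/* Allocates and initializes an empty page descriptor; the caller owns the returned memory. */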
struct rrdeng_page_descr *pg_cache_create_descr(void)
{
    struct rrdeng_page_descr *descr;

    descr = mallocz(sizeof(*descr));
    descr->page_length = 0;
    descr->start_time = INVALID_TIME;
    descr->end_time = INVALID_TIME;
    descr->id = NULL;
    descr->extent = NULL;
    descr->pg_cache_descr_state = 0;
    descr->pg_cache_descr = NULL;

    return descr;
}

/* The caller must hold page descriptor lock. */
void pg_cache_wake_up_waiters_unsafe(struct rrdeng_page_descr *descr)
{
    struct page_cache_descr *pg_cache_descr = descr->pg_cache_descr;

    if (pg_cache_descr->waiters)
        uv_cond_broadcast(&pg_cache_descr->cond);
}

void pg_cache_wake_up_waiters(struct rrdengine_instance *ctx, struct rrdeng_page_descr *descr)
{
    rrdeng_page_descr_mutex_lock(ctx, descr);
    pg_cache_wake_up_waiters_unsafe(descr);
    rrdeng_page_descr_mutex_unlock(ctx, descr);
}

/*
 * The caller must hold page descriptor lock.
 * The lock will be released and re-acquired. The descriptor is not guaranteed
 * to exist after this function returns.
 */
void pg_cache_wait_event_unsafe(struct rrdeng_page_descr *descr)
{
    struct page_cache_descr *pg_cache_descr = descr->pg_cache_descr;

    ++pg_cache_descr->waiters;
    uv_cond_wait(&pg_cache_descr->cond, &pg_cache_descr->mutex);
    --pg_cache_descr->waiters;
}

/*
 * The caller must hold page descriptor lock.
 * The lock will be released and re-acquired. The descriptor is not guaranteed
 * to exist after this function returns.
 * Returns UV_ETIMEDOUT if timeout_sec seconds pass.
 */
int pg_cache_timedwait_event_unsafe(struct rrdeng_page_descr *descr, uint64_t timeout_sec)
{
    int ret;
    struct page_cache_descr *pg_cache_descr = descr->pg_cache_descr;

    ++pg_cache_descr->waiters;
    ret = uv_cond_timedwait(&pg_cache_descr->cond, &pg_cache_descr->mutex, timeout_sec * NSEC_PER_SEC);
    --pg_cache_descr->waiters;

    return ret;
}

/*
 * Returns page flags.
 * The lock will be released and re-acquired. The descriptor is not guaranteed
 * to exist after this function returns.
 */
unsigned long pg_cache_wait_event(struct rrdengine_instance *ctx, struct rrdeng_page_descr *descr)
{
    struct page_cache_descr *pg_cache_descr = descr->pg_cache_descr;
    unsigned long flags;

    rrdeng_page_descr_mutex_lock(ctx, descr);
    pg_cache_wait_event_unsafe(descr);
    flags = pg_cache_descr->flags;
    rrdeng_page_descr_mutex_unlock(ctx, descr);

    return flags;
}

/*
 * The caller must hold page descriptor lock.
 */
int pg_cache_can_get_unsafe(struct rrdeng_page_descr *descr, int exclusive_access)
{
    struct page_cache_descr *pg_cache_descr = descr->pg_cache_descr;

    if ((pg_cache_descr->flags & (RRD_PAGE_LOCKED | RRD_PAGE_READ_PENDING)) ||
        (exclusive_access && pg_cache_descr->refcnt)) {
        return 0;
    }

    return 1;
}

/*
 * The caller must hold page descriptor lock.
 * Gets a reference to the page descriptor.
 * Returns 1 on success and 0 on failure.
 */
int pg_cache_try_get_unsafe(struct rrdeng_page_descr *descr, int exclusive_access)
{
    struct page_cache_descr *pg_cache_descr = descr->pg_cache_descr;

    if (!pg_cache_can_get_unsafe(descr, exclusive_access))
        return 0;

    if (exclusive_access)
        pg_cache_descr->flags |= RRD_PAGE_LOCKED;
    ++pg_cache_descr->refcnt;

    return 1;
}

/*
 * The caller must hold the page descriptor lock.
 * This function may block doing cleanup.
 */
void pg_cache_put_unsafe(struct rrdeng_page_descr *descr)
{
    struct page_cache_descr *pg_cache_descr = descr->pg_cache_descr;

    pg_cache_descr->flags &= ~RRD_PAGE_LOCKED;
    if (0 == --pg_cache_descr->refcnt) {
        pg_cache_wake_up_waiters_unsafe(descr);
    }
}

/*
 * This function may block doing cleanup.
 */
void pg_cache_put(struct rrdengine_instance *ctx, struct rrdeng_page_descr *descr)
{
    rrdeng_page_descr_mutex_lock(ctx, descr);
    pg_cache_put_unsafe(descr);
    rrdeng_page_descr_mutex_unlock(ctx, descr);
}

/* The caller must hold the page cache lock */
static void pg_cache_release_pages_unsafe(struct rrdengine_instance *ctx, unsigned number)
{
    struct page_cache *pg_cache = &ctx->pg_cache;

    pg_cache->populated_pages -= number;
}

static void pg_cache_release_pages(struct rrdengine_instance *ctx, unsigned number)
{
    struct page_cache *pg_cache = &ctx->pg_cache;

    uv_rwlock_wrlock(&pg_cache->pg_cache_rwlock);
    pg_cache_release_pages_unsafe(ctx, number);
    uv_rwlock_wrunlock(&pg_cache->pg_cache_rwlock);
}

/*
 * This function returns the maximum number of pages allowed in the page cache.
 */
unsigned long pg_cache_hard_limit(struct rrdengine_instance *ctx)
{
    /* it's twice the number of producers since we pin 2 pages per producer */
    return ctx->max_cache_pages + 2 * (unsigned long)ctx->metric_API_max_producers;
}

/*
 * This function returns the low watermark number of pages in the page cache. The page cache should strive to keep the
 * number of pages below that number.
 */
unsigned long pg_cache_soft_limit(struct rrdengine_instance *ctx)
{
    /* it's twice the number of producers since we pin 2 pages per producer */
    return ctx->cache_pages_low_watermark + 2 * (unsigned long)ctx->metric_API_max_producers;
}

/*
 * This function returns the maximum number of dirty pages that are committed to be written to disk allowed in the page
 * cache.
 */
unsigned long pg_cache_committed_hard_limit(struct rrdengine_instance *ctx)
{
    /* We remove the active pages of the producers from the calculation and only allow the extra pinned pages */
    return ctx->cache_pages_low_watermark + (unsigned long)ctx->metric_API_max_producers;
}
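
/*
 * Illustrative example (hypothetical numbers): with max_cache_pages = 2048,
 * cache_pages_low_watermark = 1792 and metric_API_max_producers = 100, the limits above are
 * pg_cache_hard_limit() = 2048 + 2 * 100 = 2248 pages,
 * pg_cache_soft_limit() = 1792 + 2 * 100 = 1992 pages and
 * pg_cache_committed_hard_limit() = 1792 + 100 = 1892 pages.
 */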
/*
 * This function will block until it reserves #number populated pages.
 * It will trigger evictions or dirty page flushing if the pg_cache_hard_limit() limit is hit.
 */
static void pg_cache_reserve_pages(struct rrdengine_instance *ctx, unsigned number)
{
    struct page_cache *pg_cache = &ctx->pg_cache;
    unsigned failures = 0;
    const unsigned FAILURES_CEILING = 10; /* truncates exponential backoff to (2^FAILURES_CEILING x slot) */
    unsigned long exp_backoff_slot_usec = USEC_PER_MS * 10;
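    /*
     * Illustrative backoff arithmetic (hypothetical failure count): with the 10 msec slot above,
     * after e.g. 3 consecutive eviction failures the backoff below sleeps for a random number of
     * slots drawn from [0, 2 << 3), i.e. up to 150 msec; the slot range is capped at
     * 2 << FAILURES_CEILING = 2048 slots, roughly 20 seconds.
     */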
    assert(number < ctx->max_cache_pages);

    uv_rwlock_wrlock(&pg_cache->pg_cache_rwlock);
    if (pg_cache->populated_pages + number >= pg_cache_hard_limit(ctx) + 1)
        debug(D_RRDENGINE, "==Page cache full. Reserving %u pages.==",
              number);
    while (pg_cache->populated_pages + number >= pg_cache_hard_limit(ctx) + 1) {

        if (!pg_cache_try_evict_one_page_unsafe(ctx)) {
            /* failed to evict */
            struct completion compl;
            struct rrdeng_cmd cmd;

            ++failures;
            uv_rwlock_wrunlock(&pg_cache->pg_cache_rwlock);

            completion_init(&compl);
            cmd.opcode = RRDENG_FLUSH_PAGES;
            cmd.completion = &compl;
            rrdeng_enq_cmd(&ctx->worker_config, &cmd);
            /* wait for some pages to be flushed */
            debug(D_RRDENGINE, "%s: waiting for pages to be written to disk before evicting.", __func__);
            completion_wait_for(&compl);
            completion_destroy(&compl);

            if (unlikely(failures > 1)) {
                unsigned long slots, usecs_to_sleep;
                /* exponential backoff */
                slots = random() % (2LU << MIN(failures, FAILURES_CEILING));
                usecs_to_sleep = slots * exp_backoff_slot_usec;
                if (usecs_to_sleep >= USEC_PER_SEC)
                    error("Page cache is full. Sleeping for %llu second(s).", usecs_to_sleep / USEC_PER_SEC);
                (void)sleep_usec(usecs_to_sleep);
            }
            uv_rwlock_wrlock(&pg_cache->pg_cache_rwlock);
        }
    }
    pg_cache->populated_pages += number;
    uv_rwlock_wrunlock(&pg_cache->pg_cache_rwlock);
}

/*
 * This function will attempt to reserve #number populated pages.
 * It may trigger evictions if the pg_cache_soft_limit() limit is hit.
 * Returns 0 on failure and 1 on success.
 */
static int pg_cache_try_reserve_pages(struct rrdengine_instance *ctx, unsigned number)
{
    struct page_cache *pg_cache = &ctx->pg_cache;
    unsigned count = 0;
    int ret = 0;

    assert(number < ctx->max_cache_pages);

    uv_rwlock_wrlock(&pg_cache->pg_cache_rwlock);
    if (pg_cache->populated_pages + number >= pg_cache_soft_limit(ctx) + 1) {
        debug(D_RRDENGINE,
              "==Page cache full. Trying to reserve %u pages.==",
              number);
        do {
            if (!pg_cache_try_evict_one_page_unsafe(ctx))
                break;
            ++count;
        } while (pg_cache->populated_pages + number >= pg_cache_soft_limit(ctx) + 1);
        debug(D_RRDENGINE, "Evicted %u pages.", count);
    }

    if (pg_cache->populated_pages + number < pg_cache_hard_limit(ctx) + 1) {
        pg_cache->populated_pages += number;
        ret = 1; /* success */
    }
    uv_rwlock_wrunlock(&pg_cache->pg_cache_rwlock);

    return ret;
}

/* The caller must hold the page cache and the page descriptor locks in that order */
static void pg_cache_evict_unsafe(struct rrdengine_instance *ctx, struct rrdeng_page_descr *descr)
{
    struct page_cache_descr *pg_cache_descr = descr->pg_cache_descr;

    freez(pg_cache_descr->page);
    pg_cache_descr->page = NULL;
    pg_cache_descr->flags &= ~RRD_PAGE_POPULATED;
    pg_cache_release_pages_unsafe(ctx, 1);
    ++ctx->stats.pg_cache_evictions;
}
/*
 * The caller must hold the page cache lock.
 * Lock order: page cache -> replaceQ -> page descriptor
 * This function iterates the replacement queue and tries to evict one page.
 *
 * Returns 1 on success and 0 on failure.
 */
static int pg_cache_try_evict_one_page_unsafe(struct rrdengine_instance *ctx)
{
    struct page_cache *pg_cache = &ctx->pg_cache;
    unsigned long old_flags;
    struct rrdeng_page_descr *descr;
    struct page_cache_descr *pg_cache_descr = NULL;

    uv_rwlock_wrlock(&pg_cache->replaceQ.lock);
    for (pg_cache_descr = pg_cache->replaceQ.head ; NULL != pg_cache_descr ; pg_cache_descr = pg_cache_descr->next) {
        descr = pg_cache_descr->descr;

        rrdeng_page_descr_mutex_lock(ctx, descr);
        old_flags = pg_cache_descr->flags;
        if ((old_flags & RRD_PAGE_POPULATED) && !(old_flags & RRD_PAGE_DIRTY) && pg_cache_try_get_unsafe(descr, 1)) {
            /* must evict */
            pg_cache_evict_unsafe(ctx, descr);
            pg_cache_put_unsafe(descr);
            pg_cache_replaceQ_delete_unsafe(ctx, descr);

            rrdeng_page_descr_mutex_unlock(ctx, descr);
            uv_rwlock_wrunlock(&pg_cache->replaceQ.lock);

            rrdeng_try_deallocate_pg_cache_descr(ctx, descr);

            return 1;
        }
        rrdeng_page_descr_mutex_unlock(ctx, descr);
    }
    uv_rwlock_wrunlock(&pg_cache->replaceQ.lock);

    /* failed to evict */
    return 0;
}
/**
 * Deletes a page from the database.
 * Callers of this function need to make sure they're not deleting the same descriptor concurrently.
 * @param ctx is the database instance.
 * @param descr is the page descriptor.
 * @param remove_dirty must be non-zero if the page to be deleted is dirty.
 * @param is_exclusive_holder must be non-zero if the caller holds an exclusive page reference.
 * @param metric_id is set to the metric the page belongs to, if it's safe to delete the metric and metric_id is not
 *        NULL. Otherwise, metric_id is not set.
 * @return 1 if it's safe to delete the metric, 0 otherwise.
 */
uint8_t pg_cache_punch_hole(struct rrdengine_instance *ctx, struct rrdeng_page_descr *descr, uint8_t remove_dirty,
                            uint8_t is_exclusive_holder, uuid_t *metric_id)
{
    struct page_cache *pg_cache = &ctx->pg_cache;
    struct page_cache_descr *pg_cache_descr = NULL;
    Pvoid_t *PValue;
    struct pg_cache_page_index *page_index = NULL;
    int ret;
    uint8_t can_delete_metric = 0;

    uv_rwlock_rdlock(&pg_cache->metrics_index.lock);
    PValue = JudyHSGet(pg_cache->metrics_index.JudyHS_array, descr->id, sizeof(uuid_t));
    fatal_assert(NULL != PValue);
    page_index = *PValue;
    uv_rwlock_rdunlock(&pg_cache->metrics_index.lock);

    uv_rwlock_wrlock(&page_index->lock);
    ret = JudyLDel(&page_index->JudyL_array, (Word_t)(descr->start_time / USEC_PER_SEC), PJE0);
    if (unlikely(0 == ret)) {
        uv_rwlock_wrunlock(&page_index->lock);
        error("Page under deletion was not in index.");
        if (unlikely(debug_flags & D_RRDENGINE)) {
            print_page_descr(descr);
        }
        goto destroy;
    }
    --page_index->page_count;
    if (!page_index->writers && !page_index->page_count) {
        can_delete_metric = 1;
        if (metric_id) {
            memcpy(metric_id, page_index->id, sizeof(uuid_t));
        }
    }
    uv_rwlock_wrunlock(&page_index->lock);
    fatal_assert(1 == ret);

    uv_rwlock_wrlock(&pg_cache->pg_cache_rwlock);
    ++ctx->stats.pg_cache_deletions;
    --pg_cache->page_descriptors;
    uv_rwlock_wrunlock(&pg_cache->pg_cache_rwlock);

    rrdeng_page_descr_mutex_lock(ctx, descr);
    pg_cache_descr = descr->pg_cache_descr;
    if (!is_exclusive_holder) {
        /* If we don't hold an exclusive page reference get one */
        while (!pg_cache_try_get_unsafe(descr, 1)) {
            debug(D_RRDENGINE, "%s: Waiting for locked page:", __func__);
            if (unlikely(debug_flags & D_RRDENGINE))
                print_page_cache_descr(descr);
            pg_cache_wait_event_unsafe(descr);
        }
    }
    if (remove_dirty) {
        pg_cache_descr->flags &= ~RRD_PAGE_DIRTY;
    } else {
        /* even a locked page could be dirty */
        while (unlikely(pg_cache_descr->flags & RRD_PAGE_DIRTY)) {
            debug(D_RRDENGINE, "%s: Found dirty page, waiting for it to be flushed:", __func__);
            if (unlikely(debug_flags & D_RRDENGINE))
                print_page_cache_descr(descr);
            pg_cache_wait_event_unsafe(descr);
        }
    }
    rrdeng_page_descr_mutex_unlock(ctx, descr);

    if (pg_cache_descr->flags & RRD_PAGE_POPULATED) {
        /* only after locking can it be safely deleted from LRU */
        pg_cache_replaceQ_delete(ctx, descr);

        uv_rwlock_wrlock(&pg_cache->pg_cache_rwlock);
        pg_cache_evict_unsafe(ctx, descr);
        uv_rwlock_wrunlock(&pg_cache->pg_cache_rwlock);
    }
    pg_cache_put(ctx, descr);
    rrdeng_try_deallocate_pg_cache_descr(ctx, descr);
    while (descr->pg_cache_descr_state & PG_CACHE_DESCR_ALLOCATED) {
        rrdeng_try_deallocate_pg_cache_descr(ctx, descr); /* spin */
        (void)sleep_usec(1000); /* 1 msec */
    }
destroy:
    freez(descr);
    pg_cache_update_metric_times(page_index);

    return can_delete_metric;
}
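
/* Returns non-zero if the page described by descr overlaps the inclusive time range [start_time, end_time]. */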
static inline int is_page_in_time_range(struct rrdeng_page_descr *descr, usec_t start_time, usec_t end_time)
{
    usec_t pg_start, pg_end;

    pg_start = descr->start_time;
    pg_end = descr->end_time;

    return (pg_start < start_time && pg_end >= start_time) ||
           (pg_start >= start_time && pg_start <= end_time);
}

static inline int is_point_in_time_in_page(struct rrdeng_page_descr *descr, usec_t point_in_time)
{
    return (point_in_time >= descr->start_time && point_in_time <= descr->end_time);
}

/* The caller must hold the page index lock */
static inline struct rrdeng_page_descr *
find_first_page_in_time_range(struct pg_cache_page_index *page_index, usec_t start_time, usec_t end_time)
{
    struct rrdeng_page_descr *descr = NULL;
    Pvoid_t *PValue;
    Word_t Index;

    Index = (Word_t)(start_time / USEC_PER_SEC);
    PValue = JudyLLast(page_index->JudyL_array, &Index, PJE0);
    if (likely(NULL != PValue)) {
        descr = *PValue;
        if (is_page_in_time_range(descr, start_time, end_time)) {
            return descr;
        }
    }

    Index = (Word_t)(start_time / USEC_PER_SEC);
    PValue = JudyLFirst(page_index->JudyL_array, &Index, PJE0);
    if (likely(NULL != PValue)) {
        descr = *PValue;
        if (is_page_in_time_range(descr, start_time, end_time)) {
            return descr;
        }
    }

    return NULL;
}

/* Update metric oldest and latest timestamps efficiently when adding new values */
void pg_cache_add_new_metric_time(struct pg_cache_page_index *page_index, struct rrdeng_page_descr *descr)
{
    usec_t oldest_time = page_index->oldest_time;
    usec_t latest_time = page_index->latest_time;

    if (unlikely(oldest_time == INVALID_TIME || descr->start_time < oldest_time)) {
        page_index->oldest_time = descr->start_time;
    }
    if (likely(descr->end_time > latest_time || latest_time == INVALID_TIME)) {
        page_index->latest_time = descr->end_time;
    }
}

/* Update metric oldest and latest timestamps when removing old values */
void pg_cache_update_metric_times(struct pg_cache_page_index *page_index)
{
    Pvoid_t *firstPValue, *lastPValue;
    Word_t firstIndex, lastIndex;
    struct rrdeng_page_descr *descr;
    usec_t oldest_time = INVALID_TIME;
    usec_t latest_time = INVALID_TIME;

    uv_rwlock_rdlock(&page_index->lock);
    /* Find first page in range */
    firstIndex = (Word_t)0;
    firstPValue = JudyLFirst(page_index->JudyL_array, &firstIndex, PJE0);
    if (likely(NULL != firstPValue)) {
        descr = *firstPValue;
        oldest_time = descr->start_time;
    }
    lastIndex = (Word_t)-1;
    lastPValue = JudyLLast(page_index->JudyL_array, &lastIndex, PJE0);
    if (likely(NULL != lastPValue)) {
        descr = *lastPValue;
        latest_time = descr->end_time;
    }
    uv_rwlock_rdunlock(&page_index->lock);

    if (unlikely(NULL == firstPValue)) {
        fatal_assert(NULL == lastPValue);
        page_index->oldest_time = page_index->latest_time = INVALID_TIME;
        return;
    }
    page_index->oldest_time = oldest_time;
    page_index->latest_time = latest_time;
}

/* If index is NULL lookup by UUID (descr->id) */
void pg_cache_insert(struct rrdengine_instance *ctx, struct pg_cache_page_index *index,
                     struct rrdeng_page_descr *descr)
{
    struct page_cache *pg_cache = &ctx->pg_cache;
    Pvoid_t *PValue;
    struct pg_cache_page_index *page_index;
    unsigned long pg_cache_descr_state = descr->pg_cache_descr_state;

    if (0 != pg_cache_descr_state) {
        /* there is page cache descriptor pre-allocated state */
        struct page_cache_descr *pg_cache_descr = descr->pg_cache_descr;

        fatal_assert(pg_cache_descr_state & PG_CACHE_DESCR_ALLOCATED);
        if (pg_cache_descr->flags & RRD_PAGE_POPULATED) {
            pg_cache_reserve_pages(ctx, 1);
            if (!(pg_cache_descr->flags & RRD_PAGE_DIRTY))
                pg_cache_replaceQ_insert(ctx, descr);
        }
    }

    if (unlikely(NULL == index)) {
        uv_rwlock_rdlock(&pg_cache->metrics_index.lock);
        PValue = JudyHSGet(pg_cache->metrics_index.JudyHS_array, descr->id, sizeof(uuid_t));
        fatal_assert(NULL != PValue);
        page_index = *PValue;
        uv_rwlock_rdunlock(&pg_cache->metrics_index.lock);
    } else {
        page_index = index;
    }

    uv_rwlock_wrlock(&page_index->lock);
    PValue = JudyLIns(&page_index->JudyL_array, (Word_t)(descr->start_time / USEC_PER_SEC), PJE0);
    *PValue = descr;
    ++page_index->page_count;
    pg_cache_add_new_metric_time(page_index, descr);
    uv_rwlock_wrunlock(&page_index->lock);

    uv_rwlock_wrlock(&pg_cache->pg_cache_rwlock);
    ++ctx->stats.pg_cache_insertions;
    ++pg_cache->page_descriptors;
    uv_rwlock_wrunlock(&pg_cache->pg_cache_rwlock);
}
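
/*
 * Returns the start time of the first page of the metric with the given UUID that overlaps the
 * inclusive time range [start_time, end_time], or INVALID_TIME if the metric is unknown or no
 * page overlaps the range.
 */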
usec_t pg_cache_oldest_time_in_range(struct rrdengine_instance *ctx, uuid_t *id, usec_t start_time, usec_t end_time)
{
    struct page_cache *pg_cache = &ctx->pg_cache;
    struct rrdeng_page_descr *descr = NULL;
    Pvoid_t *PValue;
    struct pg_cache_page_index *page_index = NULL;

    uv_rwlock_rdlock(&pg_cache->metrics_index.lock);
    PValue = JudyHSGet(pg_cache->metrics_index.JudyHS_array, id, sizeof(uuid_t));
    if (likely(NULL != PValue)) {
        page_index = *PValue;
    }
    uv_rwlock_rdunlock(&pg_cache->metrics_index.lock);
    if (NULL == PValue) {
        return INVALID_TIME;
    }

    uv_rwlock_rdlock(&page_index->lock);
    descr = find_first_page_in_time_range(page_index, start_time, end_time);
    if (NULL == descr) {
        uv_rwlock_rdunlock(&page_index->lock);
        return INVALID_TIME;
    }
    uv_rwlock_rdunlock(&page_index->lock);

    return descr->start_time;
}

/**
 * Return page information for the first page before point_in_time that satisfies the filter.
 * @param ctx DB context
 * @param page_index page index of a metric
 * @param point_in_time the pages that are searched must be older than this timestamp
 * @param filter decides if the page satisfies the caller's criteria
 * @param page_info the result of the search is set in this pointer
 */
void pg_cache_get_filtered_info_prev(struct rrdengine_instance *ctx, struct pg_cache_page_index *page_index,
                                     usec_t point_in_time, pg_cache_page_info_filter_t *filter,
                                     struct rrdeng_page_info *page_info)
{
    struct page_cache *pg_cache = &ctx->pg_cache;
    struct rrdeng_page_descr *descr = NULL;
    Pvoid_t *PValue;
    Word_t Index;

    (void)pg_cache;
    fatal_assert(NULL != page_index);

    Index = (Word_t)(point_in_time / USEC_PER_SEC);
    uv_rwlock_rdlock(&page_index->lock);
    do {
        PValue = JudyLPrev(page_index->JudyL_array, &Index, PJE0);
        descr = unlikely(NULL == PValue) ? NULL : *PValue;
    } while (descr != NULL && !filter(descr));
    if (unlikely(NULL == descr)) {
        page_info->page_length = 0;
        page_info->start_time = INVALID_TIME;
        page_info->end_time = INVALID_TIME;
    } else {
        page_info->page_length = descr->page_length;
        page_info->start_time = descr->start_time;
        page_info->end_time = descr->end_time;
    }
    uv_rwlock_rdunlock(&page_index->lock);
}
/**
 * Searches for an unallocated page without triggering disk I/O. Attempts to reserve the page and get a reference.
 * @param ctx DB context
 * @param id lookup by UUID
 * @param start_time exact starting time in usec
 * @return the page descriptor or NULL on failure. It can fail if:
 *         1. The page is already allocated to the page cache.
 *         2. It did not succeed to get a reference.
 *         3. It did not succeed to reserve a spot in the page cache.
 */
struct rrdeng_page_descr *pg_cache_lookup_unpopulated_and_lock(struct rrdengine_instance *ctx, uuid_t *id,
                                                               usec_t start_time)
{
    struct page_cache *pg_cache = &ctx->pg_cache;
    struct rrdeng_page_descr *descr = NULL;
    struct page_cache_descr *pg_cache_descr = NULL;
    unsigned long flags;
    Pvoid_t *PValue;
    struct pg_cache_page_index *page_index = NULL;
    Word_t Index;

    uv_rwlock_rdlock(&pg_cache->metrics_index.lock);
    PValue = JudyHSGet(pg_cache->metrics_index.JudyHS_array, id, sizeof(uuid_t));
    if (likely(NULL != PValue)) {
        page_index = *PValue;
    }
    uv_rwlock_rdunlock(&pg_cache->metrics_index.lock);

    if ((NULL == PValue) || !pg_cache_try_reserve_pages(ctx, 1)) {
        /* Failed to find page or failed to reserve a spot in the cache */
        return NULL;
    }

    uv_rwlock_rdlock(&page_index->lock);
    Index = (Word_t)(start_time / USEC_PER_SEC);
    PValue = JudyLGet(page_index->JudyL_array, Index, PJE0);
    if (likely(NULL != PValue)) {
        descr = *PValue;
    }
    if (NULL == PValue || 0 == descr->page_length) {
        /* Failed to find non-empty page */
        uv_rwlock_rdunlock(&page_index->lock);

        pg_cache_release_pages(ctx, 1);
        return NULL;
    }

    rrdeng_page_descr_mutex_lock(ctx, descr);
    pg_cache_descr = descr->pg_cache_descr;
    flags = pg_cache_descr->flags;
    uv_rwlock_rdunlock(&page_index->lock);

    if ((flags & RRD_PAGE_POPULATED) || !pg_cache_try_get_unsafe(descr, 1)) {
        /* Failed to get reference or page is already populated */
        rrdeng_page_descr_mutex_unlock(ctx, descr);

        pg_cache_release_pages(ctx, 1);
        return NULL;
    }
    /* success */
    rrdeng_page_descr_mutex_unlock(ctx, descr);
    rrd_stat_atomic_add(&ctx->stats.pg_cache_misses, 1);
    return descr;
}
/**
 * Searches for pages in a time range and triggers disk I/O if necessary and possible.
 * Does not get a reference.
 * @param ctx DB context
 * @param id UUID
 * @param start_time inclusive starting time in usec
 * @param end_time inclusive ending time in usec
 * @param page_info_arrayp It allocates (*page_info_arrayp) and populates it with information of pages that overlap
 *        with the time range [start_time,end_time]. The caller must free (*page_info_arrayp) with freez().
 *        If (*page_info_arrayp) is set to NULL nothing was allocated.
 * @param ret_page_indexp Sets the page index pointer (*ret_page_indexp) for the given UUID.
 * @return the number of pages that overlap with the time range [start_time,end_time].
 */
unsigned pg_cache_preload(struct rrdengine_instance *ctx, uuid_t *id, usec_t start_time, usec_t end_time,
                          struct rrdeng_page_info **page_info_arrayp, struct pg_cache_page_index **ret_page_indexp)
{
    struct page_cache *pg_cache = &ctx->pg_cache;
    struct rrdeng_page_descr *descr = NULL, *preload_array[PAGE_CACHE_MAX_PRELOAD_PAGES];
    struct page_cache_descr *pg_cache_descr = NULL;
    unsigned i, j, k, preload_count, count, page_info_array_max_size;
    unsigned long flags;
    Pvoid_t *PValue;
    struct pg_cache_page_index *page_index = NULL;
    Word_t Index;
    uint8_t failed_to_reserve;

    fatal_assert(NULL != ret_page_indexp);

    uv_rwlock_rdlock(&pg_cache->metrics_index.lock);
    PValue = JudyHSGet(pg_cache->metrics_index.JudyHS_array, id, sizeof(uuid_t));
    if (likely(NULL != PValue)) {
        *ret_page_indexp = page_index = *PValue;
    }
    uv_rwlock_rdunlock(&pg_cache->metrics_index.lock);
    if (NULL == PValue) {
        debug(D_RRDENGINE, "%s: No page was found to attempt preload.", __func__);
        *ret_page_indexp = NULL;
        return 0;
    }

    uv_rwlock_rdlock(&page_index->lock);
    descr = find_first_page_in_time_range(page_index, start_time, end_time);
    if (NULL == descr) {
        uv_rwlock_rdunlock(&page_index->lock);
        debug(D_RRDENGINE, "%s: No page was found to attempt preload.", __func__);
        *ret_page_indexp = NULL;
        return 0;
    } else {
        Index = (Word_t)(descr->start_time / USEC_PER_SEC);
    }
    if (page_info_arrayp) {
        page_info_array_max_size = PAGE_CACHE_MAX_PRELOAD_PAGES * sizeof(struct rrdeng_page_info);
        *page_info_arrayp = mallocz(page_info_array_max_size);
    }

    for (count = 0, preload_count = 0 ;
         descr != NULL && is_page_in_time_range(descr, start_time, end_time) ;
         PValue = JudyLNext(page_index->JudyL_array, &Index, PJE0),
         descr = unlikely(NULL == PValue) ? NULL : *PValue) {
        /* Iterate all pages in range */

        if (unlikely(0 == descr->page_length))
            continue;
        if (page_info_arrayp) {
            if (unlikely(count >= page_info_array_max_size / sizeof(struct rrdeng_page_info))) {
                page_info_array_max_size += PAGE_CACHE_MAX_PRELOAD_PAGES * sizeof(struct rrdeng_page_info);
                *page_info_arrayp = reallocz(*page_info_arrayp, page_info_array_max_size);
            }
            (*page_info_arrayp)[count].start_time = descr->start_time;
            (*page_info_arrayp)[count].end_time = descr->end_time;
            (*page_info_arrayp)[count].page_length = descr->page_length;
        }
        ++count;

        rrdeng_page_descr_mutex_lock(ctx, descr);
        pg_cache_descr = descr->pg_cache_descr;
        flags = pg_cache_descr->flags;
        if (pg_cache_can_get_unsafe(descr, 0)) {
            if (flags & RRD_PAGE_POPULATED) {
                /* success */
                rrdeng_page_descr_mutex_unlock(ctx, descr);
                debug(D_RRDENGINE, "%s: Page was found in memory.", __func__);
                continue;
            }
        }
        if (!(flags & RRD_PAGE_POPULATED) && pg_cache_try_get_unsafe(descr, 1)) {
            preload_array[preload_count++] = descr;
            if (PAGE_CACHE_MAX_PRELOAD_PAGES == preload_count) {
                rrdeng_page_descr_mutex_unlock(ctx, descr);
                break;
            }
        }
        rrdeng_page_descr_mutex_unlock(ctx, descr);
    }
    uv_rwlock_rdunlock(&page_index->lock);

    failed_to_reserve = 0;
    for (i = 0 ; i < preload_count && !failed_to_reserve ; ++i) {
        struct rrdeng_cmd cmd;
        struct rrdeng_page_descr *next;

        descr = preload_array[i];
        if (NULL == descr) {
            continue;
        }
        if (!pg_cache_try_reserve_pages(ctx, 1)) {
            failed_to_reserve = 1;
            break;
        }
        cmd.opcode = RRDENG_READ_EXTENT;
        cmd.read_extent.page_cache_descr[0] = descr;
        /* don't use this page again */
        preload_array[i] = NULL;
        for (j = 0, k = 1 ; j < preload_count ; ++j) {
            next = preload_array[j];
            if (NULL == next) {
                continue;
            }
            if (descr->extent == next->extent) {
                /* same extent, consolidate */
                if (!pg_cache_try_reserve_pages(ctx, 1)) {
                    failed_to_reserve = 1;
                    break;
                }
                cmd.read_extent.page_cache_descr[k++] = next;
                /* don't use this page again */
                preload_array[j] = NULL;
            }
        }
        cmd.read_extent.page_count = k;
        rrdeng_enq_cmd(&ctx->worker_config, &cmd);
    }
    if (failed_to_reserve) {
        debug(D_RRDENGINE, "%s: Failed to reserve enough memory, canceling I/O.", __func__);
        for (i = 0 ; i < preload_count ; ++i) {
            descr = preload_array[i];
            if (NULL == descr) {
                continue;
            }
            pg_cache_put(ctx, descr);
        }
    }
    if (!preload_count) {
        /* no such page */
        debug(D_RRDENGINE, "%s: No page was eligible to attempt preload.", __func__);
    }
    if (unlikely(0 == count && page_info_arrayp)) {
        freez(*page_info_arrayp);
        *page_info_arrayp = NULL;
    }
    return count;
}
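
/*
 * Illustrative (hypothetical) caller sketch for pg_cache_preload(); variable names are examples
 * and error handling is omitted:
 *
 *     struct rrdeng_page_info *page_info_array = NULL;
 *     struct pg_cache_page_index *page_index = NULL;
 *     unsigned pages = pg_cache_preload(ctx, &metric_uuid, start_time, end_time,
 *                                       &page_info_array, &page_index);
 *     // ... consume page_info_array[0 .. pages - 1] ...
 *     if (page_info_array)
 *         freez(page_info_array); // the caller owns the array
 */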
/*
 * Searches for a page and gets a reference.
 * When point_in_time is INVALID_TIME get any page.
 * If index is NULL lookup by UUID (id).
 */
struct rrdeng_page_descr *
pg_cache_lookup(struct rrdengine_instance *ctx, struct pg_cache_page_index *index, uuid_t *id,
                usec_t point_in_time)
{
    struct page_cache *pg_cache = &ctx->pg_cache;
    struct rrdeng_page_descr *descr = NULL;
    struct page_cache_descr *pg_cache_descr = NULL;
    unsigned long flags;
    Pvoid_t *PValue;
    struct pg_cache_page_index *page_index = NULL;
    Word_t Index;
    uint8_t page_not_in_cache;

    if (unlikely(NULL == index)) {
        uv_rwlock_rdlock(&pg_cache->metrics_index.lock);
        PValue = JudyHSGet(pg_cache->metrics_index.JudyHS_array, id, sizeof(uuid_t));
        if (likely(NULL != PValue)) {
            page_index = *PValue;
        }
        uv_rwlock_rdunlock(&pg_cache->metrics_index.lock);
        if (NULL == PValue) {
            return NULL;
        }
    } else {
        page_index = index;
    }
    pg_cache_reserve_pages(ctx, 1);

    page_not_in_cache = 0;
    uv_rwlock_rdlock(&page_index->lock);
    while (1) {
        Index = (Word_t)(point_in_time / USEC_PER_SEC);
        PValue = JudyLLast(page_index->JudyL_array, &Index, PJE0);
        if (likely(NULL != PValue)) {
            descr = *PValue;
        }
        if (NULL == PValue ||
            0 == descr->page_length ||
            (INVALID_TIME != point_in_time &&
             !is_point_in_time_in_page(descr, point_in_time))) {
            /* non-empty page not found */
            uv_rwlock_rdunlock(&page_index->lock);

            pg_cache_release_pages(ctx, 1);
            return NULL;
        }
        rrdeng_page_descr_mutex_lock(ctx, descr);
        pg_cache_descr = descr->pg_cache_descr;
        flags = pg_cache_descr->flags;
        if ((flags & RRD_PAGE_POPULATED) && pg_cache_try_get_unsafe(descr, 0)) {
            /* success */
            rrdeng_page_descr_mutex_unlock(ctx, descr);
            debug(D_RRDENGINE, "%s: Page was found in memory.", __func__);
            break;
        }
        if (!(flags & RRD_PAGE_POPULATED) && pg_cache_try_get_unsafe(descr, 1)) {
            struct rrdeng_cmd cmd;

            uv_rwlock_rdunlock(&page_index->lock);

            cmd.opcode = RRDENG_READ_PAGE;
            cmd.read_page.page_cache_descr = descr;
            rrdeng_enq_cmd(&ctx->worker_config, &cmd);

            debug(D_RRDENGINE, "%s: Waiting for page to be asynchronously read from disk:", __func__);
            if (unlikely(debug_flags & D_RRDENGINE))
                print_page_cache_descr(descr);
            while (!(pg_cache_descr->flags & RRD_PAGE_POPULATED)) {
                pg_cache_wait_event_unsafe(descr);
            }
            /* success */
            /* Downgrade exclusive reference to allow other readers */
            pg_cache_descr->flags &= ~RRD_PAGE_LOCKED;
            pg_cache_wake_up_waiters_unsafe(descr);
            rrdeng_page_descr_mutex_unlock(ctx, descr);
            rrd_stat_atomic_add(&ctx->stats.pg_cache_misses, 1);
            return descr;
        }
        uv_rwlock_rdunlock(&page_index->lock);
        debug(D_RRDENGINE, "%s: Waiting for page to be unlocked:", __func__);
        if (unlikely(debug_flags & D_RRDENGINE))
            print_page_cache_descr(descr);
        if (!(flags & RRD_PAGE_POPULATED))
            page_not_in_cache = 1;
        pg_cache_wait_event_unsafe(descr);
        rrdeng_page_descr_mutex_unlock(ctx, descr);

        /* reset scan to find again */
        uv_rwlock_rdlock(&page_index->lock);
    }
    uv_rwlock_rdunlock(&page_index->lock);

    if (!(flags & RRD_PAGE_DIRTY))
        pg_cache_replaceQ_set_hot(ctx, descr);
    pg_cache_release_pages(ctx, 1);
    if (page_not_in_cache)
        rrd_stat_atomic_add(&ctx->stats.pg_cache_misses, 1);
    else
        rrd_stat_atomic_add(&ctx->stats.pg_cache_hits, 1);
    return descr;
}

/*
 * Searches for the first page between start_time and end_time and gets a reference.
 * start_time and end_time are inclusive.
 * If index is NULL lookup by UUID (id).
 */
struct rrdeng_page_descr *
pg_cache_lookup_next(struct rrdengine_instance *ctx, struct pg_cache_page_index *index, uuid_t *id,
                     usec_t start_time, usec_t end_time)
{
    struct page_cache *pg_cache = &ctx->pg_cache;
    struct rrdeng_page_descr *descr = NULL;
    struct page_cache_descr *pg_cache_descr = NULL;
    unsigned long flags;
    Pvoid_t *PValue;
    struct pg_cache_page_index *page_index = NULL;
    uint8_t page_not_in_cache;

    if (unlikely(NULL == index)) {
        uv_rwlock_rdlock(&pg_cache->metrics_index.lock);
        PValue = JudyHSGet(pg_cache->metrics_index.JudyHS_array, id, sizeof(uuid_t));
        if (likely(NULL != PValue)) {
            page_index = *PValue;
        }
        uv_rwlock_rdunlock(&pg_cache->metrics_index.lock);
        if (NULL == PValue) {
            return NULL;
        }
    } else {
        page_index = index;
    }
    pg_cache_reserve_pages(ctx, 1);

    page_not_in_cache = 0;
    uv_rwlock_rdlock(&page_index->lock);
    while (1) {
        descr = find_first_page_in_time_range(page_index, start_time, end_time);
        if (NULL == descr || 0 == descr->page_length) {
            /* non-empty page not found */
            uv_rwlock_rdunlock(&page_index->lock);

            pg_cache_release_pages(ctx, 1);
            return NULL;
        }
        rrdeng_page_descr_mutex_lock(ctx, descr);
        pg_cache_descr = descr->pg_cache_descr;
        flags = pg_cache_descr->flags;
        if ((flags & RRD_PAGE_POPULATED) && pg_cache_try_get_unsafe(descr, 0)) {
            /* success */
            rrdeng_page_descr_mutex_unlock(ctx, descr);
            debug(D_RRDENGINE, "%s: Page was found in memory.", __func__);
            break;
        }
        if (!(flags & RRD_PAGE_POPULATED) && pg_cache_try_get_unsafe(descr, 1)) {
            struct rrdeng_cmd cmd;

            uv_rwlock_rdunlock(&page_index->lock);

            cmd.opcode = RRDENG_READ_PAGE;
            cmd.read_page.page_cache_descr = descr;
            rrdeng_enq_cmd(&ctx->worker_config, &cmd);

            debug(D_RRDENGINE, "%s: Waiting for page to be asynchronously read from disk:", __func__);
            if (unlikely(debug_flags & D_RRDENGINE))
                print_page_cache_descr(descr);
            while (!(pg_cache_descr->flags & RRD_PAGE_POPULATED)) {
                pg_cache_wait_event_unsafe(descr);
            }
            /* success */
            /* Downgrade exclusive reference to allow other readers */
            pg_cache_descr->flags &= ~RRD_PAGE_LOCKED;
            pg_cache_wake_up_waiters_unsafe(descr);
            rrdeng_page_descr_mutex_unlock(ctx, descr);
            rrd_stat_atomic_add(&ctx->stats.pg_cache_misses, 1);
            return descr;
        }
        uv_rwlock_rdunlock(&page_index->lock);
        debug(D_RRDENGINE, "%s: Waiting for page to be unlocked:", __func__);
        if (unlikely(debug_flags & D_RRDENGINE))
            print_page_cache_descr(descr);
        if (!(flags & RRD_PAGE_POPULATED))
            page_not_in_cache = 1;
        pg_cache_wait_event_unsafe(descr);
        rrdeng_page_descr_mutex_unlock(ctx, descr);

        /* reset scan to find again */
        uv_rwlock_rdlock(&page_index->lock);
    }
    uv_rwlock_rdunlock(&page_index->lock);

    if (!(flags & RRD_PAGE_DIRTY))
        pg_cache_replaceQ_set_hot(ctx, descr);
    pg_cache_release_pages(ctx, 1);
    if (page_not_in_cache)
        rrd_stat_atomic_add(&ctx->stats.pg_cache_misses, 1);
    else
        rrd_stat_atomic_add(&ctx->stats.pg_cache_hits, 1);
    return descr;
}
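
/* Allocates and initializes a page index for the metric with the given UUID. */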
struct pg_cache_page_index *create_page_index(uuid_t *id)
{
    struct pg_cache_page_index *page_index;

    page_index = mallocz(sizeof(*page_index));
    page_index->JudyL_array = (Pvoid_t) NULL;
    uuid_copy(page_index->id, *id);
    fatal_assert(0 == uv_rwlock_init(&page_index->lock));
    page_index->oldest_time = INVALID_TIME;
    page_index->latest_time = INVALID_TIME;
    page_index->prev = NULL;
    page_index->page_count = 0;
    page_index->writers = 0;

    return page_index;
}

static void init_metrics_index(struct rrdengine_instance *ctx)
{
    struct page_cache *pg_cache = &ctx->pg_cache;

    pg_cache->metrics_index.JudyHS_array = (Pvoid_t) NULL;
    pg_cache->metrics_index.last_page_index = NULL;
    fatal_assert(0 == uv_rwlock_init(&pg_cache->metrics_index.lock));
}

static void init_replaceQ(struct rrdengine_instance *ctx)
{
    struct page_cache *pg_cache = &ctx->pg_cache;

    pg_cache->replaceQ.head = NULL;
    pg_cache->replaceQ.tail = NULL;
    fatal_assert(0 == uv_rwlock_init(&pg_cache->replaceQ.lock));
}

static void init_committed_page_index(struct rrdengine_instance *ctx)
{
    struct page_cache *pg_cache = &ctx->pg_cache;

    pg_cache->committed_page_index.JudyL_array = (Pvoid_t) NULL;
    fatal_assert(0 == uv_rwlock_init(&pg_cache->committed_page_index.lock));
    pg_cache->committed_page_index.latest_corr_id = 0;
    pg_cache->committed_page_index.nr_committed_pages = 0;
}
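
/* Initializes all page cache structures of a database instance; call once before the instance is used. */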
void init_page_cache(struct rrdengine_instance *ctx)
{
    struct page_cache *pg_cache = &ctx->pg_cache;

    pg_cache->page_descriptors = 0;
    pg_cache->populated_pages = 0;
    fatal_assert(0 == uv_rwlock_init(&pg_cache->pg_cache_rwlock));

    init_metrics_index(ctx);
    init_replaceQ(ctx);
    init_committed_page_index(ctx);
}
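
/*
 * Releases all page cache memory of a database instance and logs the number of bytes freed;
 * assumes no other thread is accessing the page cache any more (e.g. at shutdown).
 */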
void free_page_cache(struct rrdengine_instance *ctx)
{
    struct page_cache *pg_cache = &ctx->pg_cache;
    Word_t ret_Judy, bytes_freed = 0;
    Pvoid_t *PValue;
    struct pg_cache_page_index *page_index, *prev_page_index;
    Word_t Index;
    struct rrdeng_page_descr *descr;
    struct page_cache_descr *pg_cache_descr;

    /* Free committed page index */
    ret_Judy = JudyLFreeArray(&pg_cache->committed_page_index.JudyL_array, PJE0);
    fatal_assert(NULL == pg_cache->committed_page_index.JudyL_array);
    bytes_freed += ret_Judy;

    for (page_index = pg_cache->metrics_index.last_page_index ;
         page_index != NULL ;
         page_index = prev_page_index) {
        prev_page_index = page_index->prev;

        /* Find first page in range */
        Index = (Word_t) 0;
        PValue = JudyLFirst(page_index->JudyL_array, &Index, PJE0);
        descr = unlikely(NULL == PValue) ? NULL : *PValue;

        while (descr != NULL) {
            /* Iterate all page descriptors of this metric */

            if (descr->pg_cache_descr_state & PG_CACHE_DESCR_ALLOCATED) {
                /* Check rrdenglocking.c */
                pg_cache_descr = descr->pg_cache_descr;
                if (pg_cache_descr->flags & RRD_PAGE_POPULATED) {
                    freez(pg_cache_descr->page);
                    bytes_freed += RRDENG_BLOCK_SIZE;
                }
                rrdeng_destroy_pg_cache_descr(ctx, pg_cache_descr);
                bytes_freed += sizeof(*pg_cache_descr);
            }
            freez(descr);
            bytes_freed += sizeof(*descr);

            PValue = JudyLNext(page_index->JudyL_array, &Index, PJE0);
            descr = unlikely(NULL == PValue) ? NULL : *PValue;
        }

        /* Free page index */
        ret_Judy = JudyLFreeArray(&page_index->JudyL_array, PJE0);
        fatal_assert(NULL == page_index->JudyL_array);
        bytes_freed += ret_Judy;
        freez(page_index);
        bytes_freed += sizeof(*page_index);
    }
    /* Free metrics index */
    ret_Judy = JudyHSFreeArray(&pg_cache->metrics_index.JudyHS_array, PJE0);
    fatal_assert(NULL == pg_cache->metrics_index.JudyHS_array);
    bytes_freed += ret_Judy;

    info("Freed %lu bytes of memory from page cache.", bytes_freed);
}