  1. // SPDX-License-Identifier: GPL-3.0-or-later
  2. #ifndef NETDATA_PAGECACHE_H
  3. #define NETDATA_PAGECACHE_H
  4. #include "rrdengine.h"
  5. /* Forward declarations */
  6. struct rrdengine_instance;
  7. struct extent_info;
  8. struct rrdeng_page_descr;
  9. #define INVALID_TIME (0)
  10. #define MAX_PAGE_CACHE_FETCH_RETRIES (3)
  11. #define PAGE_CACHE_FETCH_WAIT_TIMEOUT (3)
  12. /* Page flags */
  13. #define RRD_PAGE_DIRTY (1LU << 0)
  14. #define RRD_PAGE_LOCKED (1LU << 1)
  15. #define RRD_PAGE_READ_PENDING (1LU << 2)
  16. #define RRD_PAGE_WRITE_PENDING (1LU << 3)
  17. #define RRD_PAGE_POPULATED (1LU << 4)
/*
 * Ephemeral page cache descriptor: exists only while the page resides in the
 * page cache (see rrdeng_page_descr.pg_cache_descr), and is allocated through
 * the CAS-based algorithm driven by rrdeng_page_descr.pg_cache_descr_state.
 */
struct page_cache_descr {
    struct rrdeng_page_descr *descr; /* parent descriptor */
    void *page;                      /* the in-memory page payload */
    unsigned long flags;             /* combination of the RRD_PAGE_* flags above */
    struct page_cache_descr *prev;   /* LRU */
    struct page_cache_descr *next;   /* LRU */
    unsigned refcnt;                 /* references held on this page; see pg_cache_try_get_unsafe()/pg_cache_put() */
    uv_mutex_t mutex;                /* always take it after the page cache lock or after the commit lock */
    uv_cond_t cond;                  /* signalled by pg_cache_wake_up_waiters*() to release blocked threads */
    unsigned waiters;                /* number of threads currently waiting on cond */
};
  29. /* Page cache descriptor flags, state = 0 means no descriptor */
  30. #define PG_CACHE_DESCR_ALLOCATED (1LU << 0)
  31. #define PG_CACHE_DESCR_DESTROY (1LU << 1)
  32. #define PG_CACHE_DESCR_LOCKED (1LU << 2)
  33. #define PG_CACHE_DESCR_SHIFT (3)
  34. #define PG_CACHE_DESCR_USERS_MASK (((unsigned long)-1) << PG_CACHE_DESCR_SHIFT)
  35. #define PG_CACHE_DESCR_FLAGS_MASK (((unsigned long)-1) >> (BITS_PER_ULONG - PG_CACHE_DESCR_SHIFT))
/*
 * Page cache descriptor state bits (works for both 32-bit and 64-bit architectures):
 *
 *  63 (31 on 32-bit) ........... 3 |    2    |   1    |     0     |
 * ---------------------------------+---------+--------+-----------|
 *  number of descriptor users      | DESTROY | LOCKED | ALLOCATED |
 */
/* Descriptor of a single page of metric data. */
struct rrdeng_page_descr {
    uuid_t *id;                 /* never changes */
    struct extent_info *extent; /* disk extent holding the page; NULL while the page is still being written to
                                 * (see pg_cache_atomic_get_pg_info()) */
    /* points to ephemeral page cache descriptor if the page resides in the cache */
    struct page_cache_descr *pg_cache_descr;
    /* Compare-And-Swap target for page cache descriptor allocation algorithm */
    volatile unsigned long pg_cache_descr_state;
    /* page information */
    usec_t start_time;          /* start of the page's time range, in usec */
    usec_t end_time;            /* end of the page's time range, in usec; its LSB doubles as the
                                 * "update in progress" marker, see pg_cache_atomic_set_pg_info() */
    uint32_t page_length;       /* length of the useful page data -- presumably bytes, confirm at call sites */
    uint8_t type;               /* page type -- semantics defined elsewhere (rrdengine.h) */
};
  56. #define PAGE_INFO_SCRATCH_SZ (8)
/* Lightweight, copyable snapshot of a page's time range and length,
 * filled in by the lookup/preload APIs below. */
struct rrdeng_page_info {
    uint8_t scratch[PAGE_INFO_SCRATCH_SZ]; /* scratch area to be used by page-cache users */
    usec_t start_time;   /* start of the page's time range, in usec */
    usec_t end_time;     /* end of the page's time range, in usec */
    uint32_t page_length;
};
  63. /* returns 1 for success, 0 for failure */
  64. typedef int pg_cache_page_info_filter_t(struct rrdeng_page_descr *);
  65. #define PAGE_CACHE_MAX_PRELOAD_PAGES (256)
/* maps time ranges to pages, for one metric (UUID) */
struct pg_cache_page_index {
    uuid_t id; /* UUID of the metric this index belongs to */
    /*
     * care: JudyL_array indices are converted from useconds to seconds to fit in one word in 32-bit architectures
     * TODO: examine if we want to support better granularity than seconds
     */
    Pvoid_t JudyL_array;    /* time (seconds) -> page mapping */
    Word_t page_count;      /* number of pages tracked in JudyL_array */
    unsigned short writers; /* NOTE(review): appears to count active writers -- confirm against users of this field */
    uv_rwlock_t lock;
    /*
     * Only one effective writer, data deletion workqueue.
     * It's also written during the DB loading phase.
     */
    usec_t oldest_time;
    /*
     * Only one effective writer, data collection thread.
     * It's also written by the data deletion workqueue when data collection is disabled for this metric.
     */
    usec_t latest_time;
    /* chains all page indices into a list headed at pg_cache_metrics_index.last_page_index */
    struct pg_cache_page_index *prev;
};
/* maps UUIDs to page indices */
struct pg_cache_metrics_index {
    uv_rwlock_t lock;
    Pvoid_t JudyHS_array; /* UUID -> struct pg_cache_page_index * */
    /* head of the list chained through pg_cache_page_index.prev */
    struct pg_cache_page_index *last_page_index;
};
/* gathers dirty pages to be written on disk */
struct pg_cache_committed_page_index {
    uv_rwlock_t lock;
    Pvoid_t JudyL_array; /* correlation ID -> dirty page */
    /*
     * Dirty page correlation ID is a hint. Dirty pages that are correlated should have
     * a small correlation ID difference. Dirty pages in memory should never have the
     * same ID at the same time for correctness.
     */
    Word_t latest_corr_id;          /* most recently assigned correlation ID */
    unsigned nr_committed_pages;    /* dirty pages currently awaiting flushing; bounded by
                                     * pg_cache_committed_hard_limit() -- presumably, confirm in .c */
};
/*
 * Gathers populated pages to be evicted.
 * Relies on page cache descriptors being there as it uses their memory
 * (the prev/next links of struct page_cache_descr).
 */
struct pg_cache_replaceQ {
    uv_rwlock_t lock;              /* LRU lock */
    struct page_cache_descr *head; /* LRU: evict from here first */
    struct page_cache_descr *tail; /* MRU: most recently used end */
};
/* Top-level page cache state, one per rrdengine instance. */
struct page_cache { /* TODO: add statistics */
    uv_rwlock_t pg_cache_rwlock; /* page cache lock */
    struct pg_cache_metrics_index metrics_index;              /* UUID -> page index */
    struct pg_cache_committed_page_index committed_page_index; /* dirty pages pending disk write */
    struct pg_cache_replaceQ replaceQ;                         /* eviction (LRU) queue */
    unsigned page_descriptors; /* number of page descriptors -- compared against pg_cache_hard/soft_limit(), presumably */
    unsigned populated_pages;  /* number of pages with data in memory (RRD_PAGE_POPULATED), presumably */
};
  124. extern void pg_cache_wake_up_waiters_unsafe(struct rrdeng_page_descr *descr);
  125. extern void pg_cache_wake_up_waiters(struct rrdengine_instance *ctx, struct rrdeng_page_descr *descr);
  126. extern void pg_cache_wait_event_unsafe(struct rrdeng_page_descr *descr);
  127. extern unsigned long pg_cache_wait_event(struct rrdengine_instance *ctx, struct rrdeng_page_descr *descr);
  128. extern void pg_cache_replaceQ_insert(struct rrdengine_instance *ctx,
  129. struct rrdeng_page_descr *descr);
  130. extern void pg_cache_replaceQ_delete(struct rrdengine_instance *ctx,
  131. struct rrdeng_page_descr *descr);
  132. extern void pg_cache_replaceQ_set_hot(struct rrdengine_instance *ctx,
  133. struct rrdeng_page_descr *descr);
  134. extern struct rrdeng_page_descr *pg_cache_create_descr(void);
  135. extern int pg_cache_try_get_unsafe(struct rrdeng_page_descr *descr, int exclusive_access);
  136. extern void pg_cache_put_unsafe(struct rrdeng_page_descr *descr);
  137. extern void pg_cache_put(struct rrdengine_instance *ctx, struct rrdeng_page_descr *descr);
  138. extern void pg_cache_insert(struct rrdengine_instance *ctx, struct pg_cache_page_index *index,
  139. struct rrdeng_page_descr *descr);
  140. extern uint8_t pg_cache_punch_hole(struct rrdengine_instance *ctx, struct rrdeng_page_descr *descr,
  141. uint8_t remove_dirty, uint8_t is_exclusive_holder, uuid_t *metric_id);
  142. extern usec_t pg_cache_oldest_time_in_range(struct rrdengine_instance *ctx, uuid_t *id,
  143. usec_t start_time, usec_t end_time);
  144. extern void pg_cache_get_filtered_info_prev(struct rrdengine_instance *ctx, struct pg_cache_page_index *page_index,
  145. usec_t point_in_time, pg_cache_page_info_filter_t *filter,
  146. struct rrdeng_page_info *page_info);
  147. extern struct rrdeng_page_descr *pg_cache_lookup_unpopulated_and_lock(struct rrdengine_instance *ctx, uuid_t *id,
  148. usec_t start_time);
  149. extern unsigned
  150. pg_cache_preload(struct rrdengine_instance *ctx, uuid_t *id, usec_t start_time, usec_t end_time,
  151. struct rrdeng_page_info **page_info_arrayp, struct pg_cache_page_index **ret_page_indexp);
  152. extern struct rrdeng_page_descr *
  153. pg_cache_lookup(struct rrdengine_instance *ctx, struct pg_cache_page_index *index, uuid_t *id,
  154. usec_t point_in_time);
  155. extern struct rrdeng_page_descr *
  156. pg_cache_lookup_next(struct rrdengine_instance *ctx, struct pg_cache_page_index *index, uuid_t *id,
  157. usec_t start_time, usec_t end_time);
  158. extern struct pg_cache_page_index *create_page_index(uuid_t *id);
  159. extern void init_page_cache(struct rrdengine_instance *ctx);
  160. extern void free_page_cache(struct rrdengine_instance *ctx);
  161. extern void pg_cache_add_new_metric_time(struct pg_cache_page_index *page_index, struct rrdeng_page_descr *descr);
  162. extern void pg_cache_update_metric_times(struct pg_cache_page_index *page_index);
  163. extern unsigned long pg_cache_hard_limit(struct rrdengine_instance *ctx);
  164. extern unsigned long pg_cache_soft_limit(struct rrdengine_instance *ctx);
  165. extern unsigned long pg_cache_committed_hard_limit(struct rrdengine_instance *ctx);
  166. extern void rrdeng_page_descr_aral_go_singlethreaded(void);
  167. extern void rrdeng_page_descr_aral_go_multithreaded(void);
  168. extern void rrdeng_page_descr_use_malloc(void);
  169. extern void rrdeng_page_descr_use_mmap(void);
  170. extern bool rrdeng_page_descr_is_mmap(void);
  171. extern struct rrdeng_page_descr *rrdeng_page_descr_mallocz(void);
  172. extern void rrdeng_page_descr_freez(struct rrdeng_page_descr *descr);
/*
 * Atomically snapshots a descriptor's end_time and page_length without taking
 * any lock.
 *
 * A page that belongs to an extent (descr->extent != NULL) is no longer being
 * modified, so its fields are read directly. A page still being collected is
 * read with a seqlock-like retry loop: the writer
 * (pg_cache_atomic_set_pg_info()) sets the LSB of end_time while an update is
 * in flight, so the reader retries until it sees the same, even end_time on
 * both sides of the page_length read.
 */
static inline void
pg_cache_atomic_get_pg_info(struct rrdeng_page_descr *descr, usec_t *end_timep, uint32_t *page_lengthp)
{
    usec_t end_time, old_end_time;
    uint32_t page_length;

    if (NULL == descr->extent) {
        /* this page is currently being modified, get consistent info locklessly */
        do {
            end_time = descr->end_time;
            __sync_synchronize();
            old_end_time = end_time;
            page_length = descr->page_length;
            __sync_synchronize();
            end_time = descr->end_time;
            __sync_synchronize();
            /* retry if end_time changed under us, or an update is in progress (odd LSB) */
        } while ((end_time != old_end_time || (end_time & 1) != 0));

        *end_timep = end_time;
        *page_lengthp = page_length;
    } else {
        /* page already belongs to a disk extent; fields are stable */
        *end_timep = descr->end_time;
        *page_lengthp = descr->page_length;
    }
}
/*
 * Writer side of the lockless protocol read by pg_cache_atomic_get_pg_info().
 * The caller must hold a reference to the page and must have already set the new data.
 * end_time must be even: the LSB is reserved as the in-progress marker.
 */
static inline void pg_cache_atomic_set_pg_info(struct rrdeng_page_descr *descr, usec_t end_time, uint32_t page_length)
{
    fatal_assert(!(end_time & 1)); /* LSB is the update-in-progress flag, so it must be clear */

    __sync_synchronize();
    descr->end_time |= 1; /* mark start of uncertainty period by adding 1 microsecond */
    __sync_synchronize();
    descr->page_length = page_length;
    __sync_synchronize();
    descr->end_time = end_time; /* mark end of uncertainty period */
}
  207. #endif /* NETDATA_PAGECACHE_H */