/* rrdengine.h */
  1. // SPDX-License-Identifier: GPL-3.0-or-later
  2. #ifndef NETDATA_RRDENGINE_H
  3. #define NETDATA_RRDENGINE_H
  4. #ifndef _GNU_SOURCE
  5. #define _GNU_SOURCE
  6. #endif
  7. #include <fcntl.h>
  8. #include <lz4.h>
  9. #include <Judy.h>
  10. #include <openssl/sha.h>
  11. #include <openssl/evp.h>
  12. #include "daemon/common.h"
  13. #include "../rrd.h"
  14. #include "rrddiskprotocol.h"
  15. #include "rrdenginelib.h"
  16. #include "datafile.h"
  17. #include "journalfile.h"
  18. #include "rrdengineapi.h"
  19. #include "pagecache.h"
  20. #include "metric.h"
  21. #include "cache.h"
  22. #include "pdc.h"
  /* Defined in another translation unit; presumably the configured number of pages per extent. */
  extern unsigned rrdeng_pages_per_extent;

  /* Forward declarations */
  struct rrdengine_instance;
  struct rrdeng_cmd;

  /* Upper bound used to size per-extent page descriptor arrays.
   * TODO: can go higher only when journal supports bigger than 4KiB transactions */
  #define MAX_PAGES_PER_EXTENT (64)

  /* scanf()/printf() templates for a "<1 digit>-<10 digits>" pair of unsigned numbers. */
  #define RRDENG_FILE_NUMBER_SCAN_TMPL "%1u-%10u"
  #define RRDENG_FILE_NUMBER_PRINT_TMPL "%1.1u-%10.10u"
  30. typedef struct page_details_control {
  31. struct rrdengine_instance *ctx;
  32. struct metric *metric;
  33. struct completion prep_completion;
  34. struct completion page_completion; // sync between the query thread and the workers
  35. Pvoid_t page_list_JudyL; // the list of page details
  36. unsigned completed_jobs; // the number of jobs completed last time the query thread checked
  37. bool workers_should_stop; // true when the query thread left and the workers should stop
  38. bool prep_done;
  39. SPINLOCK refcount_spinlock; // spinlock to protect refcount
  40. int32_t refcount; // the number of workers currently working on this request + 1 for the query thread
  41. size_t executed_with_gaps;
  42. time_t start_time_s;
  43. time_t end_time_s;
  44. STORAGE_PRIORITY priority;
  45. time_t optimal_end_time_s;
  46. } PDC;
  47. PDC *pdc_get(void);
  /* Bit flags describing the state of a page inside a page details control. */
  typedef enum __attribute__ ((__packed__)) {
      // final status for all pages
      // if a page does not have one of these, it is considered unroutable
      PDC_PAGE_READY     = (1 << 0),  // ready to be processed (pd->page is not null)
      PDC_PAGE_FAILED    = (1 << 1),  // failed to be loaded (pd->page is null)
      PDC_PAGE_SKIP      = (1 << 2),  // don't use this page, it is not good for us
      PDC_PAGE_INVALID   = (1 << 3),  // don't use this page, it is invalid
      PDC_PAGE_EMPTY     = (1 << 4),  // the page is empty, does not have any data

      // other statuses for tracking issues
      PDC_PAGE_PREPROCESSED = (1 << 5),  // used during preprocessing
      PDC_PAGE_PROCESSED    = (1 << 6),  // processed by the query caller
      PDC_PAGE_RELEASED     = (1 << 7),  // already released

      // data found in cache (preloaded) or on disk?
      PDC_PAGE_PRELOADED    = (1 << 8),  // data found in memory
      PDC_PAGE_DISK_PENDING = (1 << 9),  // data need to be loaded from disk

      // worker related statuses
      PDC_PAGE_FAILED_INVALID_EXTENT      = (1 << 10),
      PDC_PAGE_FAILED_NOT_IN_EXTENT       = (1 << 11),
      PDC_PAGE_FAILED_TO_MAP_EXTENT       = (1 << 12),
      PDC_PAGE_FAILED_TO_ACQUIRE_DATAFILE = (1 << 13),

      PDC_PAGE_EXTENT_FROM_CACHE = (1 << 14),
      PDC_PAGE_EXTENT_FROM_DISK  = (1 << 15),

      PDC_PAGE_CANCELLED = (1 << 16),  // the query thread had left when we try to load the page

      PDC_PAGE_SOURCE_MAIN_CACHE = (1 << 17),
      PDC_PAGE_SOURCE_OPEN_CACHE = (1 << 18),
      PDC_PAGE_SOURCE_JOURNAL_V2 = (1 << 19),
      PDC_PAGE_PRELOADED_PASS4   = (1 << 20),

      // datafile acquired
      PDC_PAGE_DATAFILE_ACQUIRED = (1 << 30),
  } PDC_PAGE_STATUS;

  /* Mask of the statuses combined here: failed, skipped, invalid or already released. */
  #define PDC_PAGE_QUERY_GLOBAL_SKIP_LIST (PDC_PAGE_FAILED | PDC_PAGE_SKIP | PDC_PAGE_INVALID | PDC_PAGE_RELEASED)
  79. struct page_details {
  80. struct {
  81. struct rrdengine_datafile *ptr;
  82. uv_file file;
  83. unsigned fileno;
  84. struct {
  85. uint64_t pos;
  86. uint32_t bytes;
  87. } extent;
  88. } datafile;
  89. struct pgc_page *page;
  90. Word_t metric_id;
  91. time_t first_time_s;
  92. time_t last_time_s;
  93. uint32_t update_every_s;
  94. uint16_t page_length;
  95. PDC_PAGE_STATUS status;
  96. struct {
  97. struct page_details *prev;
  98. struct page_details *next;
  99. } load;
  100. };
  101. struct page_details *page_details_get(void);
  /*
   * Atomic accessors for the `status` bit mask of a page details object.
   *   pd   - pointer to an object having an integer `status` member
   *   flag - one or more status bits
   *
   * check: evaluates to the subset of `flag` currently set (acquire load)
   * set:   ORs `flag` into status and evaluates to the new status (release)
   * clear: removes `flag` from status and evaluates to the new status (release)
   */
  #define pdc_page_status_check(pd, flag) (__atomic_load_n(&((pd)->status), __ATOMIC_ACQUIRE) & (flag))
  #define pdc_page_status_set(pd, flag)   __atomic_or_fetch(&((pd)->status), (flag), __ATOMIC_RELEASE)
  // fixed: the macro used to dereference `od` instead of its own `pd` parameter
  #define pdc_page_status_clear(pd, flag) __atomic_and_fetch(&((pd)->status), ~(flag), __ATOMIC_RELEASE)
  /* Per-extent information (index, position, size in bytes, page count); the jv2 prefix presumably refers to journal v2. */
  struct jv2_extents_info {
      size_t index;
      uint64_t pos;
      unsigned bytes;
      size_t number_of_pages;
  };
  111. struct jv2_metrics_info {
  112. uuid_t *uuid;
  113. uint32_t page_list_header;
  114. time_t first_time_s;
  115. time_t last_time_s;
  116. size_t number_of_pages;
  117. Pvoid_t JudyL_pages_by_start_time;
  118. };
  /* Per-page information: time range, collection step, length and the index of the extent holding the page. */
  struct jv2_page_info {
      time_t start_time_s;
      time_t end_time_s;
      time_t update_every_s;
      size_t page_length;
      uint32_t extent_index;
      void *custom_data;

      // private
      struct pgc_page *page;
  };
  /* Option bits stored in rrdeng_collect_handle.options. */
  typedef enum __attribute__ ((__packed__)) {
      RRDENG_1ST_METRIC_WRITER = (1 << 0),
  } RRDENG_COLLECT_HANDLE_OPTIONS;
  /* Bit flags describing conditions observed on a page during collection. */
  typedef enum __attribute__ ((__packed__)) {
      RRDENG_PAGE_PAST_COLLECTION     = (1 << 0),
      RRDENG_PAGE_REPEATED_COLLECTION = (1 << 1),
      RRDENG_PAGE_BIG_GAP             = (1 << 2),
      RRDENG_PAGE_GAP                 = (1 << 3),
      RRDENG_PAGE_FUTURE_POINT        = (1 << 4),
      RRDENG_PAGE_CREATED_IN_FUTURE   = (1 << 5),
      RRDENG_PAGE_COMPLETED_IN_FUTURE = (1 << 6),
      RRDENG_PAGE_UNALIGNED           = (1 << 7),
      RRDENG_PAGE_CONFLICT            = (1 << 8),
      RRDENG_PAGE_FULL                = (1 << 9),
      RRDENG_PAGE_COLLECT_FINALIZE    = (1 << 10),
      RRDENG_PAGE_UPDATE_EVERY_CHANGE = (1 << 11),
      RRDENG_PAGE_STEP_TOO_SMALL      = (1 << 12),
      RRDENG_PAGE_STEP_UNALIGNED      = (1 << 13),
  } RRDENG_COLLECT_PAGE_FLAGS;
  148. struct rrdeng_collect_handle {
  149. struct storage_collect_handle common; // has to be first item
  150. RRDENG_COLLECT_PAGE_FLAGS page_flags;
  151. RRDENG_COLLECT_HANDLE_OPTIONS options;
  152. uint8_t type;
  153. struct metric *metric;
  154. struct pgc_page *page;
  155. void *data;
  156. size_t data_size;
  157. struct pg_alignment *alignment;
  158. uint32_t page_entries_max;
  159. uint32_t page_position; // keep track of the current page size, to make sure we don't exceed it
  160. usec_t page_start_time_ut;
  161. usec_t page_end_time_ut;
  162. usec_t update_every_ut;
  163. };
  164. struct rrdeng_query_handle {
  165. struct metric *metric;
  166. struct pgc_page *page;
  167. struct rrdengine_instance *ctx;
  168. storage_number *metric_data;
  169. struct page_details_control *pdc;
  170. // the request
  171. time_t start_time_s;
  172. time_t end_time_s;
  173. STORAGE_PRIORITY priority;
  174. // internal data
  175. time_t now_s;
  176. time_t dt_s;
  177. unsigned position;
  178. unsigned entries;
  179. #ifdef NETDATA_INTERNAL_CHECKS
  180. usec_t started_time_s;
  181. pid_t query_pid;
  182. struct rrdeng_query_handle *prev, *next;
  183. #endif
  184. };
  185. struct rrdeng_query_handle *rrdeng_query_handle_get(void);
  186. void rrdeng_query_handle_release(struct rrdeng_query_handle *handle);
  /* Commands accepted by the dbengine event loop. RRDENG_OPCODE_MAX is the number of opcodes, not a command. */
  enum rrdeng_opcode {
      /* can be used to return empty status or flush the command queue */
      RRDENG_OPCODE_NOOP = 0,

      RRDENG_OPCODE_QUERY,
      RRDENG_OPCODE_EXTENT_WRITE,
      RRDENG_OPCODE_EXTENT_READ,
      RRDENG_OPCODE_FLUSHED_TO_OPEN,
      RRDENG_OPCODE_DATABASE_ROTATE,
      RRDENG_OPCODE_JOURNAL_INDEX,
      RRDENG_OPCODE_FLUSH_INIT,
      RRDENG_OPCODE_EVICT_INIT,
      RRDENG_OPCODE_CTX_SHUTDOWN,
      RRDENG_OPCODE_CTX_QUIESCE,
      RRDENG_OPCODE_CTX_POPULATE_MRG,
      RRDENG_OPCODE_CLEANUP,

      RRDENG_OPCODE_MAX
  };

  // WORKERS IDS:
  // RRDENG_OPCODE_MAX                     : reserved for the cleanup
  // RRDENG_OPCODE_MAX + opcode            : reserved for the callbacks of each opcode
  // RRDENG_OPCODE_MAX + RRDENG_OPCODE_MAX : reserved for the timer
  #define RRDENG_TIMER_CB                    (RRDENG_OPCODE_MAX + RRDENG_OPCODE_MAX)
  #define RRDENG_FLUSH_TRANSACTION_BUFFER_CB (RRDENG_TIMER_CB + 1)
  #define RRDENG_OPCODES_WAITING             (RRDENG_TIMER_CB + 2)
  #define RRDENG_WORKS_DISPATCHED            (RRDENG_TIMER_CB + 3)
  #define RRDENG_WORKS_EXECUTING             (RRDENG_TIMER_CB + 4)
  213. struct extent_io_data {
  214. unsigned fileno;
  215. uv_file file;
  216. uint64_t pos;
  217. unsigned bytes;
  218. uint16_t page_length;
  219. };
  220. struct extent_io_descriptor {
  221. struct rrdengine_instance *ctx;
  222. uv_fs_t uv_fs_request;
  223. uv_buf_t iov;
  224. uv_file file;
  225. void *buf;
  226. struct wal *wal;
  227. uint64_t pos;
  228. unsigned bytes;
  229. struct completion *completion;
  230. unsigned descr_count;
  231. struct page_descr_with_data *descr_array[MAX_PAGES_PER_EXTENT];
  232. struct rrdengine_datafile *datafile;
  233. struct extent_io_descriptor *next; /* multiple requests to be served by the same cached extent */
  234. };
  235. struct generic_io_descriptor {
  236. struct rrdengine_instance *ctx;
  237. uv_fs_t req;
  238. uv_buf_t iov;
  239. void *buf;
  240. void *data;
  241. uint64_t pos;
  242. unsigned bytes;
  243. struct completion *completion;
  244. };
  245. typedef struct wal {
  246. uint64_t transaction_id;
  247. void *buf;
  248. size_t size;
  249. size_t buf_size;
  250. struct generic_io_descriptor io_descr;
  251. struct {
  252. struct wal *prev;
  253. struct wal *next;
  254. } cache;
  255. } WAL;
  256. WAL *wal_get(struct rrdengine_instance *ctx, unsigned size);
  257. void wal_release(WAL *wal);
  258. /*
  259. * Debug statistics not used by code logic.
  260. * They only describe operations since DB engine instance load time.
  261. */
  262. struct rrdengine_statistics {
  263. rrdeng_stats_t before_decompress_bytes;
  264. rrdeng_stats_t after_decompress_bytes;
  265. rrdeng_stats_t before_compress_bytes;
  266. rrdeng_stats_t after_compress_bytes;
  267. rrdeng_stats_t io_write_bytes;
  268. rrdeng_stats_t io_write_requests;
  269. rrdeng_stats_t io_read_bytes;
  270. rrdeng_stats_t io_read_requests;
  271. rrdeng_stats_t datafile_creations;
  272. rrdeng_stats_t datafile_deletions;
  273. rrdeng_stats_t journalfile_creations;
  274. rrdeng_stats_t journalfile_deletions;
  275. rrdeng_stats_t io_errors;
  276. rrdeng_stats_t fs_errors;
  277. };
  278. /* I/O errors global counter */
  279. extern rrdeng_stats_t global_io_errors;
  280. /* File-System errors global counter */
  281. extern rrdeng_stats_t global_fs_errors;
  282. /* number of File-Descriptors that have been reserved by dbengine */
  283. extern rrdeng_stats_t rrdeng_reserved_file_descriptors;
  284. /* inability to flush global counters */
  285. extern rrdeng_stats_t global_pg_cache_over_half_dirty_events;
  286. extern rrdeng_stats_t global_flushing_pressure_page_deletions; /* number of deleted pages */
  287. struct rrdengine_instance {
  288. struct {
  289. bool legacy; // true when the db is autonomous for a single host
  290. int tier; // the tier of this ctx
  291. uint8_t page_type; // default page type for this context
  292. uint64_t max_disk_space; // the max disk space this ctx is allowed to use
  293. uint8_t global_compress_alg; // the wanted compression algorithm
  294. char dbfiles_path[FILENAME_MAX + 1];
  295. } config;
  296. struct {
  297. uv_rwlock_t rwlock; // the linked list of datafiles is protected by this lock
  298. struct rrdengine_datafile *first; // oldest - the newest with ->first->prev
  299. } datafiles;
  300. struct {
  301. unsigned last_fileno; // newest index of datafile and journalfile
  302. unsigned last_flush_fileno; // newest index of datafile received data
  303. size_t collectors_running;
  304. size_t collectors_running_duplicate;
  305. size_t inflight_queries; // the number of queries currently running
  306. uint64_t current_disk_space; // the current disk space size used
  307. uint64_t transaction_id; // the transaction id of the next extent flushing
  308. bool migration_to_v2_running;
  309. bool now_deleting_files;
  310. unsigned extents_currently_being_flushed; // non-zero until we commit data to disk (both datafile and journal file)
  311. } atomic;
  312. struct {
  313. bool exit_mode;
  314. bool enabled; // when set (before shutdown), queries are prohibited
  315. struct completion completion;
  316. } quiesce;
  317. struct {
  318. struct {
  319. size_t size;
  320. struct completion *array;
  321. } populate_mrg;
  322. bool create_new_datafile_pair;
  323. } loading;
  324. struct rrdengine_statistics stats;
  325. };
  /*
   * Relaxed-atomic accessors for ctx->atomic.current_disk_space.
   *   get:      evaluates to the current value
   *   increase: adds `size` and evaluates to the new value
   *   decrease: subtracts `size` and evaluates to the new value
   */
  #define ctx_current_disk_space_get(ctx) \
      __atomic_load_n(&(ctx)->atomic.current_disk_space, __ATOMIC_RELAXED)
  #define ctx_current_disk_space_increase(ctx, size) \
      __atomic_add_fetch(&(ctx)->atomic.current_disk_space, (size), __ATOMIC_RELAXED)
  #define ctx_current_disk_space_decrease(ctx, size) \
      __atomic_sub_fetch(&(ctx)->atomic.current_disk_space, (size), __ATOMIC_RELAXED)
  329. static inline void ctx_io_read_op_bytes(struct rrdengine_instance *ctx, size_t bytes) {
  330. __atomic_add_fetch(&ctx->stats.io_read_bytes, bytes, __ATOMIC_RELAXED);
  331. __atomic_add_fetch(&ctx->stats.io_read_requests, 1, __ATOMIC_RELAXED);
  332. }
  333. static inline void ctx_io_write_op_bytes(struct rrdengine_instance *ctx, size_t bytes) {
  334. __atomic_add_fetch(&ctx->stats.io_write_bytes, bytes, __ATOMIC_RELAXED);
  335. __atomic_add_fetch(&ctx->stats.io_write_requests, 1, __ATOMIC_RELAXED);
  336. }
  337. static inline void ctx_io_error(struct rrdengine_instance *ctx) {
  338. __atomic_add_fetch(&ctx->stats.io_errors, 1, __ATOMIC_RELAXED);
  339. rrd_stat_atomic_add(&global_io_errors, 1);
  340. }
  341. static inline void ctx_fs_error(struct rrdengine_instance *ctx) {
  342. __atomic_add_fetch(&ctx->stats.fs_errors, 1, __ATOMIC_RELAXED);
  343. rrd_stat_atomic_add(&global_fs_errors, 1);
  344. }
  /*
   * Relaxed-atomic accessors for the file number counters of a ctx.
   *   ctx_last_fileno_get:       evaluates to ctx->atomic.last_fileno
   *   ctx_last_fileno_increment: adds 1 to it and evaluates to the new value
   *   ctx_last_flush_fileno_get: evaluates to ctx->atomic.last_flush_fileno
   */
  #define ctx_last_fileno_get(ctx) \
      __atomic_load_n(&(ctx)->atomic.last_fileno, __ATOMIC_RELAXED)
  #define ctx_last_fileno_increment(ctx) \
      __atomic_add_fetch(&(ctx)->atomic.last_fileno, 1, __ATOMIC_RELAXED)
  #define ctx_last_flush_fileno_get(ctx) \
      __atomic_load_n(&(ctx)->atomic.last_flush_fileno, __ATOMIC_RELAXED)
  348. static inline void ctx_last_flush_fileno_set(struct rrdengine_instance *ctx, unsigned fileno) {
  349. unsigned old_fileno = ctx_last_flush_fileno_get(ctx);
  350. do {
  351. if(old_fileno >= fileno)
  352. return;
  353. } while(!__atomic_compare_exchange_n(&ctx->atomic.last_flush_fileno, &old_fileno, fileno, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED));
  354. }
  /* True when neither ctx->quiesce.enabled nor ctx->quiesce.exit_mode is set (both read with relaxed atomic loads). */
  #define ctx_is_available_for_queries(ctx) \
      (__atomic_load_n(&(ctx)->quiesce.enabled, __ATOMIC_RELAXED) == false && \
       __atomic_load_n(&(ctx)->quiesce.exit_mode, __ATOMIC_RELAXED) == false)
  /* Page and extent buffers: each *_free() takes the buffer together with a size (presumably the size given to the matching *_alloc() - confirm). */
  void *dbengine_page_alloc(size_t size);
  void dbengine_page_free(void *page, size_t size);

  void *dbengine_extent_alloc(size_t size);
  void dbengine_extent_free(void *extent, size_t size);

  /* Disk quota check for a ctx (the quota is presumably config.max_disk_space - confirm). */
  bool rrdeng_ctx_exceeded_disk_quota(struct rrdengine_instance *ctx);

  /* Instance file initialization / finalization; the meaning of the int result is defined by the implementation. */
  int init_rrd_files(struct rrdengine_instance *ctx);
  void finalize_rrd_files(struct rrdengine_instance *ctx);

  /* Event loop startup and entry point (`arg` is opaque here). */
  bool rrdeng_dbengine_spawn(struct rrdengine_instance *ctx);
  void dbengine_event_loop(void *arg);
  365. typedef void (*enqueue_callback_t)(struct rrdeng_cmd *cmd);
  366. typedef void (*dequeue_callback_t)(struct rrdeng_cmd *cmd);
  367. void rrdeng_enqueue_epdl_cmd(struct rrdeng_cmd *cmd);
  368. void rrdeng_dequeue_epdl_cmd(struct rrdeng_cmd *cmd);
  369. typedef struct rrdeng_cmd *(*requeue_callback_t)(void *data);
  370. void rrdeng_req_cmd(requeue_callback_t get_cmd_cb, void *data, STORAGE_PRIORITY priority);
  371. void rrdeng_enq_cmd(struct rrdengine_instance *ctx, enum rrdeng_opcode opcode, void *data,
  372. struct completion *completion, enum storage_priority priority,
  373. enqueue_callback_t enqueue_cb, dequeue_callback_t dequeue_cb);
  374. void pdc_route_asynchronously(struct rrdengine_instance *ctx, struct page_details_control *pdc);
  375. void pdc_route_synchronously(struct rrdengine_instance *ctx, struct page_details_control *pdc);
  376. void pdc_acquire(PDC *pdc);
  377. bool pdc_release_and_destroy_if_unreferenced(PDC *pdc, bool worker, bool router);
  378. unsigned rrdeng_target_data_file_size(struct rrdengine_instance *ctx);
  379. struct page_descr_with_data *page_descriptor_get(void);
  /* Result type of the page validation functions: the page properties plus a validity flag. */
  typedef struct validated_page_descriptor {
      time_t start_time_s;
      time_t end_time_s;
      time_t update_every_s;
      size_t page_length;
      size_t point_size;
      size_t entries;
      uint8_t type;
      bool is_valid;
  } VALIDATED_PAGE_DESCRIPTOR;
  /* Sentinel pointer value (all bits set) standing for an empty page. */
  #define DBENGINE_EMPTY_PAGE ((void *)(-1))

  /*
   * Number of points in [start_time_s, end_time_s] at one point per
   * update_every_s (both ends included); evaluates to 1 when update_every_s is 0.
   */
  #define page_entries_by_time(start_time_s, end_time_s, update_every_s) \
      ((update_every_s) ? (((end_time_s) - ((start_time_s) - (update_every_s))) / (update_every_s)) : 1)

  /* Number of points that fit in a page of the given length. point_size_in_bytes must not be 0. */
  #define page_entries_by_size(page_length_in_bytes, point_size_in_bytes) \
      ((page_length_in_bytes) / (point_size_in_bytes))
  395. VALIDATED_PAGE_DESCRIPTOR validate_page(uuid_t *uuid,
  396. time_t start_time_s,
  397. time_t end_time_s,
  398. time_t update_every_s,
  399. size_t page_length,
  400. uint8_t page_type,
  401. size_t entries,
  402. time_t now_s,
  403. time_t overwrite_zero_update_every_s,
  404. bool have_read_error,
  405. const char *msg,
  406. RRDENG_COLLECT_PAGE_FLAGS flags);
  407. VALIDATED_PAGE_DESCRIPTOR validate_extent_page_descr(const struct rrdeng_extent_page_descr *descr, time_t now_s, time_t overwrite_zero_update_every_s, bool have_read_error);
  408. void collect_page_flags_to_buffer(BUFFER *wb, RRDENG_COLLECT_PAGE_FLAGS flags);
  /* Position of a page relative to a wanted time range. */
  typedef enum {
      PAGE_IS_IN_THE_PAST   = -1,
      PAGE_IS_IN_RANGE      = 0,
      PAGE_IS_IN_THE_FUTURE = 1,
  } TIME_RANGE_COMPARE;

  /* Compare the page time range [page_first_time_s, page_last_time_s] against the wanted time range. */
  TIME_RANGE_COMPARE is_page_in_time_range(time_t page_first_time_s, time_t page_last_time_s,
                                           time_t wanted_start_time_s, time_t wanted_end_time_s);
  /* Returns now_realtime_sec() plus one second. */
  static inline time_t max_acceptable_collected_time(void) {
      time_t now_s = now_realtime_sec();
      return now_s + 1;
  }
  /* Delete `datafile` of `ctx`; the exact effect of `update_retention` and `worker` is defined by the implementation (not visible here). */
  void datafile_delete(struct rrdengine_instance *ctx, struct rrdengine_datafile *datafile,
                       bool update_retention, bool worker);
  419. static inline int journal_metric_uuid_compare(const void *key, const void *metric) {
  420. return uuid_memcmp((uuid_t *)key, &(((struct journal_metric_list *) metric)->uuid));
  421. }
  422. #endif /* NETDATA_RRDENGINE_H */