cache.c

  1. // SPDX-License-Identifier: GPL-3.0-or-later
  2. #include "cache.h"
  3. /* STATES AND TRANSITIONS
  4. *
  5. *    entry                     entry
  6. *      v                         v
  7. *     HOT ----> DIRTY ----> CLEAN ----> EVICT
  8. *                 v           v
  9. *               flush       evict
  10. *                 v           v
  11. *                save        free
  12. *             callback    callback
  13. *
  14. */
  15. typedef int32_t REFCOUNT;
  16. #define REFCOUNT_DELETING (-100)
  17. // to disable ARAL, comment out the following line:
  18. #define PGC_WITH_ARAL 1
  19. typedef enum __attribute__ ((__packed__)) {
  20. // mutually exclusive flags
  21. PGC_PAGE_CLEAN = (1 << 0), // none of the following
  22. PGC_PAGE_DIRTY = (1 << 1), // contains unsaved data
  23. PGC_PAGE_HOT = (1 << 2), // currently being collected
  24. // flags related to various actions on each page
  25. PGC_PAGE_IS_BEING_DELETED = (1 << 3),
  26. PGC_PAGE_IS_BEING_MIGRATED_TO_V2 = (1 << 4),
  27. PGC_PAGE_HAS_NO_DATA_IGNORE_ACCESSES = (1 << 5),
  28. PGC_PAGE_HAS_BEEN_ACCESSED = (1 << 6),
  29. } PGC_PAGE_FLAGS;
  30. #define page_flag_check(page, flag) (__atomic_load_n(&((page)->flags), __ATOMIC_ACQUIRE) & (flag))
  31. #define page_flag_set(page, flag) __atomic_or_fetch(&((page)->flags), flag, __ATOMIC_RELEASE)
  32. #define page_flag_clear(page, flag) __atomic_and_fetch(&((page)->flags), ~(flag), __ATOMIC_RELEASE)
  33. #define page_get_status_flags(page) page_flag_check(page, PGC_PAGE_HOT | PGC_PAGE_DIRTY | PGC_PAGE_CLEAN)
  34. #define is_page_hot(page) (page_get_status_flags(page) == PGC_PAGE_HOT)
  35. #define is_page_dirty(page) (page_get_status_flags(page) == PGC_PAGE_DIRTY)
  36. #define is_page_clean(page) (page_get_status_flags(page) == PGC_PAGE_CLEAN)
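/*
 * Illustrative sketch (not part of the build): CLEAN, DIRTY and HOT are mutually
 * exclusive, so page_get_status_flags() yields exactly one of them and the
 * is_page_*() macros compare for equality rather than testing individual bits.
 * The helper below is hypothetical, shown only to demonstrate the intended usage:
 *
 *   static const char *page_state_name(PGC_PAGE *page) {
 *       if(is_page_hot(page))   return "hot";
 *       if(is_page_dirty(page)) return "dirty";
 *       if(is_page_clean(page)) return "clean";
 *       return "in transition";   // flags may be momentarily 0 between queues
 *   }
 */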
  37. struct pgc_page {
  38. // indexing data
  39. Word_t section;
  40. Word_t metric_id;
  41. time_t start_time_s;
  42. time_t end_time_s;
  43. uint32_t update_every_s;
  44. uint32_t assumed_size;
  45. REFCOUNT refcount;
  46. uint16_t accesses; // counts the number of accesses on this page
  47. PGC_PAGE_FLAGS flags;
  48. SPINLOCK transition_spinlock; // when the page changes between HOT, DIRTY, CLEAN, we have to get this lock
  49. struct {
  50. struct pgc_page *next;
  51. struct pgc_page *prev;
  52. } link;
  53. void *data;
  54. uint8_t custom_data[];
  55. // IMPORTANT!
  56. // THIS STRUCTURE NEEDS TO BE INITIALIZED BY HAND!
  57. };
  58. struct pgc_linked_list {
  59. SPINLOCK spinlock;
  60. union {
  61. PGC_PAGE *base;
  62. Pvoid_t sections_judy;
  63. };
  64. PGC_PAGE_FLAGS flags;
  65. size_t version;
  66. size_t last_version_checked;
  67. bool linked_list_in_sections_judy; // when true, we use 'sections_judy', otherwise we use 'base'
  68. struct pgc_queue_statistics *stats;
  69. };
  70. struct pgc {
  71. struct {
  72. char name[PGC_NAME_MAX + 1];
  73. size_t partitions;
  74. size_t clean_size;
  75. size_t max_dirty_pages_per_call;
  76. size_t max_pages_per_inline_eviction;
  77. size_t max_skip_pages_per_inline_eviction;
  78. size_t max_flushes_inline;
  79. size_t max_workers_evict_inline;
  80. size_t additional_bytes_per_page;
  81. free_clean_page_callback pgc_free_clean_cb;
  82. save_dirty_page_callback pgc_save_dirty_cb;
  83. save_dirty_init_callback pgc_save_init_cb;
  84. PGC_OPTIONS options;
  85. size_t severe_pressure_per1000;
  86. size_t aggressive_evict_per1000;
  87. size_t healthy_size_per1000;
  88. size_t evict_low_threshold_per1000;
  89. dynamic_target_cache_size_callback dynamic_target_size_cb;
  90. } config;
  91. #ifdef PGC_WITH_ARAL
  92. ARAL **aral;
  93. #endif
  94. PGC_CACHE_LINE_PADDING(0);
  95. struct pgc_index {
  96. RW_SPINLOCK rw_spinlock;
  97. Pvoid_t sections_judy;
  98. PGC_CACHE_LINE_PADDING(0);
  99. } *index;
  100. PGC_CACHE_LINE_PADDING(1);
  101. struct {
  102. SPINLOCK spinlock;
  103. size_t per1000;
  104. } usage;
  105. PGC_CACHE_LINE_PADDING(2);
  106. struct pgc_linked_list clean; // LRU is applied here to free memory from the cache
  107. PGC_CACHE_LINE_PADDING(3);
  108. struct pgc_linked_list dirty; // in the dirty list, pages are ordered the way they were marked dirty
  109. PGC_CACHE_LINE_PADDING(4);
  110. struct pgc_linked_list hot; // in the hot list, pages are ordered the way they were marked hot
  111. PGC_CACHE_LINE_PADDING(5);
  112. struct pgc_statistics stats; // statistics
  113. #ifdef NETDATA_PGC_POINTER_CHECK
  114. PGC_CACHE_LINE_PADDING(6);
  115. netdata_mutex_t global_pointer_registry_mutex;
  116. Pvoid_t global_pointer_registry;
  117. #endif
  118. };
  119. // ----------------------------------------------------------------------------
  120. // validate each pointer is indexed once - internal checks only
  121. static inline void pointer_index_init(PGC *cache __maybe_unused) {
  122. #ifdef NETDATA_PGC_POINTER_CHECK
  123. netdata_mutex_init(&cache->global_pointer_registry_mutex);
  124. #else
  125. ;
  126. #endif
  127. }
  128. static inline void pointer_destroy_index(PGC *cache __maybe_unused) {
  129. #ifdef NETDATA_PGC_POINTER_CHECK
  130. netdata_mutex_lock(&cache->global_pointer_registry_mutex);
  131. JudyHSFreeArray(&cache->global_pointer_registry, PJE0);
  132. netdata_mutex_unlock(&cache->global_pointer_registry_mutex);
  133. #else
  134. ;
  135. #endif
  136. }
  137. static inline void pointer_add(PGC *cache __maybe_unused, PGC_PAGE *page __maybe_unused) {
  138. #ifdef NETDATA_PGC_POINTER_CHECK
  139. netdata_mutex_lock(&cache->global_pointer_registry_mutex);
  140. Pvoid_t *PValue = JudyHSIns(&cache->global_pointer_registry, &page, sizeof(void *), PJE0);
  141. if(*PValue != NULL)
  142. fatal("pointer already exists in registry");
  143. *PValue = page;
  144. netdata_mutex_unlock(&cache->global_pointer_registry_mutex);
  145. #else
  146. ;
  147. #endif
  148. }
  149. static inline void pointer_check(PGC *cache __maybe_unused, PGC_PAGE *page __maybe_unused) {
  150. #ifdef NETDATA_PGC_POINTER_CHECK
  151. netdata_mutex_lock(&cache->global_pointer_registry_mutex);
  152. Pvoid_t *PValue = JudyHSGet(cache->global_pointer_registry, &page, sizeof(void *));
  153. if(PValue == NULL)
  154. fatal("pointer is not found in registry");
  155. netdata_mutex_unlock(&cache->global_pointer_registry_mutex);
  156. #else
  157. ;
  158. #endif
  159. }
  160. static inline void pointer_del(PGC *cache __maybe_unused, PGC_PAGE *page __maybe_unused) {
  161. #ifdef NETDATA_PGC_POINTER_CHECK
  162. netdata_mutex_lock(&cache->global_pointer_registry_mutex);
  163. int ret = JudyHSDel(&cache->global_pointer_registry, &page, sizeof(void *), PJE0);
  164. if(!ret)
  165. fatal("pointer to be deleted does not exist in registry");
  166. netdata_mutex_unlock(&cache->global_pointer_registry_mutex);
  167. #else
  168. ;
  169. #endif
  170. }
  171. // ----------------------------------------------------------------------------
  172. // locking
  173. static inline size_t pgc_indexing_partition(PGC *cache, Word_t metric_id) {
  174. static __thread Word_t last_metric_id = 0;
  175. static __thread size_t last_partition = 0;
  176. if(metric_id == last_metric_id || cache->config.partitions == 1)
  177. return last_partition;
  178. last_metric_id = metric_id;
  179. last_partition = indexing_partition(metric_id, cache->config.partitions);
  180. return last_partition;
  181. }
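/*
 * Illustrative sketch (not part of the build): the thread-local memoization above
 * means consecutive lookups of the same metric_id on the same thread reuse the
 * cached partition without re-hashing. A hypothetical caller pairs it with the
 * partition's read lock before walking the index:
 *
 *   size_t partition = pgc_indexing_partition(cache, metric_id);
 *   pgc_index_read_lock(cache, partition);
 *   // ... look up pages in cache->index[partition].sections_judy ...
 *   pgc_index_read_unlock(cache, partition);
 */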
  182. static inline void pgc_index_read_lock(PGC *cache, size_t partition) {
  183. rw_spinlock_read_lock(&cache->index[partition].rw_spinlock);
  184. }
  185. static inline void pgc_index_read_unlock(PGC *cache, size_t partition) {
  186. rw_spinlock_read_unlock(&cache->index[partition].rw_spinlock);
  187. }
  188. static inline void pgc_index_write_lock(PGC *cache, size_t partition) {
  189. rw_spinlock_write_lock(&cache->index[partition].rw_spinlock);
  190. }
  191. static inline void pgc_index_write_unlock(PGC *cache, size_t partition) {
  192. rw_spinlock_write_unlock(&cache->index[partition].rw_spinlock);
  193. }
  194. static inline bool pgc_ll_trylock(PGC *cache __maybe_unused, struct pgc_linked_list *ll) {
  195. return spinlock_trylock(&ll->spinlock);
  196. }
  197. static inline void pgc_ll_lock(PGC *cache __maybe_unused, struct pgc_linked_list *ll) {
  198. spinlock_lock(&ll->spinlock);
  199. }
  200. static inline void pgc_ll_unlock(PGC *cache __maybe_unused, struct pgc_linked_list *ll) {
  201. spinlock_unlock(&ll->spinlock);
  202. }
  203. static inline bool page_transition_trylock(PGC *cache __maybe_unused, PGC_PAGE *page) {
  204. return spinlock_trylock(&page->transition_spinlock);
  205. }
  206. static inline void page_transition_lock(PGC *cache __maybe_unused, PGC_PAGE *page) {
  207. spinlock_lock(&page->transition_spinlock);
  208. }
  209. static inline void page_transition_unlock(PGC *cache __maybe_unused, PGC_PAGE *page) {
  210. spinlock_unlock(&page->transition_spinlock);
  211. }
  212. // ----------------------------------------------------------------------------
  213. // evictions control
  214. static inline size_t cache_usage_per1000(PGC *cache, size_t *size_to_evict) {
  215. if(size_to_evict)
  216. spinlock_lock(&cache->usage.spinlock);
  217. else if(!spinlock_trylock(&cache->usage.spinlock))
  218. return __atomic_load_n(&cache->usage.per1000, __ATOMIC_RELAXED);
  219. size_t current_cache_size;
  220. size_t wanted_cache_size;
  221. size_t per1000;
  222. size_t dirty = __atomic_load_n(&cache->dirty.stats->size, __ATOMIC_RELAXED);
  223. size_t hot = __atomic_load_n(&cache->hot.stats->size, __ATOMIC_RELAXED);
  224. if(cache->config.options & PGC_OPTIONS_AUTOSCALE) {
  225. size_t dirty_max = __atomic_load_n(&cache->dirty.stats->max_size, __ATOMIC_RELAXED);
  226. size_t hot_max = __atomic_load_n(&cache->hot.stats->max_size, __ATOMIC_RELAXED);
  227. // our promise to users
  228. size_t max_size1 = MAX(hot_max, hot) * 2;
  229. // protection against slow flushing
  230. size_t max_size2 = hot_max + ((dirty_max < hot_max / 2) ? hot_max / 2 : dirty_max * 2);
  231. // the final wanted cache size
  232. wanted_cache_size = MIN(max_size1, max_size2);
  233. if(cache->config.dynamic_target_size_cb) {
  234. size_t wanted_cache_size_cb = cache->config.dynamic_target_size_cb();
  235. if(wanted_cache_size_cb > wanted_cache_size)
  236. wanted_cache_size = wanted_cache_size_cb;
  237. }
  238. if (wanted_cache_size < hot + dirty + cache->config.clean_size)
  239. wanted_cache_size = hot + dirty + cache->config.clean_size;
  240. }
  241. else
  242. wanted_cache_size = hot + dirty + cache->config.clean_size;
  243. // protection against huge queries
  244. // if huge queries are running, or huge amounts need to be saved
  245. // allow the cache to grow more (hot pages in main cache are also referenced)
  246. size_t referenced_size = __atomic_load_n(&cache->stats.referenced_size, __ATOMIC_RELAXED);
  247. if(unlikely(wanted_cache_size < referenced_size * 2 / 3))
  248. wanted_cache_size = referenced_size * 2 / 3;
  249. current_cache_size = __atomic_load_n(&cache->stats.size, __ATOMIC_RELAXED); // + pgc_aral_overhead();
  250. per1000 = (size_t)((unsigned long long)current_cache_size * 1000ULL / (unsigned long long)wanted_cache_size);
  251. __atomic_store_n(&cache->usage.per1000, per1000, __ATOMIC_RELAXED);
  252. __atomic_store_n(&cache->stats.wanted_cache_size, wanted_cache_size, __ATOMIC_RELAXED);
  253. __atomic_store_n(&cache->stats.current_cache_size, current_cache_size, __ATOMIC_RELAXED);
  254. spinlock_unlock(&cache->usage.spinlock);
  255. if(size_to_evict) {
  256. size_t target = (size_t)((unsigned long long)wanted_cache_size * (unsigned long long)cache->config.evict_low_threshold_per1000 / 1000ULL);
  257. if(current_cache_size > target)
  258. *size_to_evict = current_cache_size - target;
  259. else
  260. *size_to_evict = 0;
  261. }
  262. if(per1000 >= cache->config.severe_pressure_per1000)
  263. __atomic_add_fetch(&cache->stats.events_cache_under_severe_pressure, 1, __ATOMIC_RELAXED);
  264. else if(per1000 >= cache->config.aggressive_evict_per1000)
  265. __atomic_add_fetch(&cache->stats.events_cache_needs_space_aggressively, 1, __ATOMIC_RELAXED);
  266. return per1000;
  267. }
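/*
 * Worked example (all numbers are assumptions, for illustration only): if the
 * wanted cache size works out to 100 MiB and the current cache size is 130 MiB,
 * then per1000 = 130 * 1000 / 100 = 1300. With hypothetical thresholds
 * severe_pressure_per1000 = 1100 and aggressive_evict_per1000 = 1000, this counts
 * as severe pressure. With evict_low_threshold_per1000 = 700, the eviction target
 * becomes 100 MiB * 700 / 1000 = 70 MiB, so *size_to_evict = 130 - 70 = 60 MiB.
 */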
  268. static inline bool cache_pressure(PGC *cache, size_t limit) {
  269. return (cache_usage_per1000(cache, NULL) >= limit);
  270. }
  271. #define cache_under_severe_pressure(cache) cache_pressure(cache, (cache)->config.severe_pressure_per1000)
  272. #define cache_needs_space_aggressively(cache) cache_pressure(cache, (cache)->config.aggressive_evict_per1000)
  273. #define cache_above_healthy_limit(cache) cache_pressure(cache, (cache)->config.healthy_size_per1000)
  274. typedef bool (*evict_filter)(PGC_PAGE *page, void *data);
  275. static bool evict_pages_with_filter(PGC *cache, size_t max_skip, size_t max_evict, bool wait, bool all_of_them, evict_filter filter, void *data);
  276. #define evict_pages(cache, max_skip, max_evict, wait, all_of_them) evict_pages_with_filter(cache, max_skip, max_evict, wait, all_of_them, NULL, NULL)
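/*
 * Illustrative sketch (not part of the build): evict_pages_with_filter() accepts a
 * predicate that restricts eviction to matching clean pages. The filter and the
 * call below are hypothetical, showing only the calling pattern:
 *
 *   static bool belongs_to_section(PGC_PAGE *page, void *data) {
 *       return page->section == *(Word_t *)data;
 *   }
 *
 *   // evict every clean page of one section, waiting for locks as needed
 *   Word_t section = 1;
 *   evict_pages_with_filter(cache, 0, 0, true, true, belongs_to_section, &section);
 */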
  277. static inline void evict_on_clean_page_added(PGC *cache __maybe_unused) {
  278. if((cache->config.options & PGC_OPTIONS_EVICT_PAGES_INLINE) || cache_needs_space_aggressively(cache)) {
  279. evict_pages(cache,
  280. cache->config.max_skip_pages_per_inline_eviction,
  281. cache->config.max_pages_per_inline_eviction,
  282. false, false);
  283. }
  284. }
  285. static inline void evict_on_page_release_when_permitted(PGC *cache __maybe_unused) {
  286. if ((cache->config.options & PGC_OPTIONS_EVICT_PAGES_INLINE) || cache_under_severe_pressure(cache)) {
  287. evict_pages(cache,
  288. cache->config.max_skip_pages_per_inline_eviction,
  289. cache->config.max_pages_per_inline_eviction,
  290. false, false);
  291. }
  292. }
  293. // ----------------------------------------------------------------------------
  294. // flushing control
  295. static bool flush_pages(PGC *cache, size_t max_flushes, Word_t section, bool wait, bool all_of_them);
  296. static inline bool flushing_critical(PGC *cache) {
  297. if(unlikely(__atomic_load_n(&cache->dirty.stats->size, __ATOMIC_RELAXED) > __atomic_load_n(&cache->hot.stats->max_size, __ATOMIC_RELAXED))) {
  298. __atomic_add_fetch(&cache->stats.events_flush_critical, 1, __ATOMIC_RELAXED);
  299. return true;
  300. }
  301. return false;
  302. }
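/*
 * Worked example (numbers are illustrative): if the hot queue peaked at
 * max_size = 64 MiB, flushing becomes "critical" as soon as the dirty queue itself
 * grows beyond 64 MiB. From that point, inline paths such as page_add() call
 * flush_pages() themselves instead of relying on flushing done elsewhere.
 */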
  303. // ----------------------------------------------------------------------------
  304. // helpers
  305. static inline size_t page_assumed_size(PGC *cache, size_t size) {
  306. return size + (sizeof(PGC_PAGE) + cache->config.additional_bytes_per_page + sizeof(Word_t) * 3);
  307. }
  308. static inline size_t page_size_from_assumed_size(PGC *cache, size_t assumed_size) {
  309. return assumed_size - (sizeof(PGC_PAGE) + cache->config.additional_bytes_per_page + sizeof(Word_t) * 3);
  310. }
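/*
 * Worked example (sizes are assumptions): the "assumed size" charged to the cache
 * is the caller-supplied payload plus a fixed per-page overhead of
 * sizeof(PGC_PAGE) + additional_bytes_per_page + 3 * sizeof(Word_t), the last term
 * roughly covering the three index levels (section, metric, start time). For a
 * 4096-byte payload with no custom data, an 80-byte PGC_PAGE and 8-byte Word_t,
 * the page is accounted as 4096 + 80 + 24 = 4200 bytes; page_size_from_assumed_size()
 * simply inverts the calculation.
 */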
  311. // ----------------------------------------------------------------------------
  312. // Linked list management
  313. static inline void atomic_set_max(size_t *max, size_t desired) {
  314. size_t expected;
  315. expected = __atomic_load_n(max, __ATOMIC_RELAXED);
  316. do {
  317. if(expected >= desired)
  318. return;
  319. } while(!__atomic_compare_exchange_n(max, &expected, desired,
  320. false, __ATOMIC_RELAXED, __ATOMIC_RELAXED));
  321. }
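/*
 * Illustrative sketch (not part of the build): atomic_set_max() is a lock-free
 * "raise the high-watermark" helper. On every failed compare-and-swap it re-reads
 * the stored maximum (via 'expected') and gives up once that value is already
 * >= desired, so concurrent writers can only ever raise the maximum. Hypothetical
 * usage, mirroring how the queue statistics below use it:
 *
 *   size_t entries = __atomic_add_fetch(&stats->entries, 1, __ATOMIC_RELAXED);
 *   atomic_set_max(&stats->max_entries, entries);   // never lowers max_entries
 */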
  322. struct section_pages {
  323. SPINLOCK migration_to_v2_spinlock;
  324. size_t entries;
  325. size_t size;
  326. PGC_PAGE *base;
  327. };
  328. static ARAL *pgc_section_pages_aral = NULL;
  329. static void pgc_section_pages_static_aral_init(void) {
  330. static SPINLOCK spinlock = NETDATA_SPINLOCK_INITIALIZER;
  331. if(unlikely(!pgc_section_pages_aral)) {
  332. spinlock_lock(&spinlock);
  333. // we have to check again
  334. if(!pgc_section_pages_aral)
  335. pgc_section_pages_aral = aral_create(
  336. "pgc_section",
  337. sizeof(struct section_pages),
  338. 0,
  339. 65536, NULL,
  340. NULL, NULL, false, false);
  341. spinlock_unlock(&spinlock);
  342. }
  343. }
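/*
 * Note (illustrative): pgc_section_pages_static_aral_init() uses the classic
 * check / lock / re-check pattern, so several caches may race to initialize the
 * shared allocator while only one of them actually creates it:
 *
 *   if(!resource) {                  // cheap unlocked check
 *       spinlock_lock(&lock);
 *       if(!resource)                // re-check under the lock
 *           resource = create_it();  // exactly one thread wins
 *       spinlock_unlock(&lock);
 *   }
 */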
  344. static inline void pgc_stats_ll_judy_change(PGC *cache, struct pgc_linked_list *ll, size_t mem_before_judyl, size_t mem_after_judyl) {
  345. if(mem_after_judyl > mem_before_judyl) {
  346. __atomic_add_fetch(&ll->stats->size, mem_after_judyl - mem_before_judyl, __ATOMIC_RELAXED);
  347. __atomic_add_fetch(&cache->stats.size, mem_after_judyl - mem_before_judyl, __ATOMIC_RELAXED);
  348. }
  349. else if(mem_after_judyl < mem_before_judyl) {
  350. __atomic_sub_fetch(&ll->stats->size, mem_before_judyl - mem_after_judyl, __ATOMIC_RELAXED);
  351. __atomic_sub_fetch(&cache->stats.size, mem_before_judyl - mem_after_judyl, __ATOMIC_RELAXED);
  352. }
  353. }
  354. static inline void pgc_stats_index_judy_change(PGC *cache, size_t mem_before_judyl, size_t mem_after_judyl) {
  355. if(mem_after_judyl > mem_before_judyl) {
  356. __atomic_add_fetch(&cache->stats.size, mem_after_judyl - mem_before_judyl, __ATOMIC_RELAXED);
  357. }
  358. else if(mem_after_judyl < mem_before_judyl) {
  359. __atomic_sub_fetch(&cache->stats.size, mem_before_judyl - mem_after_judyl, __ATOMIC_RELAXED);
  360. }
  361. }
  362. static void pgc_ll_add(PGC *cache __maybe_unused, struct pgc_linked_list *ll, PGC_PAGE *page, bool having_lock) {
  363. if(!having_lock)
  364. pgc_ll_lock(cache, ll);
  365. internal_fatal(page_get_status_flags(page) != 0,
  366. "DBENGINE CACHE: invalid page flags, the page has %d, but it is should be %d",
  367. page_get_status_flags(page),
  368. 0);
  369. if(ll->linked_list_in_sections_judy) {
  370. size_t mem_before_judyl, mem_after_judyl;
  371. mem_before_judyl = JudyLMemUsed(ll->sections_judy);
  372. Pvoid_t *section_pages_pptr = JudyLIns(&ll->sections_judy, page->section, PJE0);
  373. mem_after_judyl = JudyLMemUsed(ll->sections_judy);
  374. struct section_pages *sp = *section_pages_pptr;
  375. if(!sp) {
  376. // sp = callocz(1, sizeof(struct section_pages));
  377. sp = aral_mallocz(pgc_section_pages_aral);
  378. memset(sp, 0, sizeof(struct section_pages));
  379. *section_pages_pptr = sp;
  380. mem_after_judyl += sizeof(struct section_pages);
  381. }
  382. pgc_stats_ll_judy_change(cache, ll, mem_before_judyl, mem_after_judyl);
  383. sp->entries++;
  384. sp->size += page->assumed_size;
  385. DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(sp->base, page, link.prev, link.next);
  386. if((sp->entries % cache->config.max_dirty_pages_per_call) == 0)
  387. ll->version++;
  388. }
  389. else {
  390. // CLEAN pages end up here.
  391. // - New pages created as CLEAN always have 1 access.
  392. // - DIRTY pages made CLEAN may be appended (accesses > 0) or prepended (accesses == 0), depending on their accesses.
  393. if(page->accesses || page_flag_check(page, PGC_PAGE_HAS_BEEN_ACCESSED | PGC_PAGE_HAS_NO_DATA_IGNORE_ACCESSES) == PGC_PAGE_HAS_BEEN_ACCESSED) {
  394. DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(ll->base, page, link.prev, link.next);
  395. page_flag_clear(page, PGC_PAGE_HAS_BEEN_ACCESSED);
  396. }
  397. else
  398. DOUBLE_LINKED_LIST_PREPEND_ITEM_UNSAFE(ll->base, page, link.prev, link.next);
  399. ll->version++;
  400. }
  401. page_flag_set(page, ll->flags);
  402. if(!having_lock)
  403. pgc_ll_unlock(cache, ll);
  404. size_t entries = __atomic_add_fetch(&ll->stats->entries, 1, __ATOMIC_RELAXED);
  405. size_t size = __atomic_add_fetch(&ll->stats->size, page->assumed_size, __ATOMIC_RELAXED);
  406. __atomic_add_fetch(&ll->stats->added_entries, 1, __ATOMIC_RELAXED);
  407. __atomic_add_fetch(&ll->stats->added_size, page->assumed_size, __ATOMIC_RELAXED);
  408. atomic_set_max(&ll->stats->max_entries, entries);
  409. atomic_set_max(&ll->stats->max_size, size);
  410. }
  411. static void pgc_ll_del(PGC *cache __maybe_unused, struct pgc_linked_list *ll, PGC_PAGE *page, bool having_lock) {
  412. __atomic_sub_fetch(&ll->stats->entries, 1, __ATOMIC_RELAXED);
  413. __atomic_sub_fetch(&ll->stats->size, page->assumed_size, __ATOMIC_RELAXED);
  414. __atomic_add_fetch(&ll->stats->removed_entries, 1, __ATOMIC_RELAXED);
  415. __atomic_add_fetch(&ll->stats->removed_size, page->assumed_size, __ATOMIC_RELAXED);
  416. if(!having_lock)
  417. pgc_ll_lock(cache, ll);
  418. internal_fatal(page_get_status_flags(page) != ll->flags,
  419. "DBENGINE CACHE: invalid page flags, the page has %d, but it is should be %d",
  420. page_get_status_flags(page),
  421. ll->flags);
  422. page_flag_clear(page, ll->flags);
  423. if(ll->linked_list_in_sections_judy) {
  424. Pvoid_t *section_pages_pptr = JudyLGet(ll->sections_judy, page->section, PJE0);
  425. internal_fatal(!section_pages_pptr, "DBENGINE CACHE: page should be in Judy LL, but it is not");
  426. struct section_pages *sp = *section_pages_pptr;
  427. sp->entries--;
  428. sp->size -= page->assumed_size;
  429. DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(sp->base, page, link.prev, link.next);
  430. if(!sp->base) {
  431. size_t mem_before_judyl, mem_after_judyl;
  432. mem_before_judyl = JudyLMemUsed(ll->sections_judy);
  433. int rc = JudyLDel(&ll->sections_judy, page->section, PJE0);
  434. mem_after_judyl = JudyLMemUsed(ll->sections_judy);
  435. if(!rc)
  436. fatal("DBENGINE CACHE: cannot delete section from Judy LL");
  437. // freez(sp);
  438. aral_freez(pgc_section_pages_aral, sp);
  439. mem_after_judyl -= sizeof(struct section_pages);
  440. pgc_stats_ll_judy_change(cache, ll, mem_before_judyl, mem_after_judyl);
  441. }
  442. }
  443. else {
  444. DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(ll->base, page, link.prev, link.next);
  445. ll->version++;
  446. }
  447. if(!having_lock)
  448. pgc_ll_unlock(cache, ll);
  449. }
  450. static inline void page_has_been_accessed(PGC *cache, PGC_PAGE *page) {
  451. PGC_PAGE_FLAGS flags = page_flag_check(page, PGC_PAGE_CLEAN | PGC_PAGE_HAS_NO_DATA_IGNORE_ACCESSES);
  452. if (!(flags & PGC_PAGE_HAS_NO_DATA_IGNORE_ACCESSES)) {
  453. __atomic_add_fetch(&page->accesses, 1, __ATOMIC_RELAXED);
  454. if (flags & PGC_PAGE_CLEAN) {
  455. if(pgc_ll_trylock(cache, &cache->clean)) {
  456. DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(cache->clean.base, page, link.prev, link.next);
  457. DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(cache->clean.base, page, link.prev, link.next);
  458. pgc_ll_unlock(cache, &cache->clean);
  459. page_flag_clear(page, PGC_PAGE_HAS_BEEN_ACCESSED);
  460. }
  461. else
  462. page_flag_set(page, PGC_PAGE_HAS_BEEN_ACCESSED);
  463. }
  464. }
  465. }
  466. // ----------------------------------------------------------------------------
  467. // state transitions
  468. static inline void page_set_clean(PGC *cache, PGC_PAGE *page, bool having_transition_lock, bool having_clean_lock) {
  469. if(!having_transition_lock)
  470. page_transition_lock(cache, page);
  471. PGC_PAGE_FLAGS flags = page_get_status_flags(page);
  472. if(flags & PGC_PAGE_CLEAN) {
  473. if(!having_transition_lock)
  474. page_transition_unlock(cache, page);
  475. return;
  476. }
  477. if(flags & PGC_PAGE_HOT)
  478. pgc_ll_del(cache, &cache->hot, page, false);
  479. if(flags & PGC_PAGE_DIRTY)
  480. pgc_ll_del(cache, &cache->dirty, page, false);
  481. // first add to the linked list, then set the flag (required for move_page_last())
  482. pgc_ll_add(cache, &cache->clean, page, having_clean_lock);
  483. if(!having_transition_lock)
  484. page_transition_unlock(cache, page);
  485. }
  486. static inline void page_set_dirty(PGC *cache, PGC_PAGE *page, bool having_hot_lock) {
  487. if(!having_hot_lock)
  488. // to avoid deadlocks, we have to get the hot lock before the page transition
  489. // since this is what all_hot_to_dirty() does
  490. pgc_ll_lock(cache, &cache->hot);
  491. page_transition_lock(cache, page);
  492. PGC_PAGE_FLAGS flags = page_get_status_flags(page);
  493. if(flags & PGC_PAGE_DIRTY) {
  494. page_transition_unlock(cache, page);
  495. if(!having_hot_lock)
  496. // we don't need the hot lock anymore
  497. pgc_ll_unlock(cache, &cache->hot);
  498. return;
  499. }
  500. __atomic_add_fetch(&cache->stats.hot2dirty_entries, 1, __ATOMIC_RELAXED);
  501. __atomic_add_fetch(&cache->stats.hot2dirty_size, page->assumed_size, __ATOMIC_RELAXED);
  502. if(likely(flags & PGC_PAGE_HOT))
  503. pgc_ll_del(cache, &cache->hot, page, true);
  504. if(!having_hot_lock)
  505. // we don't need the hot lock anymore
  506. pgc_ll_unlock(cache, &cache->hot);
  507. if(unlikely(flags & PGC_PAGE_CLEAN))
  508. pgc_ll_del(cache, &cache->clean, page, false);
  509. // first add to the linked list, then set the flag (required for move_page_last())
  510. pgc_ll_add(cache, &cache->dirty, page, false);
  511. __atomic_sub_fetch(&cache->stats.hot2dirty_entries, 1, __ATOMIC_RELAXED);
  512. __atomic_sub_fetch(&cache->stats.hot2dirty_size, page->assumed_size, __ATOMIC_RELAXED);
  513. page_transition_unlock(cache, page);
  514. }
  515. static inline void page_set_hot(PGC *cache, PGC_PAGE *page) {
  516. page_transition_lock(cache, page);
  517. PGC_PAGE_FLAGS flags = page_get_status_flags(page);
  518. if(flags & PGC_PAGE_HOT) {
  519. page_transition_unlock(cache, page);
  520. return;
  521. }
  522. if(flags & PGC_PAGE_DIRTY)
  523. pgc_ll_del(cache, &cache->dirty, page, false);
  524. if(flags & PGC_PAGE_CLEAN)
  525. pgc_ll_del(cache, &cache->clean, page, false);
  526. // first add to the linked list, then set the flag (required for move_page_last())
  527. pgc_ll_add(cache, &cache->hot, page, false);
  528. page_transition_unlock(cache, page);
  529. }
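/*
 * Illustrative sketch (not part of the build): the three helpers above implement
 * the lifecycle from the header diagram, and each one is a no-op if the page is
 * already in the requested state. A hypothetical collected page would move through
 * the queues like this:
 *
 *   page_set_hot(cache, page);                  // being collected, not evictable
 *   page_set_dirty(cache, page, false);         // collection done, waiting to be saved
 *   page_set_clean(cache, page, false, false);  // saved, now an eviction candidate
 */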
  530. // ----------------------------------------------------------------------------
  531. // Referencing
  532. static inline size_t PGC_REFERENCED_PAGES(PGC *cache) {
  533. return __atomic_load_n(&cache->stats.referenced_entries, __ATOMIC_RELAXED);
  534. }
  535. static inline void PGC_REFERENCED_PAGES_PLUS1(PGC *cache, PGC_PAGE *page) {
  536. __atomic_add_fetch(&cache->stats.referenced_entries, 1, __ATOMIC_RELAXED);
  537. __atomic_add_fetch(&cache->stats.referenced_size, page->assumed_size, __ATOMIC_RELAXED);
  538. }
  539. static inline void PGC_REFERENCED_PAGES_MINUS1(PGC *cache, size_t assumed_size) {
  540. __atomic_sub_fetch(&cache->stats.referenced_entries, 1, __ATOMIC_RELAXED);
  541. __atomic_sub_fetch(&cache->stats.referenced_size, assumed_size, __ATOMIC_RELAXED);
  542. }
  543. // If the page is not already acquired,
  544. // YOU HAVE TO HAVE THE QUEUE (hot, dirty, clean) THE PAGE IS IN, L O C K E D !
  545. // If you don't have it locked, NOTHING PREVENTS THIS PAGE FROM VANISHING WHILE THIS IS CALLED!
  546. static inline bool page_acquire(PGC *cache, PGC_PAGE *page) {
  547. __atomic_add_fetch(&cache->stats.acquires, 1, __ATOMIC_RELAXED);
  548. REFCOUNT expected, desired;
  549. expected = __atomic_load_n(&page->refcount, __ATOMIC_RELAXED);
  550. size_t spins = 0;
  551. do {
  552. spins++;
  553. if(unlikely(expected < 0))
  554. return false;
  555. desired = expected + 1;
  556. } while(!__atomic_compare_exchange_n(&page->refcount, &expected, desired, false, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED));
  557. if(unlikely(spins > 1))
  558. __atomic_add_fetch(&cache->stats.acquire_spins, spins - 1, __ATOMIC_RELAXED);
  559. if(desired == 1)
  560. PGC_REFERENCED_PAGES_PLUS1(cache, page);
  561. return true;
  562. }
  563. static inline void page_release(PGC *cache, PGC_PAGE *page, bool evict_if_necessary) {
  564. __atomic_add_fetch(&cache->stats.releases, 1, __ATOMIC_RELAXED);
  565. size_t assumed_size = page->assumed_size; // take the size before we release it
  566. REFCOUNT expected, desired;
  567. expected = __atomic_load_n(&page->refcount, __ATOMIC_RELAXED);
  568. size_t spins = 0;
  569. do {
  570. spins++;
  571. internal_fatal(expected <= 0,
  572. "DBENGINE CACHE: trying to release a page with reference counter %d", expected);
  573. desired = expected - 1;
  574. } while(!__atomic_compare_exchange_n(&page->refcount, &expected, desired, false, __ATOMIC_RELEASE, __ATOMIC_RELAXED));
  575. if(unlikely(spins > 1))
  576. __atomic_add_fetch(&cache->stats.release_spins, spins - 1, __ATOMIC_RELAXED);
  577. if(desired == 0) {
  578. PGC_REFERENCED_PAGES_MINUS1(cache, assumed_size);
  579. if(evict_if_necessary)
  580. evict_on_page_release_when_permitted(cache);
  581. }
  582. }
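/*
 * Illustrative sketch (not part of the build): page_acquire() / page_release() are
 * the reference-counting pair that keeps a page alive while it is being used.
 * Assuming the caller holds the page's queue lock (as the warning above requires)
 * or already owns a reference, a hypothetical reader would do:
 *
 *   if(page_acquire(cache, page)) {           // fails if the page is being deleted
 *       // ... use page->data safely ...
 *       page_release(cache, page, true);      // true: inline eviction is allowed here
 *   }
 */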
  583. static inline bool non_acquired_page_get_for_deletion___while_having_clean_locked(PGC *cache __maybe_unused, PGC_PAGE *page) {
  584. __atomic_add_fetch(&cache->stats.acquires_for_deletion, 1, __ATOMIC_RELAXED);
  585. internal_fatal(!is_page_clean(page),
  586. "DBENGINE CACHE: only clean pages can be deleted");
  587. REFCOUNT expected, desired;
  588. expected = __atomic_load_n(&page->refcount, __ATOMIC_RELAXED);
  589. size_t spins = 0;
  590. bool delete_it;
  591. do {
  592. spins++;
  593. if (expected == 0) {
  594. desired = REFCOUNT_DELETING;
  595. delete_it = true;
  596. }
  597. else {
  598. delete_it = false;
  599. break;
  600. }
  601. } while(!__atomic_compare_exchange_n(&page->refcount, &expected, desired, false, __ATOMIC_RELEASE, __ATOMIC_RELAXED));
  602. if(delete_it) {
  603. // we can delete this page
  604. internal_fatal(page_flag_check(page, PGC_PAGE_IS_BEING_DELETED),
  605. "DBENGINE CACHE: page is already being deleted");
  606. page_flag_set(page, PGC_PAGE_IS_BEING_DELETED);
  607. }
  608. if(unlikely(spins > 1))
  609. __atomic_add_fetch(&cache->stats.delete_spins, spins - 1, __ATOMIC_RELAXED);
  610. return delete_it;
  611. }
  612. static inline bool acquired_page_get_for_deletion_or_release_it(PGC *cache __maybe_unused, PGC_PAGE *page) {
  613. __atomic_add_fetch(&cache->stats.acquires_for_deletion, 1, __ATOMIC_RELAXED);
  614. size_t assumed_size = page->assumed_size; // take the size before we release it
  615. REFCOUNT expected, desired;
  616. expected = __atomic_load_n(&page->refcount, __ATOMIC_RELAXED);
  617. size_t spins = 0;
  618. bool delete_it;
  619. do {
  620. spins++;
  621. internal_fatal(expected < 1,
  622. "DBENGINE CACHE: page to be deleted should be acquired by the caller.");
  623. if (expected == 1) {
  624. // we are the only one having this page referenced
  625. desired = REFCOUNT_DELETING;
  626. delete_it = true;
  627. }
  628. else {
  629. // this page cannot be deleted
  630. desired = expected - 1;
  631. delete_it = false;
  632. }
  633. } while(!__atomic_compare_exchange_n(&page->refcount, &expected, desired, false, __ATOMIC_RELEASE, __ATOMIC_RELAXED));
  634. if(delete_it) {
  635. PGC_REFERENCED_PAGES_MINUS1(cache, assumed_size);
  636. // we can delete this page
  637. internal_fatal(page_flag_check(page, PGC_PAGE_IS_BEING_DELETED),
  638. "DBENGINE CACHE: page is already being deleted");
  639. page_flag_set(page, PGC_PAGE_IS_BEING_DELETED);
  640. }
  641. if(unlikely(spins > 1))
  642. __atomic_add_fetch(&cache->stats.delete_spins, spins - 1, __ATOMIC_RELAXED);
  643. return delete_it;
  644. }
  645. // ----------------------------------------------------------------------------
  646. // Indexing
  647. static inline void free_this_page(PGC *cache, PGC_PAGE *page, size_t partition __maybe_unused) {
  648. // call the callback to free the user supplied memory
  649. cache->config.pgc_free_clean_cb(cache, (PGC_ENTRY){
  650. .section = page->section,
  651. .metric_id = page->metric_id,
  652. .start_time_s = page->start_time_s,
  653. .end_time_s = __atomic_load_n(&page->end_time_s, __ATOMIC_RELAXED),
  654. .update_every_s = page->update_every_s,
  655. .size = page_size_from_assumed_size(cache, page->assumed_size),
  656. .hot = (is_page_hot(page)) ? true : false,
  657. .data = page->data,
  658. .custom_data = (cache->config.additional_bytes_per_page) ? page->custom_data : NULL,
  659. });
  660. // update statistics
  661. __atomic_add_fetch(&cache->stats.removed_entries, 1, __ATOMIC_RELAXED);
  662. __atomic_add_fetch(&cache->stats.removed_size, page->assumed_size, __ATOMIC_RELAXED);
  663. __atomic_sub_fetch(&cache->stats.entries, 1, __ATOMIC_RELAXED);
  664. __atomic_sub_fetch(&cache->stats.size, page->assumed_size, __ATOMIC_RELAXED);
  665. // free our memory
  666. #ifdef PGC_WITH_ARAL
  667. aral_freez(cache->aral[partition], page);
  668. #else
  669. freez(page);
  670. #endif
  671. }
  672. static void remove_this_page_from_index_unsafe(PGC *cache, PGC_PAGE *page, size_t partition) {
  673. // remove it from the Judy arrays
  674. pointer_check(cache, page);
  675. internal_fatal(page_flag_check(page, PGC_PAGE_HOT | PGC_PAGE_DIRTY | PGC_PAGE_CLEAN),
  676. "DBENGINE CACHE: page to be removed from the cache is still in the linked-list");
  677. internal_fatal(!page_flag_check(page, PGC_PAGE_IS_BEING_DELETED),
  678. "DBENGINE CACHE: page to be removed from the index, is not marked for deletion");
  679. internal_fatal(partition != pgc_indexing_partition(cache, page->metric_id),
  680. "DBENGINE CACHE: attempted to remove this page from the wrong partition of the cache");
  681. Pvoid_t *metrics_judy_pptr = JudyLGet(cache->index[partition].sections_judy, page->section, PJE0);
  682. if(unlikely(!metrics_judy_pptr))
  683. fatal("DBENGINE CACHE: section '%lu' should exist, but it does not.", page->section);
  684. Pvoid_t *pages_judy_pptr = JudyLGet(*metrics_judy_pptr, page->metric_id, PJE0);
  685. if(unlikely(!pages_judy_pptr))
  686. fatal("DBENGINE CACHE: metric '%lu' in section '%lu' should exist, but it does not.",
  687. page->metric_id, page->section);
  688. Pvoid_t *page_ptr = JudyLGet(*pages_judy_pptr, page->start_time_s, PJE0);
  689. if(unlikely(!page_ptr))
  690. fatal("DBENGINE CACHE: page with start time '%ld' of metric '%lu' in section '%lu' should exist, but it does not.",
  691. page->start_time_s, page->metric_id, page->section);
  692. PGC_PAGE *found_page = *page_ptr;
  693. if(unlikely(found_page != page))
  694. fatal("DBENGINE CACHE: page with start time '%ld' of metric '%lu' in section '%lu' should exist, but the index returned a different address.",
  695. page->start_time_s, page->metric_id, page->section);
  696. size_t mem_before_judyl = 0, mem_after_judyl = 0;
  697. mem_before_judyl += JudyLMemUsed(*pages_judy_pptr);
  698. if(unlikely(!JudyLDel(pages_judy_pptr, page->start_time_s, PJE0)))
  699. fatal("DBENGINE CACHE: page with start time '%ld' of metric '%lu' in section '%lu' exists, but cannot be deleted.",
  700. page->start_time_s, page->metric_id, page->section);
  701. mem_after_judyl += JudyLMemUsed(*pages_judy_pptr);
  702. mem_before_judyl += JudyLMemUsed(*metrics_judy_pptr);
  703. if(!*pages_judy_pptr && !JudyLDel(metrics_judy_pptr, page->metric_id, PJE0))
  704. fatal("DBENGINE CACHE: metric '%lu' in section '%lu' exists and is empty, but cannot be deleted.",
  705. page->metric_id, page->section);
  706. mem_after_judyl += JudyLMemUsed(*metrics_judy_pptr);
  707. mem_before_judyl += JudyLMemUsed(cache->index[partition].sections_judy);
  708. if(!*metrics_judy_pptr && !JudyLDel(&cache->index[partition].sections_judy, page->section, PJE0))
  709. fatal("DBENGINE CACHE: section '%lu' exists and is empty, but cannot be deleted.", page->section);
  710. mem_after_judyl += JudyLMemUsed(cache->index[partition].sections_judy);
  711. pgc_stats_index_judy_change(cache, mem_before_judyl, mem_after_judyl);
  712. pointer_del(cache, page);
  713. }
  714. static inline void remove_and_free_page_not_in_any_queue_and_acquired_for_deletion(PGC *cache, PGC_PAGE *page) {
  715. size_t partition = pgc_indexing_partition(cache, page->metric_id);
  716. pgc_index_write_lock(cache, partition);
  717. remove_this_page_from_index_unsafe(cache, page, partition);
  718. pgc_index_write_unlock(cache, partition);
  719. free_this_page(cache, page, partition);
  720. }
  721. static inline bool make_acquired_page_clean_and_evict_or_page_release(PGC *cache, PGC_PAGE *page) {
  722. pointer_check(cache, page);
  723. page_transition_lock(cache, page);
  724. pgc_ll_lock(cache, &cache->clean);
  725. // make it clean - it does not have any accesses, so it will be prepended
  726. page_set_clean(cache, page, true, true);
  727. if(!acquired_page_get_for_deletion_or_release_it(cache, page)) {
  728. pgc_ll_unlock(cache, &cache->clean);
  729. page_transition_unlock(cache, page);
  730. return false;
  731. }
  732. // remove it from the linked list
  733. pgc_ll_del(cache, &cache->clean, page, true);
  734. pgc_ll_unlock(cache, &cache->clean);
  735. page_transition_unlock(cache, page);
  736. remove_and_free_page_not_in_any_queue_and_acquired_for_deletion(cache, page);
  737. return true;
  738. }
  739. // returns true when there is more work to do
  740. static bool evict_pages_with_filter(PGC *cache, size_t max_skip, size_t max_evict, bool wait, bool all_of_them, evict_filter filter, void *data) {
  741. size_t per1000 = cache_usage_per1000(cache, NULL);
  742. if(!all_of_them && per1000 < cache->config.healthy_size_per1000)
  743. // don't bother - not enough to do anything
  744. return false;
  745. size_t workers_running = __atomic_add_fetch(&cache->stats.workers_evict, 1, __ATOMIC_RELAXED);
  746. if(!wait && !all_of_them && workers_running > cache->config.max_workers_evict_inline && per1000 < cache->config.severe_pressure_per1000) {
  747. __atomic_sub_fetch(&cache->stats.workers_evict, 1, __ATOMIC_RELAXED);
  748. return false;
  749. }
  750. internal_fatal(cache->clean.linked_list_in_sections_judy,
  751. "wrong clean pages configuration - clean pages need to have a linked list, not a judy array");
  752. if(unlikely(!max_skip))
  753. max_skip = SIZE_MAX;
  754. else if(unlikely(max_skip < 2))
  755. max_skip = 2;
  756. if(unlikely(!max_evict))
  757. max_evict = SIZE_MAX;
  758. else if(unlikely(max_evict < 2))
  759. max_evict = 2;
  760. size_t total_pages_evicted = 0;
  761. size_t total_pages_skipped = 0;
  762. bool stopped_before_finishing = false;
  763. size_t spins = 0;
  764. do {
  765. if(++spins > 1)
  766. __atomic_add_fetch(&cache->stats.evict_spins, 1, __ATOMIC_RELAXED);
  767. bool batch;
  768. size_t max_size_to_evict = 0;
  769. if (unlikely(all_of_them)) {
  770. max_size_to_evict = SIZE_MAX;
  771. batch = true;
  772. }
  773. else if(unlikely(wait)) {
  774. per1000 = cache_usage_per1000(cache, &max_size_to_evict);
  775. batch = (wait && per1000 > cache->config.severe_pressure_per1000) ? true : false;
  776. }
  777. else {
  778. batch = false;
  779. max_size_to_evict = (cache_above_healthy_limit(cache)) ? 1 : 0;
  780. }
  781. if (!max_size_to_evict)
  782. break;
  783. // check if we have to stop
  784. if(total_pages_evicted >= max_evict && !all_of_them) {
  785. stopped_before_finishing = true;
  786. break;
  787. }
  788. if(!all_of_them && !wait) {
  789. if(!pgc_ll_trylock(cache, &cache->clean)) {
  790. stopped_before_finishing = true;
  791. goto premature_exit;
  792. }
  793. // at this point we have the clean lock
  794. }
  795. else
  796. pgc_ll_lock(cache, &cache->clean);
  797. // find a page to evict
  798. PGC_PAGE *pages_to_evict = NULL;
  799. size_t pages_to_evict_size = 0;
  800. for(PGC_PAGE *page = cache->clean.base, *next = NULL, *first_page_we_relocated = NULL; page ; page = next) {
  801. next = page->link.next;
  802. if(unlikely(page == first_page_we_relocated))
  803. // we did a complete loop on all pages
  804. break;
  805. if(unlikely(page_flag_check(page, PGC_PAGE_HAS_BEEN_ACCESSED | PGC_PAGE_HAS_NO_DATA_IGNORE_ACCESSES) == PGC_PAGE_HAS_BEEN_ACCESSED)) {
  806. DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(cache->clean.base, page, link.prev, link.next);
  807. DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(cache->clean.base, page, link.prev, link.next);
  808. page_flag_clear(page, PGC_PAGE_HAS_BEEN_ACCESSED);
  809. continue;
  810. }
  811. if(unlikely(filter && !filter(page, data)))
  812. continue;
  813. if(non_acquired_page_get_for_deletion___while_having_clean_locked(cache, page)) {
  814. // we can delete this page
  815. // remove it from the clean list
  816. pgc_ll_del(cache, &cache->clean, page, true);
  817. __atomic_add_fetch(&cache->stats.evicting_entries, 1, __ATOMIC_RELAXED);
  818. __atomic_add_fetch(&cache->stats.evicting_size, page->assumed_size, __ATOMIC_RELAXED);
  819. DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(pages_to_evict, page, link.prev, link.next);
  820. pages_to_evict_size += page->assumed_size;
  821. if(unlikely(all_of_them || (batch && pages_to_evict_size < max_size_to_evict)))
  822. // get more pages
  823. ;
  824. else
  825. // one page at a time
  826. break;
  827. }
  828. else {
  829. // we can't delete this page
  830. if(!first_page_we_relocated)
  831. first_page_we_relocated = page;
  832. DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(cache->clean.base, page, link.prev, link.next);
  833. DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(cache->clean.base, page, link.prev, link.next);
  834. // check if we have to stop
  835. if(unlikely(++total_pages_skipped >= max_skip && !all_of_them)) {
  836. stopped_before_finishing = true;
  837. break;
  838. }
  839. }
  840. }
  841. pgc_ll_unlock(cache, &cache->clean);
  842. if(likely(pages_to_evict)) {
  843. // remove them from the index
  844. if(unlikely(pages_to_evict->link.next)) {
  845. // we have many pages, let's minimize the index locks we are going to get
  846. PGC_PAGE *pages_per_partition[cache->config.partitions];
  847. memset(pages_per_partition, 0, sizeof(PGC_PAGE *) * cache->config.partitions);
  848. // sort them by partition
  849. for (PGC_PAGE *page = pages_to_evict, *next = NULL; page; page = next) {
  850. next = page->link.next;
  851. size_t partition = pgc_indexing_partition(cache, page->metric_id);
  852. DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(pages_to_evict, page, link.prev, link.next);
  853. DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(pages_per_partition[partition], page, link.prev, link.next);
  854. }
  855. // remove them from the index
  856. for (size_t partition = 0; partition < cache->config.partitions; partition++) {
  857. if (!pages_per_partition[partition]) continue;
  858. pgc_index_write_lock(cache, partition);
  859. for (PGC_PAGE *page = pages_per_partition[partition]; page; page = page->link.next)
  860. remove_this_page_from_index_unsafe(cache, page, partition);
  861. pgc_index_write_unlock(cache, partition);
  862. }
  863. // free them
  864. for (size_t partition = 0; partition < cache->config.partitions; partition++) {
  865. if (!pages_per_partition[partition]) continue;
  866. for (PGC_PAGE *page = pages_per_partition[partition], *next = NULL; page; page = next) {
  867. next = page->link.next;
  868. size_t page_size = page->assumed_size;
  869. free_this_page(cache, page, partition);
  870. __atomic_sub_fetch(&cache->stats.evicting_entries, 1, __ATOMIC_RELAXED);
  871. __atomic_sub_fetch(&cache->stats.evicting_size, page_size, __ATOMIC_RELAXED);
  872. total_pages_evicted++;
  873. }
  874. }
  875. }
  876. else {
  877. // just one page to be evicted
  878. PGC_PAGE *page = pages_to_evict;
  879. size_t page_size = page->assumed_size;
  880. size_t partition = pgc_indexing_partition(cache, page->metric_id);
  881. pgc_index_write_lock(cache, partition);
  882. remove_this_page_from_index_unsafe(cache, page, partition);
  883. pgc_index_write_unlock(cache, partition);
  884. free_this_page(cache, page, partition);
  885. __atomic_sub_fetch(&cache->stats.evicting_entries, 1, __ATOMIC_RELAXED);
  886. __atomic_sub_fetch(&cache->stats.evicting_size, page_size, __ATOMIC_RELAXED);
  887. total_pages_evicted++;
  888. }
  889. }
  890. else
  891. break;
  892. } while(all_of_them || (total_pages_evicted < max_evict && total_pages_skipped < max_skip));
  893. if(all_of_them && !filter) {
  894. pgc_ll_lock(cache, &cache->clean);
  895. if(cache->clean.stats->entries) {
  896. nd_log_limit_static_global_var(erl, 1, 0);
  897. nd_log_limit(&erl, NDLS_DAEMON, NDLP_NOTICE,
  898. "DBENGINE CACHE: cannot free all clean pages, %zu are still in the clean queue",
  899. cache->clean.stats->entries);
  900. }
  901. pgc_ll_unlock(cache, &cache->clean);
  902. }
  903. premature_exit:
  904. if(unlikely(total_pages_skipped))
  905. __atomic_add_fetch(&cache->stats.evict_skipped, total_pages_skipped, __ATOMIC_RELAXED);
  906. __atomic_sub_fetch(&cache->stats.workers_evict, 1, __ATOMIC_RELAXED);
  907. return stopped_before_finishing;
  908. }
  909. static PGC_PAGE *page_add(PGC *cache, PGC_ENTRY *entry, bool *added) {
  910. internal_fatal(entry->start_time_s < 0 || entry->end_time_s < 0,
  911. "DBENGINE CACHE: timestamps are negative");
  912. __atomic_add_fetch(&cache->stats.workers_add, 1, __ATOMIC_RELAXED);
  913. size_t partition = pgc_indexing_partition(cache, entry->metric_id);
  914. #ifdef PGC_WITH_ARAL
  915. PGC_PAGE *allocation = aral_mallocz(cache->aral[partition]);
  916. #endif
  917. PGC_PAGE *page;
  918. size_t spins = 0;
  919. if(unlikely(entry->start_time_s < 0))
  920. entry->start_time_s = 0;
  921. if(unlikely(entry->end_time_s < 0))
  922. entry->end_time_s = 0;
  923. do {
  924. if(++spins > 1)
  925. __atomic_add_fetch(&cache->stats.insert_spins, 1, __ATOMIC_RELAXED);
  926. pgc_index_write_lock(cache, partition);
  927. size_t mem_before_judyl = 0, mem_after_judyl = 0;
  928. mem_before_judyl += JudyLMemUsed(cache->index[partition].sections_judy);
  929. Pvoid_t *metrics_judy_pptr = JudyLIns(&cache->index[partition].sections_judy, entry->section, PJE0);
  930. if(unlikely(!metrics_judy_pptr || metrics_judy_pptr == PJERR))
  931. fatal("DBENGINE CACHE: corrupted sections judy array");
  932. mem_after_judyl += JudyLMemUsed(cache->index[partition].sections_judy);
  933. mem_before_judyl += JudyLMemUsed(*metrics_judy_pptr);
  934. Pvoid_t *pages_judy_pptr = JudyLIns(metrics_judy_pptr, entry->metric_id, PJE0);
  935. if(unlikely(!pages_judy_pptr || pages_judy_pptr == PJERR))
  936. fatal("DBENGINE CACHE: corrupted pages judy array");
  937. mem_after_judyl += JudyLMemUsed(*metrics_judy_pptr);
  938. mem_before_judyl += JudyLMemUsed(*pages_judy_pptr);
  939. Pvoid_t *page_ptr = JudyLIns(pages_judy_pptr, entry->start_time_s, PJE0);
  940. if(unlikely(!page_ptr || page_ptr == PJERR))
  941. fatal("DBENGINE CACHE: corrupted page in judy array");
  942. mem_after_judyl += JudyLMemUsed(*pages_judy_pptr);
  943. pgc_stats_index_judy_change(cache, mem_before_judyl, mem_after_judyl);
  944. page = *page_ptr;
  945. if (likely(!page)) {
  946. #ifdef PGC_WITH_ARAL
  947. page = allocation;
  948. allocation = NULL;
  949. #else
  950. page = mallocz(sizeof(PGC_PAGE) + cache->config.additional_bytes_per_page);
  951. #endif
  952. page->refcount = 1;
  953. page->accesses = (entry->hot) ? 0 : 1;
  954. page->flags = 0;
  955. page->section = entry->section;
  956. page->metric_id = entry->metric_id;
  957. page->start_time_s = entry->start_time_s;
  958. page->end_time_s = entry->end_time_s;
  959. page->update_every_s = entry->update_every_s;
  960. page->data = entry->data;
  961. page->assumed_size = page_assumed_size(cache, entry->size);
  962. spinlock_init(&page->transition_spinlock);
  963. page->link.prev = NULL;
  964. page->link.next = NULL;
  965. if(cache->config.additional_bytes_per_page) {
  966. if(entry->custom_data)
  967. memcpy(page->custom_data, entry->custom_data, cache->config.additional_bytes_per_page);
  968. else
  969. memset(page->custom_data, 0, cache->config.additional_bytes_per_page);
  970. }
  971. // put it in the index
  972. *page_ptr = page;
  973. pointer_add(cache, page);
  974. pgc_index_write_unlock(cache, partition);
  975. if (entry->hot)
  976. page_set_hot(cache, page);
  977. else
  978. page_set_clean(cache, page, false, false);
  979. PGC_REFERENCED_PAGES_PLUS1(cache, page);
  980. // update statistics
  981. __atomic_add_fetch(&cache->stats.added_entries, 1, __ATOMIC_RELAXED);
  982. __atomic_add_fetch(&cache->stats.added_size, page->assumed_size, __ATOMIC_RELAXED);
  983. __atomic_add_fetch(&cache->stats.entries, 1, __ATOMIC_RELAXED);
  984. __atomic_add_fetch(&cache->stats.size, page->assumed_size, __ATOMIC_RELAXED);
  985. if(added)
  986. *added = true;
  987. }
  988. else {
  989. if (!page_acquire(cache, page))
  990. page = NULL;
  991. else if(added)
  992. *added = false;
  993. pgc_index_write_unlock(cache, partition);
  994. if(unlikely(!page)) {
  995. // now that we don't have the lock,
  996. // give it some time for the old page to go away
  997. struct timespec ns = { .tv_sec = 0, .tv_nsec = 1 };
  998. nanosleep(&ns, NULL);
  999. }
  1000. }
  1001. } while(!page);
  1002. #ifdef PGC_WITH_ARAL
  1003. if(allocation)
  1004. aral_freez(cache->aral[partition], allocation);
  1005. #endif
  1006. __atomic_sub_fetch(&cache->stats.workers_add, 1, __ATOMIC_RELAXED);
  1007. if(!entry->hot)
  1008. evict_on_clean_page_added(cache);
  1009. if((cache->config.options & PGC_OPTIONS_FLUSH_PAGES_INLINE) || flushing_critical(cache)) {
  1010. flush_pages(cache, cache->config.max_flushes_inline, PGC_SECTION_ALL,
  1011. false, false);
  1012. }
  1013. return page;
  1014. }
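
// page_find_and_acquire() looks up a page in the index for the given section and metric.
// PGC_SEARCH_EXACT matches the exact start time; PGC_SEARCH_CLOSEST first tries the exact
// start time, then the latest page starting before it (if it still covers the timestamp),
// then the first page starting after it. FIRST/NEXT/LAST/PREV walk the metric's pages judy
// array relative to the given time. A found page is returned acquired, or NULL on miss.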
static PGC_PAGE *page_find_and_acquire(PGC *cache, Word_t section, Word_t metric_id, time_t start_time_s, PGC_SEARCH method) {
    __atomic_add_fetch(&cache->stats.workers_search, 1, __ATOMIC_RELAXED);

    size_t *stats_hit_ptr, *stats_miss_ptr;

    if(method == PGC_SEARCH_CLOSEST) {
        __atomic_add_fetch(&cache->stats.searches_closest, 1, __ATOMIC_RELAXED);
        stats_hit_ptr = &cache->stats.searches_closest_hits;
        stats_miss_ptr = &cache->stats.searches_closest_misses;
    }
    else {
        __atomic_add_fetch(&cache->stats.searches_exact, 1, __ATOMIC_RELAXED);
        stats_hit_ptr = &cache->stats.searches_exact_hits;
        stats_miss_ptr = &cache->stats.searches_exact_misses;
    }

    PGC_PAGE *page = NULL;
    size_t partition = pgc_indexing_partition(cache, metric_id);
    pgc_index_read_lock(cache, partition);

    Pvoid_t *metrics_judy_pptr = JudyLGet(cache->index[partition].sections_judy, section, PJE0);
    if(unlikely(metrics_judy_pptr == PJERR))
        fatal("DBENGINE CACHE: corrupted sections judy array");

    if(unlikely(!metrics_judy_pptr)) {
        // section does not exist
        goto cleanup;
    }

    Pvoid_t *pages_judy_pptr = JudyLGet(*metrics_judy_pptr, metric_id, PJE0);
    if(unlikely(pages_judy_pptr == PJERR))
        fatal("DBENGINE CACHE: corrupted pages judy array");

    if(unlikely(!pages_judy_pptr)) {
        // metric does not exist
        goto cleanup;
    }

    switch(method) {
        default:
        case PGC_SEARCH_CLOSEST: {
            Pvoid_t *page_ptr = JudyLGet(*pages_judy_pptr, start_time_s, PJE0);
            if (unlikely(page_ptr == PJERR))
                fatal("DBENGINE CACHE: corrupted page in pages judy array");

            if (page_ptr)
                page = *page_ptr;

            else {
                Word_t time = start_time_s;

                // find the previous page
                page_ptr = JudyLPrev(*pages_judy_pptr, &time, PJE0);
                if(unlikely(page_ptr == PJERR))
                    fatal("DBENGINE CACHE: corrupted page in pages judy array #2");

                if(page_ptr) {
                    // found a page starting before our timestamp
                    // check if our timestamp is included
                    page = *page_ptr;
                    if(start_time_s > page->end_time_s)
                        // it is not good for us
                        page = NULL;
                }

                if(!page) {
                    // find the next page then...
                    time = start_time_s;
                    page_ptr = JudyLNext(*pages_judy_pptr, &time, PJE0);
                    if(page_ptr)
                        page = *page_ptr;
                }
            }
        }
        break;

        case PGC_SEARCH_EXACT: {
            Pvoid_t *page_ptr = JudyLGet(*pages_judy_pptr, start_time_s, PJE0);
            if (unlikely(page_ptr == PJERR))
                fatal("DBENGINE CACHE: corrupted page in pages judy array");

            if (page_ptr)
                page = *page_ptr;
        }
        break;

        case PGC_SEARCH_FIRST: {
            Word_t time = start_time_s;
            Pvoid_t *page_ptr = JudyLFirst(*pages_judy_pptr, &time, PJE0);
            if (unlikely(page_ptr == PJERR))
                fatal("DBENGINE CACHE: corrupted page in pages judy array");

            if (page_ptr)
                page = *page_ptr;
        }
        break;

        case PGC_SEARCH_NEXT: {
            Word_t time = start_time_s;
            Pvoid_t *page_ptr = JudyLNext(*pages_judy_pptr, &time, PJE0);
            if (unlikely(page_ptr == PJERR))
                fatal("DBENGINE CACHE: corrupted page in pages judy array");

            if (page_ptr)
                page = *page_ptr;
        }
        break;

        case PGC_SEARCH_LAST: {
            Word_t time = start_time_s;
            Pvoid_t *page_ptr = JudyLLast(*pages_judy_pptr, &time, PJE0);
            if (unlikely(page_ptr == PJERR))
                fatal("DBENGINE CACHE: corrupted page in pages judy array");

            if (page_ptr)
                page = *page_ptr;
        }
        break;

        case PGC_SEARCH_PREV: {
            Word_t time = start_time_s;
            Pvoid_t *page_ptr = JudyLPrev(*pages_judy_pptr, &time, PJE0);
            if (unlikely(page_ptr == PJERR))
                fatal("DBENGINE CACHE: corrupted page in pages judy array");

            if (page_ptr)
                page = *page_ptr;
        }
        break;
    }

    if(page) {
        pointer_check(cache, page);

        if(!page_acquire(cache, page)) {
            // this page is not good to use
            page = NULL;
        }
    }

cleanup:
    pgc_index_read_unlock(cache, partition);

    if(page) {
        __atomic_add_fetch(stats_hit_ptr, 1, __ATOMIC_RELAXED);
        page_has_been_accessed(cache, page);
    }
    else
        __atomic_add_fetch(stats_miss_ptr, 1, __ATOMIC_RELAXED);

    __atomic_sub_fetch(&cache->stats.workers_search, 1, __ATOMIC_RELAXED);

    return page;
}
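
// all_hot_pages_to_dirty() walks the hot queue (optionally limited to a single section)
// and moves every page it can acquire to the dirty queue, so it becomes eligible for flushing.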
static void all_hot_pages_to_dirty(PGC *cache, Word_t section) {
    pgc_ll_lock(cache, &cache->hot);

    bool first = true;
    Word_t last_section = (section == PGC_SECTION_ALL) ? 0 : section;
    Pvoid_t *section_pages_pptr;
    while ((section_pages_pptr = JudyLFirstThenNext(cache->hot.sections_judy, &last_section, &first))) {
        if(section != PGC_SECTION_ALL && last_section != section)
            break;

        struct section_pages *sp = *section_pages_pptr;

        PGC_PAGE *page = sp->base;
        while(page) {
            PGC_PAGE *next = page->link.next;

            if(page_acquire(cache, page)) {
                page_set_dirty(cache, page, true);
                page_release(cache, page, false);
                // page ptr may be invalid now
            }

            page = next;
        }
    }
    pgc_ll_unlock(cache, &cache->hot);
}
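
// flush_pages() collects up to max_dirty_pages_per_call dirty pages per section into a batch,
// removes them from the dirty queue, calls pgc_save_dirty_cb to persist them (with the dirty
// queue lock released), and finally marks them clean. A batch smaller than the optimal flush
// size is cancelled, unless all_of_them is set.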
// returns true when there is more work to do
static bool flush_pages(PGC *cache, size_t max_flushes, Word_t section, bool wait, bool all_of_them) {
    internal_fatal(!cache->dirty.linked_list_in_sections_judy,
                   "wrong dirty pages configuration - dirty pages need to have a judy array, not a linked list");

    if(!all_of_them && !wait) {
        // we have been called from a data collection thread
        // let's not waste its time...

        if(!pgc_ll_trylock(cache, &cache->dirty)) {
            // we would block, so give up...
            return true;
        }

        // we got the lock at this point
    }
    else
        pgc_ll_lock(cache, &cache->dirty);

    size_t optimal_flush_size = cache->config.max_dirty_pages_per_call;
    size_t dirty_version_at_entry = cache->dirty.version;
    if(!all_of_them && (cache->dirty.stats->entries < optimal_flush_size || cache->dirty.last_version_checked == dirty_version_at_entry)) {
        pgc_ll_unlock(cache, &cache->dirty);
        return false;
    }

    __atomic_add_fetch(&cache->stats.workers_flush, 1, __ATOMIC_RELAXED);

    bool have_dirty_lock = true;

    if(all_of_them || !max_flushes)
        max_flushes = SIZE_MAX;

    Word_t last_section = (section == PGC_SECTION_ALL) ? 0 : section;
    size_t flushes_so_far = 0;
    Pvoid_t *section_pages_pptr;
    bool stopped_before_finishing = false;
    size_t spins = 0;
    bool first = true;

    while (have_dirty_lock && (section_pages_pptr = JudyLFirstThenNext(cache->dirty.sections_judy, &last_section, &first))) {
        if(section != PGC_SECTION_ALL && last_section != section)
            break;

        struct section_pages *sp = *section_pages_pptr;
        if(!all_of_them && sp->entries < optimal_flush_size)
            continue;

        if(!all_of_them && flushes_so_far > max_flushes) {
            stopped_before_finishing = true;
            break;
        }

        if(++spins > 1)
            __atomic_add_fetch(&cache->stats.flush_spins, 1, __ATOMIC_RELAXED);

        PGC_ENTRY array[optimal_flush_size];
        PGC_PAGE *pages[optimal_flush_size];
        size_t pages_added = 0, pages_added_size = 0;
        size_t pages_removed_dirty = 0, pages_removed_dirty_size = 0;
        size_t pages_cancelled = 0, pages_cancelled_size = 0;
        size_t pages_made_clean = 0, pages_made_clean_size = 0;

        PGC_PAGE *page = sp->base;
        while (page && pages_added < optimal_flush_size) {
            PGC_PAGE *next = page->link.next;

            internal_fatal(page_get_status_flags(page) != PGC_PAGE_DIRTY,
                           "DBENGINE CACHE: page should be in the dirty list before saved");

            if (page_acquire(cache, page)) {
                internal_fatal(page_get_status_flags(page) != PGC_PAGE_DIRTY,
                               "DBENGINE CACHE: page should be in the dirty list before saved");

                internal_fatal(page->section != last_section,
                               "DBENGINE CACHE: dirty page is not in the right section (tier)");

                if(!page_transition_trylock(cache, page)) {
                    page_release(cache, page, false);
                    // page ptr may be invalid now
                }
                else {
                    pages[pages_added] = page;
                    array[pages_added] = (PGC_ENTRY) {
                            .section = page->section,
                            .metric_id = page->metric_id,
                            .start_time_s = page->start_time_s,
                            .end_time_s = __atomic_load_n(&page->end_time_s, __ATOMIC_RELAXED),
                            .update_every_s = page->update_every_s,
                            .size = page_size_from_assumed_size(cache, page->assumed_size),
                            .data = page->data,
                            .custom_data = (cache->config.additional_bytes_per_page) ? page->custom_data : NULL,
                            .hot = false,
                    };

                    pages_added_size += page->assumed_size;
                    pages_added++;
                }
            }

            page = next;
        }

        // do we have enough to save?
        if(all_of_them || pages_added == optimal_flush_size) {
            // we should do it

            for (size_t i = 0; i < pages_added; i++) {
                PGC_PAGE *tpg = pages[i];

                internal_fatal(page_get_status_flags(tpg) != PGC_PAGE_DIRTY,
                               "DBENGINE CACHE: page should be in the dirty list before saved");

                __atomic_add_fetch(&cache->stats.flushing_entries, 1, __ATOMIC_RELAXED);
                __atomic_add_fetch(&cache->stats.flushing_size, tpg->assumed_size, __ATOMIC_RELAXED);

                // remove it from the dirty list
                pgc_ll_del(cache, &cache->dirty, tpg, true);

                pages_removed_dirty_size += tpg->assumed_size;
                pages_removed_dirty++;
            }

            // next time, repeat the same section (tier)
            first = true;
        }
        else {
            // we can't do it

            for (size_t i = 0; i < pages_added; i++) {
                PGC_PAGE *tpg = pages[i];

                internal_fatal(page_get_status_flags(tpg) != PGC_PAGE_DIRTY,
                               "DBENGINE CACHE: page should be in the dirty list before saved");

                pages_cancelled_size += tpg->assumed_size;
                pages_cancelled++;

                page_transition_unlock(cache, tpg);
                page_release(cache, tpg, false);
                // page ptr may be invalid now
            }

            __atomic_add_fetch(&cache->stats.flushes_cancelled, pages_cancelled, __ATOMIC_RELAXED);
            __atomic_add_fetch(&cache->stats.flushes_cancelled_size, pages_cancelled_size, __ATOMIC_RELAXED);

            internal_fatal(pages_added != pages_cancelled || pages_added_size != pages_cancelled_size,
                           "DBENGINE CACHE: flushing cancel pages mismatch");

            // next time, continue to the next section (tier)
            first = false;
            continue;
        }

        if(cache->config.pgc_save_init_cb)
            cache->config.pgc_save_init_cb(cache, last_section);

        pgc_ll_unlock(cache, &cache->dirty);
        have_dirty_lock = false;

        // call the callback to save them
        // it may take some time, so let's release the lock
        cache->config.pgc_save_dirty_cb(cache, array, pages, pages_added);
        flushes_so_far++;

        __atomic_add_fetch(&cache->stats.flushes_completed, pages_added, __ATOMIC_RELAXED);
        __atomic_add_fetch(&cache->stats.flushes_completed_size, pages_added_size, __ATOMIC_RELAXED);

        size_t pages_to_evict = 0; (void)pages_to_evict;
        for (size_t i = 0; i < pages_added; i++) {
            PGC_PAGE *tpg = pages[i];

            internal_fatal(page_get_status_flags(tpg) != 0,
                           "DBENGINE CACHE: page should not be in any list while it is being saved");

            __atomic_sub_fetch(&cache->stats.flushing_entries, 1, __ATOMIC_RELAXED);
            __atomic_sub_fetch(&cache->stats.flushing_size, tpg->assumed_size, __ATOMIC_RELAXED);

            pages_made_clean_size += tpg->assumed_size;
            pages_made_clean++;

            if(!tpg->accesses)
                pages_to_evict++;

            page_set_clean(cache, tpg, true, false);
            page_transition_unlock(cache, tpg);
            page_release(cache, tpg, false);
            // tpg ptr may be invalid now
        }

        internal_fatal(pages_added != pages_made_clean || pages_added != pages_removed_dirty ||
                       pages_added_size != pages_made_clean_size || pages_added_size != pages_removed_dirty_size
                       , "DBENGINE CACHE: flushing pages mismatch");

        if(!all_of_them && !wait) {
            if(pgc_ll_trylock(cache, &cache->dirty))
                have_dirty_lock = true;

            else {
                stopped_before_finishing = true;
                have_dirty_lock = false;
            }
        }
        else {
            pgc_ll_lock(cache, &cache->dirty);
            have_dirty_lock = true;
        }
    }

    if(have_dirty_lock) {
        if(!stopped_before_finishing && dirty_version_at_entry > cache->dirty.last_version_checked)
            cache->dirty.last_version_checked = dirty_version_at_entry;

        pgc_ll_unlock(cache, &cache->dirty);
    }

    __atomic_sub_fetch(&cache->stats.workers_flush, 1, __ATOMIC_RELAXED);

    return stopped_before_finishing;
}
void free_all_unreferenced_clean_pages(PGC *cache) {
    evict_pages(cache, 0, 0, true, true);
}

// ----------------------------------------------------------------------------
// public API
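
// pgc_create() allocates and initializes a cache. Unreasonable arguments are clamped to sane
// minimums (the clean cache size is at least 1 MiB, the partitions default to the number of
// CPUs), and the cache pressure thresholds are fixed per-mille values (severe 1010,
// aggressive 990, healthy 980, low 970).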
PGC *pgc_create(const char *name,
                size_t clean_size_bytes, free_clean_page_callback pgc_free_cb,
                size_t max_dirty_pages_per_flush,
                save_dirty_init_callback pgc_save_init_cb,
                save_dirty_page_callback pgc_save_dirty_cb,
                size_t max_pages_per_inline_eviction, size_t max_inline_evictors,
                size_t max_skip_pages_per_inline_eviction,
                size_t max_flushes_inline,
                PGC_OPTIONS options, size_t partitions, size_t additional_bytes_per_page) {

    if(max_pages_per_inline_eviction < 2)
        max_pages_per_inline_eviction = 2;

    if(max_dirty_pages_per_flush < 1)
        max_dirty_pages_per_flush = 1;

    if(max_flushes_inline * max_dirty_pages_per_flush < 2)
        max_flushes_inline = 2;

    PGC *cache = callocz(1, sizeof(PGC));
    strncpyz(cache->config.name, name, PGC_NAME_MAX);
    cache->config.options = options;
    cache->config.clean_size = (clean_size_bytes < 1 * 1024 * 1024) ? 1 * 1024 * 1024 : clean_size_bytes;
    cache->config.pgc_free_clean_cb = pgc_free_cb;
    cache->config.max_dirty_pages_per_call = max_dirty_pages_per_flush;
    cache->config.pgc_save_init_cb = pgc_save_init_cb;
    cache->config.pgc_save_dirty_cb = pgc_save_dirty_cb;
    cache->config.max_pages_per_inline_eviction = max_pages_per_inline_eviction;
    cache->config.max_skip_pages_per_inline_eviction = (max_skip_pages_per_inline_eviction < 2) ? 2 : max_skip_pages_per_inline_eviction;
    cache->config.max_flushes_inline = (max_flushes_inline < 1) ? 1 : max_flushes_inline;
    cache->config.partitions = partitions < 1 ? (size_t)get_netdata_cpus() : partitions;
    cache->config.additional_bytes_per_page = additional_bytes_per_page;

    cache->config.max_workers_evict_inline = max_inline_evictors;
    cache->config.severe_pressure_per1000 = 1010;
    cache->config.aggressive_evict_per1000 = 990;
    cache->config.healthy_size_per1000 = 980;
    cache->config.evict_low_threshold_per1000 = 970;

    cache->index = callocz(cache->config.partitions, sizeof(struct pgc_index));

    for(size_t part = 0; part < cache->config.partitions ; part++)
        rw_spinlock_init(&cache->index[part].rw_spinlock);

    spinlock_init(&cache->hot.spinlock);
    spinlock_init(&cache->dirty.spinlock);
    spinlock_init(&cache->clean.spinlock);

    cache->hot.flags = PGC_PAGE_HOT;
    cache->hot.linked_list_in_sections_judy = true;
    cache->hot.stats = &cache->stats.queues.hot;

    cache->dirty.flags = PGC_PAGE_DIRTY;
    cache->dirty.linked_list_in_sections_judy = true;
    cache->dirty.stats = &cache->stats.queues.dirty;

    cache->clean.flags = PGC_PAGE_CLEAN;
    cache->clean.linked_list_in_sections_judy = false;
    cache->clean.stats = &cache->stats.queues.clean;

    pgc_section_pages_static_aral_init();

#ifdef PGC_WITH_ARAL
    cache->aral = callocz(cache->config.partitions, sizeof(ARAL *));
    for(size_t part = 0; part < cache->config.partitions ; part++) {
        char buf[100 + 1];
        snprintfz(buf, sizeof(buf) - 1, "%s[%zu]", name, part);
        cache->aral[part] = aral_create(
                buf,
                sizeof(PGC_PAGE) + cache->config.additional_bytes_per_page,
                0,
                16384,
                aral_statistics(pgc_section_pages_aral),
                NULL, NULL, false, false);
    }
#endif

    pointer_index_init(cache);

    return cache;
}
struct aral_statistics *pgc_aral_statistics(void) {
    return aral_statistics(pgc_section_pages_aral);
}

size_t pgc_aral_structures(void) {
    return aral_structures(pgc_section_pages_aral);
}

size_t pgc_aral_overhead(void) {
    return aral_overhead(pgc_section_pages_aral);
}

void pgc_flush_all_hot_and_dirty_pages(PGC *cache, Word_t section) {
    all_hot_pages_to_dirty(cache, section);

    // save all dirty pages to make them clean
    flush_pages(cache, 0, section, true, true);
}

void pgc_destroy(PGC *cache) {
    // convert all hot pages to dirty
    all_hot_pages_to_dirty(cache, PGC_SECTION_ALL);

    // save all dirty pages to make them clean
    flush_pages(cache, 0, PGC_SECTION_ALL, true, true);

    // free all unreferenced clean pages
    free_all_unreferenced_clean_pages(cache);

    if(PGC_REFERENCED_PAGES(cache))
        netdata_log_error("DBENGINE CACHE: there are %zu referenced cache pages - leaving the cache allocated", PGC_REFERENCED_PAGES(cache));
    else {
        pointer_destroy_index(cache);

        // for(size_t part = 0; part < cache->config.partitions ; part++)
        //     netdata_rwlock_destroy(&cache->index[part].rw_spinlock);

#ifdef PGC_WITH_ARAL
        for(size_t part = 0; part < cache->config.partitions ; part++)
            aral_destroy(cache->aral[part]);

        freez(cache->aral);
#endif

        freez(cache->index);
        freez(cache);
    }
}
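
// Page reference counting: every page returned by the add/get/dup calls below is acquired and
// must eventually be released with pgc_page_release() (or one of the hot-page release variants
// below). A page cannot be evicted or freed while it is still acquired.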
PGC_PAGE *pgc_page_add_and_acquire(PGC *cache, PGC_ENTRY entry, bool *added) {
    return page_add(cache, &entry, added);
}

PGC_PAGE *pgc_page_dup(PGC *cache, PGC_PAGE *page) {
    if(!page_acquire(cache, page))
        fatal("DBENGINE CACHE: tried to dup a page that is not acquired!");

    return page;
}

void pgc_page_release(PGC *cache, PGC_PAGE *page) {
    page_release(cache, page, is_page_clean(page));
}

void pgc_page_hot_to_dirty_and_release(PGC *cache, PGC_PAGE *page) {
    __atomic_add_fetch(&cache->stats.workers_hot2dirty, 1, __ATOMIC_RELAXED);

//#ifdef NETDATA_INTERNAL_CHECKS
//    page_transition_lock(cache, page);
//    internal_fatal(!is_page_hot(page), "DBENGINE CACHE: called %s() but page is not hot", __FUNCTION__ );
//    page_transition_unlock(cache, page);
//#endif

    // make page dirty
    page_set_dirty(cache, page, false);

    // release the page
    page_release(cache, page, true);
    // page ptr may be invalid now

    __atomic_sub_fetch(&cache->stats.workers_hot2dirty, 1, __ATOMIC_RELAXED);

    // flush, if we have to
    if((cache->config.options & PGC_OPTIONS_FLUSH_PAGES_INLINE) || flushing_critical(cache)) {
        flush_pages(cache, cache->config.max_flushes_inline, PGC_SECTION_ALL,
                    false, false);
    }
}

bool pgc_page_to_clean_evict_or_release(PGC *cache, PGC_PAGE *page) {
    bool ret;

    __atomic_add_fetch(&cache->stats.workers_hot2dirty, 1, __ATOMIC_RELAXED);

    // prevent accesses from increasing the accesses counter
    page_flag_set(page, PGC_PAGE_HAS_NO_DATA_IGNORE_ACCESSES);

    // zero the accesses counter
    __atomic_store_n(&page->accesses, 0, __ATOMIC_RELEASE);

    // if there are no other references to it, evict it immediately
    if(make_acquired_page_clean_and_evict_or_page_release(cache, page)) {
        __atomic_add_fetch(&cache->stats.hot_empty_pages_evicted_immediately, 1, __ATOMIC_RELAXED);
        ret = true;
    }
    else {
        __atomic_add_fetch(&cache->stats.hot_empty_pages_evicted_later, 1, __ATOMIC_RELAXED);
        ret = false;
    }

    __atomic_sub_fetch(&cache->stats.workers_hot2dirty, 1, __ATOMIC_RELAXED);

    return ret;
}
Word_t pgc_page_section(PGC_PAGE *page) {
    return page->section;
}

Word_t pgc_page_metric(PGC_PAGE *page) {
    return page->metric_id;
}

time_t pgc_page_start_time_s(PGC_PAGE *page) {
    return page->start_time_s;
}

time_t pgc_page_end_time_s(PGC_PAGE *page) {
    return page->end_time_s;
}

time_t pgc_page_update_every_s(PGC_PAGE *page) {
    return page->update_every_s;
}

time_t pgc_page_fix_update_every(PGC_PAGE *page, time_t update_every_s) {
    if(page->update_every_s == 0)
        page->update_every_s = (uint32_t) update_every_s;

    return page->update_every_s;
}

time_t pgc_page_fix_end_time_s(PGC_PAGE *page, time_t end_time_s) {
    page->end_time_s = end_time_s;
    return page->end_time_s;
}

void *pgc_page_data(PGC_PAGE *page) {
    return page->data;
}

void *pgc_page_custom_data(PGC *cache, PGC_PAGE *page) {
    if(cache->config.additional_bytes_per_page)
        return page->custom_data;

    return NULL;
}

size_t pgc_page_data_size(PGC *cache, PGC_PAGE *page) {
    return page_size_from_assumed_size(cache, page->assumed_size);
}

bool pgc_is_page_hot(PGC_PAGE *page) {
    return is_page_hot(page);
}

bool pgc_is_page_dirty(PGC_PAGE *page) {
    return is_page_dirty(page);
}

bool pgc_is_page_clean(PGC_PAGE *page) {
    return is_page_clean(page);
}
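
// pgc_reset_hot_max() resets the recorded maximums of the hot queue to its current size, so
// that dynamic cache sizing is recalculated, and then re-evaluates whether eviction is needed.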
void pgc_reset_hot_max(PGC *cache) {
    size_t entries = __atomic_load_n(&cache->hot.stats->entries, __ATOMIC_RELAXED);
    size_t size = __atomic_load_n(&cache->hot.stats->size, __ATOMIC_RELAXED);

    __atomic_store_n(&cache->hot.stats->max_entries, entries, __ATOMIC_RELAXED);
    __atomic_store_n(&cache->hot.stats->max_size, size, __ATOMIC_RELAXED);

    size_t size_to_evict = 0;
    cache_usage_per1000(cache, &size_to_evict);
    evict_pages(cache, 0, 0, true, false);
}

void pgc_set_dynamic_target_cache_size_callback(PGC *cache, dynamic_target_cache_size_callback callback) {
    cache->config.dynamic_target_size_cb = callback;

    size_t size_to_evict = 0;
    cache_usage_per1000(cache, &size_to_evict);
    evict_pages(cache, 0, 0, true, false);
}

size_t pgc_get_current_cache_size(PGC *cache) {
    cache_usage_per1000(cache, NULL);
    return __atomic_load_n(&cache->stats.current_cache_size, __ATOMIC_RELAXED);
}

size_t pgc_get_wanted_cache_size(PGC *cache) {
    cache_usage_per1000(cache, NULL);
    return __atomic_load_n(&cache->stats.wanted_cache_size, __ATOMIC_RELAXED);
}

bool pgc_evict_pages(PGC *cache, size_t max_skip, size_t max_evict) {
    bool under_pressure = cache_needs_space_aggressively(cache);
    return evict_pages(cache,
                       under_pressure ? 0 : max_skip,
                       under_pressure ? 0 : max_evict,
                       true, false);
}

bool pgc_flush_pages(PGC *cache, size_t max_flushes) {
    bool under_pressure = flushing_critical(cache);
    return flush_pages(cache, under_pressure ? 0 : max_flushes, PGC_SECTION_ALL, true, false);
}

void pgc_page_hot_set_end_time_s(PGC *cache __maybe_unused, PGC_PAGE *page, time_t end_time_s) {
    internal_fatal(!is_page_hot(page),
                   "DBENGINE CACHE: end_time_s update on non-hot page");

    internal_fatal(end_time_s < __atomic_load_n(&page->end_time_s, __ATOMIC_RELAXED),
                   "DBENGINE CACHE: end_time_s is not bigger than existing");

    __atomic_store_n(&page->end_time_s, end_time_s, __ATOMIC_RELAXED);

#ifdef PGC_COUNT_POINTS_COLLECTED
    __atomic_add_fetch(&cache->stats.points_collected, 1, __ATOMIC_RELAXED);
#endif
}

PGC_PAGE *pgc_page_get_and_acquire(PGC *cache, Word_t section, Word_t metric_id, time_t start_time_s, PGC_SEARCH method) {
    return page_find_and_acquire(cache, section, metric_id, start_time_s, method);
}

struct pgc_statistics pgc_get_statistics(PGC *cache) {
    // FIXME - get the statistics atomically
    return cache->stats;
}

size_t pgc_hot_and_dirty_entries(PGC *cache) {
    size_t entries = 0;

    entries += __atomic_load_n(&cache->hot.stats->entries, __ATOMIC_RELAXED);
    entries += __atomic_load_n(&cache->dirty.stats->entries, __ATOMIC_RELAXED);
    entries += __atomic_load_n(&cache->stats.flushing_entries, __ATOMIC_RELAXED);
    entries += __atomic_load_n(&cache->stats.hot2dirty_entries, __ATOMIC_RELAXED);

    return entries;
}
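
// pgc_open_cache_to_journal_v2() indexes all hot pages of a section that belong to the given
// datafile, building two judy arrays (metrics and extents), and hands them to the callback to
// produce the journal v2 file. While a page is processed the hot queue lock is dropped, so the
// page is first transition-locked, acquired and flagged as being migrated. After the callback
// returns, every indexed page is turned dirty and released, and the temporary structures are freed.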
void pgc_open_cache_to_journal_v2(PGC *cache, Word_t section, unsigned datafile_fileno, uint8_t type, migrate_to_v2_callback cb, void *data) {
    __atomic_add_fetch(&rrdeng_cache_efficiency_stats.journal_v2_indexing_started, 1, __ATOMIC_RELAXED);
    __atomic_add_fetch(&cache->stats.workers_jv2_flush, 1, __ATOMIC_RELAXED);

    pgc_ll_lock(cache, &cache->hot);

    Pvoid_t JudyL_metrics = NULL;
    Pvoid_t JudyL_extents_pos = NULL;

    size_t count_of_unique_extents = 0;
    size_t count_of_unique_metrics = 0;
    size_t count_of_unique_pages = 0;

    size_t master_extent_index_id = 0;

    Pvoid_t *section_pages_pptr = JudyLGet(cache->hot.sections_judy, section, PJE0);
    if(!section_pages_pptr) {
        pgc_ll_unlock(cache, &cache->hot);
        return;
    }

    struct section_pages *sp = *section_pages_pptr;
    if(!spinlock_trylock(&sp->migration_to_v2_spinlock)) {
        netdata_log_info("DBENGINE: migration to journal v2 for datafile %u is postponed, another jv2 indexer is already running for this section", datafile_fileno);
        pgc_ll_unlock(cache, &cache->hot);
        return;
    }

    ARAL *ar_mi = aral_by_size_acquire(sizeof(struct jv2_metrics_info));
    ARAL *ar_pi = aral_by_size_acquire(sizeof(struct jv2_page_info));
    ARAL *ar_ei = aral_by_size_acquire(sizeof(struct jv2_extents_info));

    for(PGC_PAGE *page = sp->base; page ; page = page->link.next) {
        struct extent_io_data *xio = (struct extent_io_data *)page->custom_data;
        if(xio->fileno != datafile_fileno) continue;

        if(page_flag_check(page, PGC_PAGE_IS_BEING_MIGRATED_TO_V2)) {
            internal_fatal(true, "Migration to journal v2: page has already been migrated to v2");
            continue;
        }

        if(!page_transition_trylock(cache, page)) {
            internal_fatal(true, "Migration to journal v2: cannot get page transition lock");
            continue;
        }

        if(!page_acquire(cache, page)) {
            internal_fatal(true, "Migration to journal v2: cannot acquire page for migration to v2");
            continue;
        }

        page_flag_set(page, PGC_PAGE_IS_BEING_MIGRATED_TO_V2);

        pgc_ll_unlock(cache, &cache->hot);

        // update the extents JudyL

        size_t current_extent_index_id;
        Pvoid_t *PValue = JudyLIns(&JudyL_extents_pos, xio->pos, PJE0);
        if(!PValue || *PValue == PJERR)
            fatal("Corrupted JudyL extents pos");

        struct jv2_extents_info *ei;
        if(!*PValue) {
            ei = aral_mallocz(ar_ei); // callocz(1, sizeof(struct jv2_extents_info));
            ei->pos = xio->pos;
            ei->bytes = xio->bytes;
            ei->number_of_pages = 1;
            ei->index = master_extent_index_id++;
            *PValue = ei;

            count_of_unique_extents++;
        }
        else {
            ei = *PValue;
            ei->number_of_pages++;
        }

        current_extent_index_id = ei->index;

        // update the metrics JudyL

        PValue = JudyLIns(&JudyL_metrics, page->metric_id, PJE0);
        if(!PValue || *PValue == PJERR)
            fatal("Corrupted JudyL metrics");

        struct jv2_metrics_info *mi;
        if(!*PValue) {
            mi = aral_mallocz(ar_mi); // callocz(1, sizeof(struct jv2_metrics_info));
            mi->uuid = mrg_metric_uuid(main_mrg, (METRIC *)page->metric_id);
            mi->first_time_s = page->start_time_s;
            mi->last_time_s = page->end_time_s;
            mi->number_of_pages = 1;
            mi->page_list_header = 0;
            mi->JudyL_pages_by_start_time = NULL;
            *PValue = mi;

            count_of_unique_metrics++;
        }
        else {
            mi = *PValue;
            mi->number_of_pages++;
            if(page->start_time_s < mi->first_time_s)
                mi->first_time_s = page->start_time_s;
            if(page->end_time_s > mi->last_time_s)
                mi->last_time_s = page->end_time_s;
        }

        PValue = JudyLIns(&mi->JudyL_pages_by_start_time, page->start_time_s, PJE0);
        if(!PValue || *PValue == PJERR)
            fatal("Corrupted JudyL metric pages");

        if(!*PValue) {
            struct jv2_page_info *pi = aral_mallocz(ar_pi); // callocz(1, (sizeof(struct jv2_page_info)));
            pi->start_time_s = page->start_time_s;
            pi->end_time_s = page->end_time_s;
            pi->update_every_s = page->update_every_s;
            pi->page_length = page_size_from_assumed_size(cache, page->assumed_size);
            pi->page = page;
            pi->extent_index = current_extent_index_id;
            pi->custom_data = (cache->config.additional_bytes_per_page) ? page->custom_data : NULL;
            *PValue = pi;

            count_of_unique_pages++;
        }
        else {
            // impossible situation
            internal_fatal(true, "Page is already in JudyL metric pages");
            page_flag_clear(page, PGC_PAGE_IS_BEING_MIGRATED_TO_V2);
            page_transition_unlock(cache, page);
            page_release(cache, page, false);
        }

        pgc_ll_lock(cache, &cache->hot);
    }

    spinlock_unlock(&sp->migration_to_v2_spinlock);
    pgc_ll_unlock(cache, &cache->hot);

    // callback
    cb(section, datafile_fileno, type, JudyL_metrics, JudyL_extents_pos, count_of_unique_extents, count_of_unique_metrics, count_of_unique_pages, data);

    {
        Pvoid_t *PValue1;
        bool metric_id_first = true;
        Word_t metric_id = 0;
        while ((PValue1 = JudyLFirstThenNext(JudyL_metrics, &metric_id, &metric_id_first))) {
            struct jv2_metrics_info *mi = *PValue1;

            Pvoid_t *PValue2;
            bool start_time_first = true;
            Word_t start_time = 0;
            while ((PValue2 = JudyLFirstThenNext(mi->JudyL_pages_by_start_time, &start_time, &start_time_first))) {
                struct jv2_page_info *pi = *PValue2;
                page_transition_unlock(cache, pi->page);
                pgc_page_hot_to_dirty_and_release(cache, pi->page);
                // make_acquired_page_clean_and_evict_or_page_release(cache, pi->page);
                aral_freez(ar_pi, pi);
            }

            JudyLFreeArray(&mi->JudyL_pages_by_start_time, PJE0);
            aral_freez(ar_mi, mi);
        }
        JudyLFreeArray(&JudyL_metrics, PJE0);
    }

    {
        Pvoid_t *PValue;
        bool extent_pos_first = true;
        Word_t extent_pos = 0;
        while ((PValue = JudyLFirstThenNext(JudyL_extents_pos, &extent_pos, &extent_pos_first))) {
            struct jv2_extents_info *ei = *PValue;
            aral_freez(ar_ei, ei);
        }
        JudyLFreeArray(&JudyL_extents_pos, PJE0);
    }

    aral_by_size_release(ar_ei);
    aral_by_size_release(ar_pi);
    aral_by_size_release(ar_mi);

    __atomic_sub_fetch(&cache->stats.workers_jv2_flush, 1, __ATOMIC_RELAXED);
}
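
// Helpers for the open cache: evict the clean pages whose data pointer references a specific
// datafile, and count the clean or hot pages that still reference a given data pointer.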
static bool match_page_data(PGC_PAGE *page, void *data) {
    return (page->data == data);
}

void pgc_open_evict_clean_pages_of_datafile(PGC *cache, struct rrdengine_datafile *datafile) {
    evict_pages_with_filter(cache, 0, 0, true, true, match_page_data, datafile);
}

size_t pgc_count_clean_pages_having_data_ptr(PGC *cache, Word_t section, void *ptr) {
    size_t found = 0;

    pgc_ll_lock(cache, &cache->clean);
    for(PGC_PAGE *page = cache->clean.base; page ; page = page->link.next)
        found += (page->data == ptr && page->section == section) ? 1 : 0;
    pgc_ll_unlock(cache, &cache->clean);

    return found;
}

size_t pgc_count_hot_pages_having_data_ptr(PGC *cache, Word_t section, void *ptr) {
    size_t found = 0;

    pgc_ll_lock(cache, &cache->hot);
    Pvoid_t *section_pages_pptr = JudyLGet(cache->hot.sections_judy, section, PJE0);
    if(section_pages_pptr) {
        struct section_pages *sp = *section_pages_pptr;
        for(PGC_PAGE *page = sp->base; page ; page = page->link.next)
            found += (page->data == ptr) ? 1 : 0;
    }
    pgc_ll_unlock(cache, &cache->hot);

    return found;
}
// ----------------------------------------------------------------------------
// unittest

static void unittest_free_clean_page_callback(PGC *cache __maybe_unused, PGC_ENTRY entry __maybe_unused) {
    ;
}

static void unittest_save_dirty_page_callback(PGC *cache __maybe_unused, PGC_ENTRY *entries_array __maybe_unused, PGC_PAGE **pages_array __maybe_unused, size_t entries __maybe_unused) {
    ;
}
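
// The stress test (enabled with PGC_STRESS_TEST) spawns collector threads that add hot pages
// and extend their end times, query threads that look pages up and add missing ones as clean
// pages, and a service thread that flushes and evicts, while the main thread prints per-second
// statistics.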
#ifdef PGC_STRESS_TEST

struct {
    bool stop;
    PGC *cache;
    PGC_PAGE **metrics;
    size_t clean_metrics;
    size_t hot_metrics;
    time_t first_time_t;
    time_t last_time_t;
    size_t cache_size;
    size_t query_threads;
    size_t collect_threads;
    size_t partitions;
    size_t points_per_page;
    time_t time_per_collection_ut;
    time_t time_per_query_ut;
    time_t time_per_flush_ut;
    PGC_OPTIONS options;
    char rand_statebufs[1024];
    struct random_data *random_data;
} pgc_uts = {
    .stop = false,
    .metrics = NULL,
    .clean_metrics = 100000,
    .hot_metrics = 1000000,
    .first_time_t = 100000000,
    .last_time_t = 0,
    .cache_size = 0, // get the default (8MB)
    .collect_threads = 16,
    .query_threads = 16,
    .partitions = 0, // get the default (system cpus)
    .options = PGC_OPTIONS_AUTOSCALE, /* PGC_OPTIONS_FLUSH_PAGES_INLINE | PGC_OPTIONS_EVICT_PAGES_INLINE, */
    .points_per_page = 10,
    .time_per_collection_ut = 1000000,
    .time_per_query_ut = 250,
    .time_per_flush_ut = 100,
    .rand_statebufs = {},
    .random_data = NULL,
};
void *unittest_stress_test_collector(void *ptr) {
    size_t id = *((size_t *)ptr);

    size_t metric_start = pgc_uts.clean_metrics;
    size_t metric_end = pgc_uts.clean_metrics + pgc_uts.hot_metrics;
    size_t number_of_metrics = metric_end - metric_start;
    size_t per_collector_metrics = number_of_metrics / pgc_uts.collect_threads;
    metric_start = metric_start + per_collector_metrics * id + 1;
    metric_end = metric_start + per_collector_metrics - 1;

    time_t start_time_t = pgc_uts.first_time_t + 1;

    heartbeat_t hb;
    heartbeat_init(&hb);

    while(!__atomic_load_n(&pgc_uts.stop, __ATOMIC_RELAXED)) {
        // netdata_log_info("COLLECTOR %zu: collecting metrics %zu to %zu, from %ld to %lu", id, metric_start, metric_end, start_time_t, start_time_t + pgc_uts.points_per_page);

        netdata_thread_disable_cancelability();

        for (size_t i = metric_start; i < metric_end; i++) {
            bool added;

            pgc_uts.metrics[i] = pgc_page_add_and_acquire(pgc_uts.cache, (PGC_ENTRY) {
                    .section = 1,
                    .metric_id = i,
                    .start_time_s = start_time_t,
                    .end_time_s = start_time_t,
                    .update_every_s = 1,
                    .size = 4096,
                    .data = NULL,
                    .hot = true,
            }, &added);

            if(!pgc_is_page_hot(pgc_uts.metrics[i]) || !added) {
                pgc_page_release(pgc_uts.cache, pgc_uts.metrics[i]);
                pgc_uts.metrics[i] = NULL;
            }
        }

        time_t end_time_t = start_time_t + (time_t)pgc_uts.points_per_page;
        while(++start_time_t <= end_time_t && !__atomic_load_n(&pgc_uts.stop, __ATOMIC_RELAXED)) {
            heartbeat_next(&hb, pgc_uts.time_per_collection_ut);

            for (size_t i = metric_start; i < metric_end; i++) {
                if(pgc_uts.metrics[i])
                    pgc_page_hot_set_end_time_s(pgc_uts.cache, pgc_uts.metrics[i], start_time_t);
            }

            __atomic_store_n(&pgc_uts.last_time_t, start_time_t, __ATOMIC_RELAXED);
        }

        for (size_t i = metric_start; i < metric_end; i++) {
            if (pgc_uts.metrics[i]) {
                if(i % 10 == 0)
                    pgc_page_to_clean_evict_or_release(pgc_uts.cache, pgc_uts.metrics[i]);
                else
                    pgc_page_hot_to_dirty_and_release(pgc_uts.cache, pgc_uts.metrics[i]);
            }
        }

        netdata_thread_enable_cancelability();
    }

    return ptr;
}
void *unittest_stress_test_queries(void *ptr) {
    size_t id = *((size_t *)ptr);
    struct random_data *random_data = &pgc_uts.random_data[id];

    size_t start = 0;
    size_t end = pgc_uts.clean_metrics + pgc_uts.hot_metrics;

    while(!__atomic_load_n(&pgc_uts.stop, __ATOMIC_RELAXED)) {
        netdata_thread_disable_cancelability();

        int32_t random_number;
        random_r(random_data, &random_number);

        size_t metric_id = random_number % (end - start);
        time_t start_time_t = pgc_uts.first_time_t;
        time_t end_time_t = __atomic_load_n(&pgc_uts.last_time_t, __ATOMIC_RELAXED);
        if(end_time_t <= start_time_t)
            end_time_t = start_time_t + 1;
        size_t pages = (end_time_t - start_time_t) / pgc_uts.points_per_page + 1;

        PGC_PAGE *array[pages];
        for(size_t i = 0; i < pages ; i++)
            array[i] = NULL;

        // find the pages the cache has
        for(size_t i = 0; i < pages ; i++) {
            time_t page_start_time = start_time_t + (time_t)(i * pgc_uts.points_per_page);
            array[i] = pgc_page_get_and_acquire(pgc_uts.cache, 1, metric_id,
                                                page_start_time, (i < pages - 1) ? PGC_SEARCH_EXACT : PGC_SEARCH_CLOSEST);
        }

        // load the rest of the pages
        for(size_t i = 0; i < pages ; i++) {
            if(array[i]) continue;

            time_t page_start_time = start_time_t + (time_t)(i * pgc_uts.points_per_page);
            array[i] = pgc_page_add_and_acquire(pgc_uts.cache, (PGC_ENTRY) {
                    .section = 1,
                    .metric_id = metric_id,
                    .start_time_s = page_start_time,
                    .end_time_s = page_start_time + (time_t)pgc_uts.points_per_page,
                    .update_every_s = 1,
                    .size = 4096,
                    .data = NULL,
                    .hot = false,
            }, NULL);
        }

        // do the query
        // ...
        struct timespec work_duration = {.tv_sec = 0, .tv_nsec = pgc_uts.time_per_query_ut * NSEC_PER_USEC };
        nanosleep(&work_duration, NULL);

        // release the pages
        for(size_t i = 0; i < pages ; i++) {
            if(!array[i]) continue;
            pgc_page_release(pgc_uts.cache, array[i]);
            array[i] = NULL;
        }

        netdata_thread_enable_cancelability();
    }

    return ptr;
}
void *unittest_stress_test_service(void *ptr) {
    heartbeat_t hb;
    heartbeat_init(&hb);
    while(!__atomic_load_n(&pgc_uts.stop, __ATOMIC_RELAXED)) {
        heartbeat_next(&hb, 1 * USEC_PER_SEC);

        pgc_flush_pages(pgc_uts.cache, 1000);
        pgc_evict_pages(pgc_uts.cache, 0, 0);
    }
    return ptr;
}

static void unittest_stress_test_save_dirty_page_callback(PGC *cache __maybe_unused, PGC_ENTRY *entries_array __maybe_unused, PGC_PAGE **pages_array __maybe_unused, size_t entries __maybe_unused) {
    // netdata_log_info("SAVE %zu pages", entries);
    if(!pgc_uts.stop) {
        usec_t t = pgc_uts.time_per_flush_ut;

        if(t > 0) {
            struct timespec work_duration = {
                    .tv_sec = t / USEC_PER_SEC,
                    .tv_nsec = (long) ((t % USEC_PER_SEC) * NSEC_PER_USEC)
            };

            nanosleep(&work_duration, NULL);
        }
    }
}
void unittest_stress_test(void) {
    pgc_uts.cache = pgc_create("test",
                               pgc_uts.cache_size * 1024 * 1024, unittest_free_clean_page_callback,
                               64, NULL, unittest_stress_test_save_dirty_page_callback,
                               1000, 1, 10000, 1,
                               pgc_uts.options, pgc_uts.partitions, 0);

    pgc_uts.metrics = callocz(pgc_uts.clean_metrics + pgc_uts.hot_metrics, sizeof(PGC_PAGE *));

    pthread_t service_thread;
    netdata_thread_create(&service_thread, "SERVICE",
                          NETDATA_THREAD_OPTION_JOINABLE | NETDATA_THREAD_OPTION_DONT_LOG,
                          unittest_stress_test_service, NULL);

    pthread_t collect_threads[pgc_uts.collect_threads];
    size_t collect_thread_ids[pgc_uts.collect_threads];
    for(size_t i = 0; i < pgc_uts.collect_threads ; i++) {
        collect_thread_ids[i] = i;
        char buffer[100 + 1];
        snprintfz(buffer, sizeof(buffer) - 1, "COLLECT_%zu", i);
        netdata_thread_create(&collect_threads[i], buffer,
                              NETDATA_THREAD_OPTION_JOINABLE | NETDATA_THREAD_OPTION_DONT_LOG,
                              unittest_stress_test_collector, &collect_thread_ids[i]);
    }

    pthread_t queries_threads[pgc_uts.query_threads];
    size_t query_thread_ids[pgc_uts.query_threads];
    pgc_uts.random_data = callocz(pgc_uts.query_threads, sizeof(struct random_data));
    for(size_t i = 0; i < pgc_uts.query_threads ; i++) {
        query_thread_ids[i] = i;
        char buffer[100 + 1];
        snprintfz(buffer, sizeof(buffer) - 1, "QUERY_%zu", i);
        initstate_r(1, pgc_uts.rand_statebufs, 1024, &pgc_uts.random_data[i]);
        netdata_thread_create(&queries_threads[i], buffer,
                              NETDATA_THREAD_OPTION_JOINABLE | NETDATA_THREAD_OPTION_DONT_LOG,
                              unittest_stress_test_queries, &query_thread_ids[i]);
    }

    heartbeat_t hb;
    heartbeat_init(&hb);

    struct {
        size_t entries;
        size_t added;
        size_t deleted;
        size_t referenced;

        size_t hot_entries;
        size_t hot_added;
        size_t hot_deleted;

        size_t dirty_entries;
        size_t dirty_added;
        size_t dirty_deleted;

        size_t clean_entries;
        size_t clean_added;
        size_t clean_deleted;

        size_t searches_exact;
        size_t searches_exact_hits;
        size_t searches_closest;
        size_t searches_closest_hits;

        size_t collections;

        size_t events_cache_under_severe_pressure;
        size_t events_cache_needs_space_90;
        size_t events_flush_critical;
    } stats = {}, old_stats = {};

    for(int i = 0; i < 86400 ; i++) {
        heartbeat_next(&hb, 1 * USEC_PER_SEC);

        old_stats = stats;
        stats.entries       = __atomic_load_n(&pgc_uts.cache->stats.entries, __ATOMIC_RELAXED);
        stats.added         = __atomic_load_n(&pgc_uts.cache->stats.added_entries, __ATOMIC_RELAXED);
        stats.deleted       = __atomic_load_n(&pgc_uts.cache->stats.removed_entries, __ATOMIC_RELAXED);
        stats.referenced    = __atomic_load_n(&pgc_uts.cache->stats.referenced_entries, __ATOMIC_RELAXED);

        stats.hot_entries   = __atomic_load_n(&pgc_uts.cache->hot.stats->entries, __ATOMIC_RELAXED);
        stats.hot_added     = __atomic_load_n(&pgc_uts.cache->hot.stats->added_entries, __ATOMIC_RELAXED);
        stats.hot_deleted   = __atomic_load_n(&pgc_uts.cache->hot.stats->removed_entries, __ATOMIC_RELAXED);

        stats.dirty_entries = __atomic_load_n(&pgc_uts.cache->dirty.stats->entries, __ATOMIC_RELAXED);
        stats.dirty_added   = __atomic_load_n(&pgc_uts.cache->dirty.stats->added_entries, __ATOMIC_RELAXED);
        stats.dirty_deleted = __atomic_load_n(&pgc_uts.cache->dirty.stats->removed_entries, __ATOMIC_RELAXED);

        stats.clean_entries = __atomic_load_n(&pgc_uts.cache->clean.stats->entries, __ATOMIC_RELAXED);
        stats.clean_added   = __atomic_load_n(&pgc_uts.cache->clean.stats->added_entries, __ATOMIC_RELAXED);
        stats.clean_deleted = __atomic_load_n(&pgc_uts.cache->clean.stats->removed_entries, __ATOMIC_RELAXED);

        stats.searches_exact        = __atomic_load_n(&pgc_uts.cache->stats.searches_exact, __ATOMIC_RELAXED);
        stats.searches_exact_hits   = __atomic_load_n(&pgc_uts.cache->stats.searches_exact_hits, __ATOMIC_RELAXED);

        stats.searches_closest      = __atomic_load_n(&pgc_uts.cache->stats.searches_closest, __ATOMIC_RELAXED);
        stats.searches_closest_hits = __atomic_load_n(&pgc_uts.cache->stats.searches_closest_hits, __ATOMIC_RELAXED);

        stats.events_cache_under_severe_pressure = __atomic_load_n(&pgc_uts.cache->stats.events_cache_under_severe_pressure, __ATOMIC_RELAXED);
        stats.events_cache_needs_space_90        = __atomic_load_n(&pgc_uts.cache->stats.events_cache_needs_space_aggressively, __ATOMIC_RELAXED);
        stats.events_flush_critical              = __atomic_load_n(&pgc_uts.cache->stats.events_flush_critical, __ATOMIC_RELAXED);

        size_t searches_exact   = stats.searches_exact - old_stats.searches_exact;
        size_t searches_closest = stats.searches_closest - old_stats.searches_closest;

        size_t hit_exact   = stats.searches_exact_hits - old_stats.searches_exact_hits;
        size_t hit_closest = stats.searches_closest_hits - old_stats.searches_closest_hits;

        double hit_exact_pc   = (searches_exact > 0)   ? (double)hit_exact * 100.0 / (double)searches_exact     : 0.0;
        double hit_closest_pc = (searches_closest > 0) ? (double)hit_closest * 100.0 / (double)searches_closest : 0.0;

#ifdef PGC_COUNT_POINTS_COLLECTED
        stats.collections = __atomic_load_n(&pgc_uts.cache->stats.points_collected, __ATOMIC_RELAXED);
#endif

        char *cache_status = "N";
        if(stats.events_cache_under_severe_pressure > old_stats.events_cache_under_severe_pressure)
            cache_status = "F";
        else if(stats.events_cache_needs_space_90 > old_stats.events_cache_needs_space_90)
            cache_status = "f";

        char *flushing_status = "N";
        if(stats.events_flush_critical > old_stats.events_flush_critical)
            flushing_status = "F";

        netdata_log_info("PGS %5zuk +%4zuk/-%4zuk "
                         "| RF %5zuk "
                         "| HOT %5zuk +%4zuk -%4zuk "
                         "| DRT %s %5zuk +%4zuk -%4zuk "
                         "| CLN %s %5zuk +%4zuk -%4zuk "
                         "| SRCH %4zuk %4zuk, HIT %4.1f%% %4.1f%% "
#ifdef PGC_COUNT_POINTS_COLLECTED
                         "| CLCT %8.4f Mps"
#endif
                         , stats.entries / 1000
                         , (stats.added - old_stats.added) / 1000, (stats.deleted - old_stats.deleted) / 1000
                         , stats.referenced / 1000
                         , stats.hot_entries / 1000, (stats.hot_added - old_stats.hot_added) / 1000, (stats.hot_deleted - old_stats.hot_deleted) / 1000
                         , flushing_status
                         , stats.dirty_entries / 1000
                         , (stats.dirty_added - old_stats.dirty_added) / 1000, (stats.dirty_deleted - old_stats.dirty_deleted) / 1000
                         , cache_status
                         , stats.clean_entries / 1000
                         , (stats.clean_added - old_stats.clean_added) / 1000, (stats.clean_deleted - old_stats.clean_deleted) / 1000
                         , searches_exact / 1000, searches_closest / 1000
                         , hit_exact_pc, hit_closest_pc
#ifdef PGC_COUNT_POINTS_COLLECTED
                         , (double)(stats.collections - old_stats.collections) / 1000.0 / 1000.0
#endif
                         );
    }

    netdata_log_info("Waiting for threads to stop...");
    __atomic_store_n(&pgc_uts.stop, true, __ATOMIC_RELAXED);

    netdata_thread_join(service_thread, NULL);

    for(size_t i = 0; i < pgc_uts.collect_threads ; i++)
        netdata_thread_join(collect_threads[i], NULL);

    for(size_t i = 0; i < pgc_uts.query_threads ; i++)
        netdata_thread_join(queries_threads[i], NULL);

    pgc_destroy(pgc_uts.cache);

    freez(pgc_uts.metrics);
    freez(pgc_uts.random_data);
}
#endif
int pgc_unittest(void) {
    PGC *cache = pgc_create("test",
                            32 * 1024 * 1024, unittest_free_clean_page_callback,
                            64, NULL, unittest_save_dirty_page_callback,
                            10, 10, 1000, 10,
                            PGC_OPTIONS_DEFAULT, 1, 11);

    // FIXME - unit tests
    // - add clean page
    // - add clean page again (should not add it)
    // - release page (should decrement counters)
    // - add hot page
    // - add hot page again (should not add it)
    // - turn hot page to dirty, with and without a reference counter to it
    // - dirty pages are saved once there are enough of them
    // - find page exact
    // - find page (should return last)
    // - find page (should return next)
    // - page cache full (should evict)
    // - on destroy, turn hot pages to dirty and save them

    PGC_PAGE *page1 = pgc_page_add_and_acquire(cache, (PGC_ENTRY){
        .section = 1,
        .metric_id = 10,
        .start_time_s = 100,
        .end_time_s = 1000,
        .size = 4096,
        .data = NULL,
        .hot = false,
        .custom_data = (uint8_t *)"0123456789",
    }, NULL);

    if(strcmp(pgc_page_custom_data(cache, page1), "0123456789") != 0)
        fatal("custom data do not work");

    memcpy(pgc_page_custom_data(cache, page1), "ABCDEFGHIJ", 11);
    if(strcmp(pgc_page_custom_data(cache, page1), "ABCDEFGHIJ") != 0)
        fatal("custom data do not work");

    pgc_page_release(cache, page1);

    PGC_PAGE *page2 = pgc_page_add_and_acquire(cache, (PGC_ENTRY){
        .section = 2,
        .metric_id = 10,
        .start_time_s = 1001,
        .end_time_s = 2000,
        .size = 4096,
        .data = NULL,
        .hot = true,
    }, NULL);

    pgc_page_hot_set_end_time_s(cache, page2, 2001);
    pgc_page_hot_to_dirty_and_release(cache, page2);

    PGC_PAGE *page3 = pgc_page_add_and_acquire(cache, (PGC_ENTRY){
        .section = 3,
        .metric_id = 10,
        .start_time_s = 1001,
        .end_time_s = 2000,
        .size = 4096,
        .data = NULL,
        .hot = true,
    }, NULL);

    pgc_page_hot_set_end_time_s(cache, page3, 2001);
    pgc_page_hot_to_dirty_and_release(cache, page3);

    pgc_destroy(cache);

#ifdef PGC_STRESS_TEST
    unittest_stress_test();
#endif

    return 0;
}