// cache.c

  1. #include "cache.h"
  2. /* STATES AND TRANSITIONS
  3.  *
  4.  *   entry                      entry
  5.  *     v                          v
  6.  *    HOT ------> DIRTY ------> CLEAN ------> EVICT
  7.  *                  v             v
  8.  *                flush         evict
  9.  *                  v             v
  10.  *                save           free
  11.  *              callback       callback
  12.  *
  13.  */
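/*
 * Rough lifecycle sketch (using the transition helpers defined further down in
 * this file): a page typically enters the cache either HOT (a collector is
 * still appending samples to it) or CLEAN (loaded from disk for a query).
 *
 *     page_set_hot(cache, page);                  // collector owns the page
 *     // ... collection finishes ...
 *     page_set_dirty(cache, page, false);         // has unsaved data, queued for flushing
 *     // ... flush_pages() saves it via the save-dirty callback ...
 *     page_set_clean(cache, page, false, false);  // saved, now eligible for eviction
 *     // ... evict_pages() frees it via the free-clean callback ...
 */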
  14. typedef int32_t REFCOUNT;
  15. #define REFCOUNT_DELETING (-100)
  16. // ARAL is enabled for page allocations; to build without it, comment out the following line:
  17. #define PGC_WITH_ARAL 1
  18. typedef enum __attribute__ ((__packed__)) {
  19. // mutually exclusive flags
  20. PGC_PAGE_CLEAN = (1 << 0), // none of the following
  21. PGC_PAGE_DIRTY = (1 << 1), // contains unsaved data
  22. PGC_PAGE_HOT = (1 << 2), // currently being collected
  23. // flags related to various actions on each page
  24. PGC_PAGE_IS_BEING_DELETED = (1 << 3),
  25. PGC_PAGE_IS_BEING_MIGRATED_TO_V2 = (1 << 4),
  26. PGC_PAGE_HAS_NO_DATA_IGNORE_ACCESSES = (1 << 5),
  27. PGC_PAGE_HAS_BEEN_ACCESSED = (1 << 6),
  28. } PGC_PAGE_FLAGS;
  29. #define page_flag_check(page, flag) (__atomic_load_n(&((page)->flags), __ATOMIC_ACQUIRE) & (flag))
  30. #define page_flag_set(page, flag) __atomic_or_fetch(&((page)->flags), flag, __ATOMIC_RELEASE)
  31. #define page_flag_clear(page, flag) __atomic_and_fetch(&((page)->flags), ~(flag), __ATOMIC_RELEASE)
  32. #define page_get_status_flags(page) page_flag_check(page, PGC_PAGE_HOT | PGC_PAGE_DIRTY | PGC_PAGE_CLEAN)
  33. #define is_page_hot(page) (page_get_status_flags(page) == PGC_PAGE_HOT)
  34. #define is_page_dirty(page) (page_get_status_flags(page) == PGC_PAGE_DIRTY)
  35. #define is_page_clean(page) (page_get_status_flags(page) == PGC_PAGE_CLEAN)
  36. struct pgc_page {
  37. // indexing data
  38. Word_t section;
  39. Word_t metric_id;
  40. time_t start_time_s;
  41. time_t end_time_s;
  42. uint32_t update_every_s;
  43. uint32_t assumed_size;
  44. REFCOUNT refcount;
  45. uint16_t accesses; // counts the number of accesses on this page
  46. PGC_PAGE_FLAGS flags;
  47. SPINLOCK transition_spinlock; // when the page changes between HOT, DIRTY, CLEAN, we have to get this lock
  48. struct {
  49. struct pgc_page *next;
  50. struct pgc_page *prev;
  51. } link;
  52. void *data;
  53. uint8_t custom_data[];
  54. // IMPORTANT!
  55. // THIS STRUCTURE NEEDS TO BE INITIALIZED BY HAND!
  56. };
  57. struct pgc_linked_list {
  58. SPINLOCK spinlock;
  59. union {
  60. PGC_PAGE *base;
  61. Pvoid_t sections_judy;
  62. };
  63. PGC_PAGE_FLAGS flags;
  64. size_t version;
  65. size_t last_version_checked;
  66. bool linked_list_in_sections_judy; // when true, we use 'sections_judy', otherwise we use 'base'
  67. struct pgc_queue_statistics *stats;
  68. };
  69. struct pgc {
  70. struct {
  71. char name[PGC_NAME_MAX + 1];
  72. size_t partitions;
  73. size_t clean_size;
  74. size_t max_dirty_pages_per_call;
  75. size_t max_pages_per_inline_eviction;
  76. size_t max_skip_pages_per_inline_eviction;
  77. size_t max_flushes_inline;
  78. size_t max_workers_evict_inline;
  79. size_t additional_bytes_per_page;
  80. free_clean_page_callback pgc_free_clean_cb;
  81. save_dirty_page_callback pgc_save_dirty_cb;
  82. save_dirty_init_callback pgc_save_init_cb;
  83. PGC_OPTIONS options;
  84. size_t severe_pressure_per1000;
  85. size_t aggressive_evict_per1000;
  86. size_t healthy_size_per1000;
  87. size_t evict_low_threshold_per1000;
  88. dynamic_target_cache_size_callback dynamic_target_size_cb;
  89. } config;
  90. #ifdef PGC_WITH_ARAL
  91. ARAL **aral;
  92. #endif
  93. PGC_CACHE_LINE_PADDING(0);
  94. struct pgc_index {
  95. netdata_rwlock_t rwlock;
  96. Pvoid_t sections_judy;
  97. } *index;
  98. PGC_CACHE_LINE_PADDING(1);
  99. struct {
  100. SPINLOCK spinlock;
  101. size_t per1000;
  102. } usage;
  103. PGC_CACHE_LINE_PADDING(2);
  104. struct pgc_linked_list clean; // LRU is applied here to free memory from the cache
  105. PGC_CACHE_LINE_PADDING(3);
  106. struct pgc_linked_list dirty; // in the dirty list, pages are ordered the way they were marked dirty
  107. PGC_CACHE_LINE_PADDING(4);
  108. struct pgc_linked_list hot; // in the hot list, pages are ordered the way they were marked hot
  109. PGC_CACHE_LINE_PADDING(5);
  110. struct pgc_statistics stats; // statistics
  111. #ifdef NETDATA_PGC_POINTER_CHECK
  112. PGC_CACHE_LINE_PADDING(6);
  113. netdata_mutex_t global_pointer_registry_mutex;
  114. Pvoid_t global_pointer_registry;
  115. #endif
  116. };
  117. // ----------------------------------------------------------------------------
  118. // validate each pointer is indexed once - internal checks only
  119. static inline void pointer_index_init(PGC *cache __maybe_unused) {
  120. #ifdef NETDATA_PGC_POINTER_CHECK
  121. netdata_mutex_init(&cache->global_pointer_registry_mutex);
  122. #else
  123. ;
  124. #endif
  125. }
  126. static inline void pointer_destroy_index(PGC *cache __maybe_unused) {
  127. #ifdef NETDATA_PGC_POINTER_CHECK
  128. netdata_mutex_lock(&cache->global_pointer_registry_mutex);
  129. JudyHSFreeArray(&cache->global_pointer_registry, PJE0);
  130. netdata_mutex_unlock(&cache->global_pointer_registry_mutex);
  131. #else
  132. ;
  133. #endif
  134. }
  135. static inline void pointer_add(PGC *cache __maybe_unused, PGC_PAGE *page __maybe_unused) {
  136. #ifdef NETDATA_PGC_POINTER_CHECK
  137. netdata_mutex_lock(&cache->global_pointer_registry_mutex);
  138. Pvoid_t *PValue = JudyHSIns(&cache->global_pointer_registry, &page, sizeof(void *), PJE0);
  139. if(*PValue != NULL)
  140. fatal("pointer already exists in registry");
  141. *PValue = page;
  142. netdata_mutex_unlock(&cache->global_pointer_registry_mutex);
  143. #else
  144. ;
  145. #endif
  146. }
  147. static inline void pointer_check(PGC *cache __maybe_unused, PGC_PAGE *page __maybe_unused) {
  148. #ifdef NETDATA_PGC_POINTER_CHECK
  149. netdata_mutex_lock(&cache->global_pointer_registry_mutex);
  150. Pvoid_t *PValue = JudyHSGet(cache->global_pointer_registry, &page, sizeof(void *));
  151. if(PValue == NULL)
  152. fatal("pointer is not found in registry");
  153. netdata_mutex_unlock(&cache->global_pointer_registry_mutex);
  154. #else
  155. ;
  156. #endif
  157. }
  158. static inline void pointer_del(PGC *cache __maybe_unused, PGC_PAGE *page __maybe_unused) {
  159. #ifdef NETDATA_PGC_POINTER_CHECK
  160. netdata_mutex_lock(&cache->global_pointer_registry_mutex);
  161. int ret = JudyHSDel(&cache->global_pointer_registry, &page, sizeof(void *), PJE0);
  162. if(!ret)
  163. fatal("pointer to be deleted does not exist in registry");
  164. netdata_mutex_unlock(&cache->global_pointer_registry_mutex);
  165. #else
  166. ;
  167. #endif
  168. }
  169. // ----------------------------------------------------------------------------
  170. // locking
  171. static inline size_t pgc_indexing_partition(PGC *cache, Word_t metric_id) {
  172. static __thread Word_t last_metric_id = 0;
  173. static __thread size_t last_partition = 0;
  174. if(metric_id == last_metric_id || cache->config.partitions == 1)
  175. return last_partition;
  176. last_metric_id = metric_id;
  177. last_partition = indexing_partition(metric_id, cache->config.partitions);
  178. return last_partition;
  179. }
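/*
 * indexing_partition() itself is defined elsewhere; any function that reduces a
 * metric_id to the range [0, partitions) would do. A hypothetical example (not
 * the actual implementation) is a multiplicative hash:
 *
 *     static inline size_t example_indexing_partition(Word_t metric_id, size_t partitions) {
 *         return (size_t)((metric_id * 0x9E3779B97F4A7C15ULL) >> 32) % partitions;
 *     }
 *
 * The thread-local last_metric_id/last_partition pair above merely memoizes the
 * most recent lookup, since callers tend to touch many pages of the same metric
 * in a row.
 */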
  180. static inline void pgc_index_read_lock(PGC *cache, size_t partition) {
  181. netdata_rwlock_rdlock(&cache->index[partition].rwlock);
  182. }
  183. static inline void pgc_index_read_unlock(PGC *cache, size_t partition) {
  184. netdata_rwlock_unlock(&cache->index[partition].rwlock);
  185. }
  186. //static inline bool pgc_index_write_trylock(PGC *cache, size_t partition) {
  187. // return !netdata_rwlock_trywrlock(&cache->index[partition].rwlock);
  188. //}
  189. static inline void pgc_index_write_lock(PGC *cache, size_t partition) {
  190. netdata_rwlock_wrlock(&cache->index[partition].rwlock);
  191. }
  192. static inline void pgc_index_write_unlock(PGC *cache, size_t partition) {
  193. netdata_rwlock_unlock(&cache->index[partition].rwlock);
  194. }
  195. static inline bool pgc_ll_trylock(PGC *cache __maybe_unused, struct pgc_linked_list *ll) {
  196. return netdata_spinlock_trylock(&ll->spinlock);
  197. }
  198. static inline void pgc_ll_lock(PGC *cache __maybe_unused, struct pgc_linked_list *ll) {
  199. netdata_spinlock_lock(&ll->spinlock);
  200. }
  201. static inline void pgc_ll_unlock(PGC *cache __maybe_unused, struct pgc_linked_list *ll) {
  202. netdata_spinlock_unlock(&ll->spinlock);
  203. }
  204. static inline bool page_transition_trylock(PGC *cache __maybe_unused, PGC_PAGE *page) {
  205. return netdata_spinlock_trylock(&page->transition_spinlock);
  206. }
  207. static inline void page_transition_lock(PGC *cache __maybe_unused, PGC_PAGE *page) {
  208. netdata_spinlock_lock(&page->transition_spinlock);
  209. }
  210. static inline void page_transition_unlock(PGC *cache __maybe_unused, PGC_PAGE *page) {
  211. netdata_spinlock_unlock(&page->transition_spinlock);
  212. }
  213. // ----------------------------------------------------------------------------
  214. // evictions control
  215. static inline size_t cache_usage_per1000(PGC *cache, size_t *size_to_evict) {
  216. if(size_to_evict)
  217. netdata_spinlock_lock(&cache->usage.spinlock);
  218. else if(!netdata_spinlock_trylock(&cache->usage.spinlock))
  219. return __atomic_load_n(&cache->usage.per1000, __ATOMIC_RELAXED);
  220. size_t current_cache_size;
  221. size_t wanted_cache_size;
  222. size_t per1000;
  223. size_t dirty = __atomic_load_n(&cache->dirty.stats->size, __ATOMIC_RELAXED);
  224. size_t hot = __atomic_load_n(&cache->hot.stats->size, __ATOMIC_RELAXED);
  225. if(cache->config.options & PGC_OPTIONS_AUTOSCALE) {
  226. size_t dirty_max = __atomic_load_n(&cache->dirty.stats->max_size, __ATOMIC_RELAXED);
  227. size_t hot_max = __atomic_load_n(&cache->hot.stats->max_size, __ATOMIC_RELAXED);
  228. // our promise to users
  229. size_t max_size1 = MAX(hot_max, hot) * 2;
  230. // protection against slow flushing
  231. size_t max_size2 = hot_max + ((dirty_max < hot_max / 2) ? hot_max / 2 : dirty_max * 2);
  232. // the final wanted cache size
  233. wanted_cache_size = MIN(max_size1, max_size2);
  234. if(cache->config.dynamic_target_size_cb) {
  235. size_t wanted_cache_size_cb = cache->config.dynamic_target_size_cb();
  236. if(wanted_cache_size_cb > wanted_cache_size)
  237. wanted_cache_size = wanted_cache_size_cb;
  238. }
  239. if (wanted_cache_size < hot + dirty + cache->config.clean_size)
  240. wanted_cache_size = hot + dirty + cache->config.clean_size;
  241. }
  242. else
  243. wanted_cache_size = hot + dirty + cache->config.clean_size;
  244. // protection against huge queries
  245. // if huge queries are running, or huge amounts need to be saved
  246. // allow the cache to grow more (hot pages in main cache are also referenced)
  247. size_t referenced_size = __atomic_load_n(&cache->stats.referenced_size, __ATOMIC_RELAXED);
  248. if(unlikely(wanted_cache_size < referenced_size * 2 / 3))
  249. wanted_cache_size = referenced_size * 2 / 3;
  250. current_cache_size = __atomic_load_n(&cache->stats.size, __ATOMIC_RELAXED); // + pgc_aral_overhead();
  251. per1000 = (size_t)((unsigned long long)current_cache_size * 1000ULL / (unsigned long long)wanted_cache_size);
  252. __atomic_store_n(&cache->usage.per1000, per1000, __ATOMIC_RELAXED);
  253. __atomic_store_n(&cache->stats.wanted_cache_size, wanted_cache_size, __ATOMIC_RELAXED);
  254. __atomic_store_n(&cache->stats.current_cache_size, current_cache_size, __ATOMIC_RELAXED);
  255. netdata_spinlock_unlock(&cache->usage.spinlock);
  256. if(size_to_evict) {
  257. size_t target = (size_t)((unsigned long long)wanted_cache_size * (unsigned long long)cache->config.evict_low_threshold_per1000 / 1000ULL);
  258. if(current_cache_size > target)
  259. *size_to_evict = current_cache_size - target;
  260. else
  261. *size_to_evict = 0;
  262. }
  263. if(per1000 >= cache->config.severe_pressure_per1000)
  264. __atomic_add_fetch(&cache->stats.events_cache_under_severe_pressure, 1, __ATOMIC_RELAXED);
  265. else if(per1000 >= cache->config.aggressive_evict_per1000)
  266. __atomic_add_fetch(&cache->stats.events_cache_needs_space_aggressively, 1, __ATOMIC_RELAXED);
  267. return per1000;
  268. }
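/*
 * Worked example (hypothetical numbers) for the autoscaling math above:
 *
 *     hot = 100 MiB, hot_max = 120 MiB, dirty = 10 MiB, dirty_max = 20 MiB,
 *     clean_size = 32 MiB, current_cache_size = 260 MiB
 *
 *     max_size1 = MAX(hot_max, hot) * 2 = MAX(120, 100) * 2      = 240 MiB
 *     max_size2: dirty_max (20) < hot_max / 2 (60), so
 *                max_size2 = hot_max + hot_max / 2 = 120 + 60    = 180 MiB
 *     wanted_cache_size = MIN(240, 180)                          = 180 MiB
 *     hot + dirty + clean_size = 142 MiB  (lower, so no adjustment)
 *     per1000 = 260 * 1000 / 180                                 = 1444
 *
 * i.e. the cache is roughly 44% above its target size, well past 1000, so
 * eviction pressure is reported according to the configured thresholds.
 */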
  269. static inline bool cache_pressure(PGC *cache, size_t limit) {
  270. return (cache_usage_per1000(cache, NULL) >= limit);
  271. }
  272. #define cache_under_severe_pressure(cache) cache_pressure(cache, (cache)->config.severe_pressure_per1000)
  273. #define cache_needs_space_aggressively(cache) cache_pressure(cache, (cache)->config.aggressive_evict_per1000)
  274. #define cache_above_healthy_limit(cache) cache_pressure(cache, (cache)->config.healthy_size_per1000)
  275. typedef bool (*evict_filter)(PGC_PAGE *page, void *data);
  276. static bool evict_pages_with_filter(PGC *cache, size_t max_skip, size_t max_evict, bool wait, bool all_of_them, evict_filter filter, void *data);
  277. #define evict_pages(cache, max_skip, max_evict, wait, all_of_them) evict_pages_with_filter(cache, max_skip, max_evict, wait, all_of_them, NULL, NULL)
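/*
 * Hypothetical evict_filter sketch (not part of this file): evict only the
 * pages of one section, e.g. while dropping a datafile. The filter returns
 * true for pages that may be evicted; evict_pages_with_filter() skips the rest.
 *
 *     static bool evict_only_this_section(PGC_PAGE *page, void *data) {
 *         Word_t section = *(Word_t *)data;
 *         return page->section == section;
 *     }
 *
 *     // evict_pages_with_filter(cache, 0, 0, true, true,
 *     //                         evict_only_this_section, &section);
 */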
  278. static inline void evict_on_clean_page_added(PGC *cache __maybe_unused) {
  279. if((cache->config.options & PGC_OPTIONS_EVICT_PAGES_INLINE) || cache_needs_space_aggressively(cache)) {
  280. evict_pages(cache,
  281. cache->config.max_skip_pages_per_inline_eviction,
  282. cache->config.max_pages_per_inline_eviction,
  283. false, false);
  284. }
  285. }
  286. static inline void evict_on_page_release_when_permitted(PGC *cache __maybe_unused) {
  287. if ((cache->config.options & PGC_OPTIONS_EVICT_PAGES_INLINE) || cache_under_severe_pressure(cache)) {
  288. evict_pages(cache,
  289. cache->config.max_skip_pages_per_inline_eviction,
  290. cache->config.max_pages_per_inline_eviction,
  291. false, false);
  292. }
  293. }
  294. // ----------------------------------------------------------------------------
  295. // flushing control
  296. static bool flush_pages(PGC *cache, size_t max_flushes, Word_t section, bool wait, bool all_of_them);
  297. static inline bool flushing_critical(PGC *cache) {
  298. if(unlikely(__atomic_load_n(&cache->dirty.stats->size, __ATOMIC_RELAXED) > __atomic_load_n(&cache->hot.stats->max_size, __ATOMIC_RELAXED))) {
  299. __atomic_add_fetch(&cache->stats.events_flush_critical, 1, __ATOMIC_RELAXED);
  300. return true;
  301. }
  302. return false;
  303. }
  304. // ----------------------------------------------------------------------------
  305. // helpers
  306. static inline size_t page_assumed_size(PGC *cache, size_t size) {
  307. return size + (sizeof(PGC_PAGE) + cache->config.additional_bytes_per_page + sizeof(Word_t) * 3);
  308. }
  309. static inline size_t page_size_from_assumed_size(PGC *cache, size_t assumed_size) {
  310. return assumed_size - (sizeof(PGC_PAGE) + cache->config.additional_bytes_per_page + sizeof(Word_t) * 3);
  311. }
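/*
 * Accounting example: assumed_size charges the caller's payload plus the
 * cache's own per-page overhead, i.e. the PGC_PAGE header, any configured
 * custom_data bytes, and 3 * sizeof(Word_t), which presumably approximates the
 * Judy index cost of the three lookup levels (section, metric, start time).
 * For a 4096-byte payload with no custom data:
 *
 *     assumed_size = 4096 + sizeof(PGC_PAGE) + 0 + 3 * sizeof(Word_t)
 *
 * page_size_from_assumed_size() is the exact inverse.
 */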
  312. // ----------------------------------------------------------------------------
  313. // Linked list management
  314. static inline void atomic_set_max(size_t *max, size_t desired) {
  315. size_t expected;
  316. expected = __atomic_load_n(max, __ATOMIC_RELAXED);
  317. do {
  318. if(expected >= desired)
  319. return;
  320. } while(!__atomic_compare_exchange_n(max, &expected, desired,
  321. false, __ATOMIC_RELAXED, __ATOMIC_RELAXED));
  322. }
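/*
 * atomic_set_max() is a lock-free "store if greater": on every failed
 * compare-and-swap it re-checks the current value (the CAS updates 'expected')
 * and gives up once the observed value is already >= desired. It is used below
 * to maintain stats->max_entries and stats->max_size without any locking.
 */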
  323. struct section_pages {
  324. SPINLOCK migration_to_v2_spinlock;
  325. size_t entries;
  326. size_t size;
  327. PGC_PAGE *base;
  328. };
  329. static ARAL *pgc_section_pages_aral = NULL;
  330. static void pgc_section_pages_static_aral_init(void) {
  331. static SPINLOCK spinlock = NETDATA_SPINLOCK_INITIALIZER;
  332. if(unlikely(!pgc_section_pages_aral)) {
  333. netdata_spinlock_lock(&spinlock);
  334. // we have to check again
  335. if(!pgc_section_pages_aral)
  336. pgc_section_pages_aral = aral_create(
  337. "pgc_section",
  338. sizeof(struct section_pages),
  339. 0,
  340. 65536, NULL,
  341. NULL, NULL, false, false);
  342. netdata_spinlock_unlock(&spinlock);
  343. }
  344. }
  345. static inline void pgc_stats_ll_judy_change(PGC *cache, struct pgc_linked_list *ll, size_t mem_before_judyl, size_t mem_after_judyl) {
  346. if(mem_after_judyl > mem_before_judyl) {
  347. __atomic_add_fetch(&ll->stats->size, mem_after_judyl - mem_before_judyl, __ATOMIC_RELAXED);
  348. __atomic_add_fetch(&cache->stats.size, mem_after_judyl - mem_before_judyl, __ATOMIC_RELAXED);
  349. }
  350. else if(mem_after_judyl < mem_before_judyl) {
  351. __atomic_sub_fetch(&ll->stats->size, mem_before_judyl - mem_after_judyl, __ATOMIC_RELAXED);
  352. __atomic_sub_fetch(&cache->stats.size, mem_before_judyl - mem_after_judyl, __ATOMIC_RELAXED);
  353. }
  354. }
  355. static inline void pgc_stats_index_judy_change(PGC *cache, size_t mem_before_judyl, size_t mem_after_judyl) {
  356. if(mem_after_judyl > mem_before_judyl) {
  357. __atomic_add_fetch(&cache->stats.size, mem_after_judyl - mem_before_judyl, __ATOMIC_RELAXED);
  358. }
  359. else if(mem_after_judyl < mem_before_judyl) {
  360. __atomic_sub_fetch(&cache->stats.size, mem_before_judyl - mem_after_judyl, __ATOMIC_RELAXED);
  361. }
  362. }
  363. static void pgc_ll_add(PGC *cache __maybe_unused, struct pgc_linked_list *ll, PGC_PAGE *page, bool having_lock) {
  364. if(!having_lock)
  365. pgc_ll_lock(cache, ll);
  366. internal_fatal(page_get_status_flags(page) != 0,
  367. "DBENGINE CACHE: invalid page flags, the page has %d, but it is should be %d",
  368. page_get_status_flags(page),
  369. 0);
  370. if(ll->linked_list_in_sections_judy) {
  371. size_t mem_before_judyl, mem_after_judyl;
  372. mem_before_judyl = JudyLMemUsed(ll->sections_judy);
  373. Pvoid_t *section_pages_pptr = JudyLIns(&ll->sections_judy, page->section, PJE0);
  374. mem_after_judyl = JudyLMemUsed(ll->sections_judy);
  375. struct section_pages *sp = *section_pages_pptr;
  376. if(!sp) {
  377. // sp = callocz(1, sizeof(struct section_pages));
  378. sp = aral_mallocz(pgc_section_pages_aral);
  379. memset(sp, 0, sizeof(struct section_pages));
  380. *section_pages_pptr = sp;
  381. mem_after_judyl += sizeof(struct section_pages);
  382. }
  383. pgc_stats_ll_judy_change(cache, ll, mem_before_judyl, mem_after_judyl);
  384. sp->entries++;
  385. sp->size += page->assumed_size;
  386. DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(sp->base, page, link.prev, link.next);
  387. if((sp->entries % cache->config.max_dirty_pages_per_call) == 0)
  388. ll->version++;
  389. }
  390. else {
  391. // CLEAN pages end up here.
  392. // - New pages created as CLEAN, always have 1 access.
  393. // - DIRTY pages made CLEAN, depending on their accesses may be appended (accesses > 0) or prepended (accesses = 0).
  394. if(page->accesses || page_flag_check(page, PGC_PAGE_HAS_BEEN_ACCESSED | PGC_PAGE_HAS_NO_DATA_IGNORE_ACCESSES) == PGC_PAGE_HAS_BEEN_ACCESSED) {
  395. DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(ll->base, page, link.prev, link.next);
  396. page_flag_clear(page, PGC_PAGE_HAS_BEEN_ACCESSED);
  397. }
  398. else
  399. DOUBLE_LINKED_LIST_PREPEND_ITEM_UNSAFE(ll->base, page, link.prev, link.next);
  400. ll->version++;
  401. }
  402. page_flag_set(page, ll->flags);
  403. if(!having_lock)
  404. pgc_ll_unlock(cache, ll);
  405. size_t entries = __atomic_add_fetch(&ll->stats->entries, 1, __ATOMIC_RELAXED);
  406. size_t size = __atomic_add_fetch(&ll->stats->size, page->assumed_size, __ATOMIC_RELAXED);
  407. __atomic_add_fetch(&ll->stats->added_entries, 1, __ATOMIC_RELAXED);
  408. __atomic_add_fetch(&ll->stats->added_size, page->assumed_size, __ATOMIC_RELAXED);
  409. atomic_set_max(&ll->stats->max_entries, entries);
  410. atomic_set_max(&ll->stats->max_size, size);
  411. }
  412. static void pgc_ll_del(PGC *cache __maybe_unused, struct pgc_linked_list *ll, PGC_PAGE *page, bool having_lock) {
  413. __atomic_sub_fetch(&ll->stats->entries, 1, __ATOMIC_RELAXED);
  414. __atomic_sub_fetch(&ll->stats->size, page->assumed_size, __ATOMIC_RELAXED);
  415. __atomic_add_fetch(&ll->stats->removed_entries, 1, __ATOMIC_RELAXED);
  416. __atomic_add_fetch(&ll->stats->removed_size, page->assumed_size, __ATOMIC_RELAXED);
  417. if(!having_lock)
  418. pgc_ll_lock(cache, ll);
  419. internal_fatal(page_get_status_flags(page) != ll->flags,
  420. "DBENGINE CACHE: invalid page flags, the page has %d, but it is should be %d",
  421. page_get_status_flags(page),
  422. ll->flags);
  423. page_flag_clear(page, ll->flags);
  424. if(ll->linked_list_in_sections_judy) {
  425. Pvoid_t *section_pages_pptr = JudyLGet(ll->sections_judy, page->section, PJE0);
  426. internal_fatal(!section_pages_pptr, "DBENGINE CACHE: page should be in Judy LL, but it is not");
  427. struct section_pages *sp = *section_pages_pptr;
  428. sp->entries--;
  429. sp->size -= page->assumed_size;
  430. DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(sp->base, page, link.prev, link.next);
  431. if(!sp->base) {
  432. size_t mem_before_judyl, mem_after_judyl;
  433. mem_before_judyl = JudyLMemUsed(ll->sections_judy);
  434. int rc = JudyLDel(&ll->sections_judy, page->section, PJE0);
  435. mem_after_judyl = JudyLMemUsed(ll->sections_judy);
  436. if(!rc)
  437. fatal("DBENGINE CACHE: cannot delete section from Judy LL");
  438. // freez(sp);
  439. aral_freez(pgc_section_pages_aral, sp);
  440. mem_after_judyl -= sizeof(struct section_pages);
  441. pgc_stats_ll_judy_change(cache, ll, mem_before_judyl, mem_after_judyl);
  442. }
  443. }
  444. else {
  445. DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(ll->base, page, link.prev, link.next);
  446. ll->version++;
  447. }
  448. if(!having_lock)
  449. pgc_ll_unlock(cache, ll);
  450. }
  451. static inline void page_has_been_accessed(PGC *cache, PGC_PAGE *page) {
  452. PGC_PAGE_FLAGS flags = page_flag_check(page, PGC_PAGE_CLEAN | PGC_PAGE_HAS_NO_DATA_IGNORE_ACCESSES);
  453. if (!(flags & PGC_PAGE_HAS_NO_DATA_IGNORE_ACCESSES)) {
  454. __atomic_add_fetch(&page->accesses, 1, __ATOMIC_RELAXED);
  455. if (flags & PGC_PAGE_CLEAN) {
  456. if(pgc_ll_trylock(cache, &cache->clean)) {
  457. DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(cache->clean.base, page, link.prev, link.next);
  458. DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(cache->clean.base, page, link.prev, link.next);
  459. pgc_ll_unlock(cache, &cache->clean);
  460. page_flag_clear(page, PGC_PAGE_HAS_BEEN_ACCESSED);
  461. }
  462. else
  463. page_flag_set(page, PGC_PAGE_HAS_BEEN_ACCESSED);
  464. }
  465. }
  466. }
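/*
 * LRU note: touching a CLEAN page moves it to the tail of the clean list (most
 * recently used) only when the clean lock can be taken without blocking;
 * otherwise the access is remembered in PGC_PAGE_HAS_BEEN_ACCESSED and the
 * relocation is performed later, either by the eviction loop or when
 * pgc_ll_add() re-inserts the page.
 */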
  467. // ----------------------------------------------------------------------------
  468. // state transitions
  469. static inline void page_set_clean(PGC *cache, PGC_PAGE *page, bool having_transition_lock, bool having_clean_lock) {
  470. if(!having_transition_lock)
  471. page_transition_lock(cache, page);
  472. PGC_PAGE_FLAGS flags = page_get_status_flags(page);
  473. if(flags & PGC_PAGE_CLEAN) {
  474. if(!having_transition_lock)
  475. page_transition_unlock(cache, page);
  476. return;
  477. }
  478. if(flags & PGC_PAGE_HOT)
  479. pgc_ll_del(cache, &cache->hot, page, false);
  480. if(flags & PGC_PAGE_DIRTY)
  481. pgc_ll_del(cache, &cache->dirty, page, false);
  482. // first add to the linked list, then set the flag (required for move_page_last())
  483. pgc_ll_add(cache, &cache->clean, page, having_clean_lock);
  484. if(!having_transition_lock)
  485. page_transition_unlock(cache, page);
  486. }
  487. static inline void page_set_dirty(PGC *cache, PGC_PAGE *page, bool having_hot_lock) {
  488. if(!having_hot_lock)
  489. // to avoid deadlocks, we have to get the hot lock before the page transition
  490. // since this is what all_hot_to_dirty() does
  491. pgc_ll_lock(cache, &cache->hot);
  492. page_transition_lock(cache, page);
  493. PGC_PAGE_FLAGS flags = page_get_status_flags(page);
  494. if(flags & PGC_PAGE_DIRTY) {
  495. page_transition_unlock(cache, page);
  496. if(!having_hot_lock)
  497. // we don't need the hot lock anymore
  498. pgc_ll_unlock(cache, &cache->hot);
  499. return;
  500. }
  501. __atomic_add_fetch(&cache->stats.hot2dirty_entries, 1, __ATOMIC_RELAXED);
  502. __atomic_add_fetch(&cache->stats.hot2dirty_size, page->assumed_size, __ATOMIC_RELAXED);
  503. if(likely(flags & PGC_PAGE_HOT))
  504. pgc_ll_del(cache, &cache->hot, page, true);
  505. if(!having_hot_lock)
  506. // we don't need the hot lock anymore
  507. pgc_ll_unlock(cache, &cache->hot);
  508. if(unlikely(flags & PGC_PAGE_CLEAN))
  509. pgc_ll_del(cache, &cache->clean, page, false);
  510. // first add to the linked list, then set the flag (required for move_page_last())
  511. pgc_ll_add(cache, &cache->dirty, page, false);
  512. __atomic_sub_fetch(&cache->stats.hot2dirty_entries, 1, __ATOMIC_RELAXED);
  513. __atomic_sub_fetch(&cache->stats.hot2dirty_size, page->assumed_size, __ATOMIC_RELAXED);
  514. page_transition_unlock(cache, page);
  515. }
  516. static inline void page_set_hot(PGC *cache, PGC_PAGE *page) {
  517. page_transition_lock(cache, page);
  518. PGC_PAGE_FLAGS flags = page_get_status_flags(page);
  519. if(flags & PGC_PAGE_HOT) {
  520. page_transition_unlock(cache, page);
  521. return;
  522. }
  523. if(flags & PGC_PAGE_DIRTY)
  524. pgc_ll_del(cache, &cache->dirty, page, false);
  525. if(flags & PGC_PAGE_CLEAN)
  526. pgc_ll_del(cache, &cache->clean, page, false);
  527. // first add to the linked list, then set the flag (required for move_page_last())
  528. pgc_ll_add(cache, &cache->hot, page, false);
  529. page_transition_unlock(cache, page);
  530. }
  531. // ----------------------------------------------------------------------------
  532. // Referencing
  533. static inline size_t PGC_REFERENCED_PAGES(PGC *cache) {
  534. return __atomic_load_n(&cache->stats.referenced_entries, __ATOMIC_RELAXED);
  535. }
  536. static inline void PGC_REFERENCED_PAGES_PLUS1(PGC *cache, PGC_PAGE *page) {
  537. __atomic_add_fetch(&cache->stats.referenced_entries, 1, __ATOMIC_RELAXED);
  538. __atomic_add_fetch(&cache->stats.referenced_size, page->assumed_size, __ATOMIC_RELAXED);
  539. }
  540. static inline void PGC_REFERENCED_PAGES_MINUS1(PGC *cache, size_t assumed_size) {
  541. __atomic_sub_fetch(&cache->stats.referenced_entries, 1, __ATOMIC_RELAXED);
  542. __atomic_sub_fetch(&cache->stats.referenced_size, assumed_size, __ATOMIC_RELAXED);
  543. }
  544. // If the page is not already acquired,
  545. // YOU HAVE TO HAVE THE QUEUE (hot, dirty, clean) THE PAGE IS IN, L O C K E D !
  546. // If you don't have it locked, NOTHING PREVENTS THIS PAGE FROM VANISHING WHILE THIS IS CALLED!
  547. static inline bool page_acquire(PGC *cache, PGC_PAGE *page) {
  548. __atomic_add_fetch(&cache->stats.acquires, 1, __ATOMIC_RELAXED);
  549. REFCOUNT expected, desired;
  550. expected = __atomic_load_n(&page->refcount, __ATOMIC_RELAXED);
  551. size_t spins = 0;
  552. do {
  553. spins++;
  554. if(unlikely(expected < 0))
  555. return false;
  556. desired = expected + 1;
  557. } while(!__atomic_compare_exchange_n(&page->refcount, &expected, desired, false, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED));
  558. if(unlikely(spins > 1))
  559. __atomic_add_fetch(&cache->stats.acquire_spins, spins - 1, __ATOMIC_RELAXED);
  560. if(desired == 1)
  561. PGC_REFERENCED_PAGES_PLUS1(cache, page);
  562. return true;
  563. }
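/*
 * Minimal usage sketch for the reference counting above, assuming the caller
 * located 'page' while holding the queue lock (see the warning above):
 *
 *     pgc_ll_lock(cache, &cache->clean);
 *     bool acquired = page_acquire(cache, page);  // fails while the page is being deleted
 *     pgc_ll_unlock(cache, &cache->clean);
 *     if(acquired) {
 *         // ... safe to use page->data here ...
 *         page_release(cache, page, true);        // may trigger inline eviction at refcount 0
 *     }
 */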
  564. static inline void page_release(PGC *cache, PGC_PAGE *page, bool evict_if_necessary) {
  565. __atomic_add_fetch(&cache->stats.releases, 1, __ATOMIC_RELAXED);
  566. size_t assumed_size = page->assumed_size; // take the size before we release it
  567. REFCOUNT expected, desired;
  568. expected = __atomic_load_n(&page->refcount, __ATOMIC_RELAXED);
  569. size_t spins = 0;
  570. do {
  571. spins++;
  572. internal_fatal(expected <= 0,
  573. "DBENGINE CACHE: trying to release a page with reference counter %d", expected);
  574. desired = expected - 1;
  575. } while(!__atomic_compare_exchange_n(&page->refcount, &expected, desired, false, __ATOMIC_RELEASE, __ATOMIC_RELAXED));
  576. if(unlikely(spins > 1))
  577. __atomic_add_fetch(&cache->stats.release_spins, spins - 1, __ATOMIC_RELAXED);
  578. if(desired == 0) {
  579. PGC_REFERENCED_PAGES_MINUS1(cache, assumed_size);
  580. if(evict_if_necessary)
  581. evict_on_page_release_when_permitted(cache);
  582. }
  583. }
  584. static inline bool non_acquired_page_get_for_deletion___while_having_clean_locked(PGC *cache __maybe_unused, PGC_PAGE *page) {
  585. __atomic_add_fetch(&cache->stats.acquires_for_deletion, 1, __ATOMIC_RELAXED);
  586. internal_fatal(!is_page_clean(page),
  587. "DBENGINE CACHE: only clean pages can be deleted");
  588. REFCOUNT expected, desired;
  589. expected = __atomic_load_n(&page->refcount, __ATOMIC_RELAXED);
  590. size_t spins = 0;
  591. bool delete_it;
  592. do {
  593. spins++;
  594. if (expected == 0) {
  595. desired = REFCOUNT_DELETING;
  596. delete_it = true;
  597. }
  598. else {
  599. delete_it = false;
  600. break;
  601. }
  602. } while(!__atomic_compare_exchange_n(&page->refcount, &expected, desired, false, __ATOMIC_RELEASE, __ATOMIC_RELAXED));
  603. if(delete_it) {
  604. // we can delete this page
  605. internal_fatal(page_flag_check(page, PGC_PAGE_IS_BEING_DELETED),
  606. "DBENGINE CACHE: page is already being deleted");
  607. page_flag_set(page, PGC_PAGE_IS_BEING_DELETED);
  608. }
  609. if(unlikely(spins > 1))
  610. __atomic_add_fetch(&cache->stats.delete_spins, spins - 1, __ATOMIC_RELAXED);
  611. return delete_it;
  612. }
  613. static inline bool acquired_page_get_for_deletion_or_release_it(PGC *cache __maybe_unused, PGC_PAGE *page) {
  614. __atomic_add_fetch(&cache->stats.acquires_for_deletion, 1, __ATOMIC_RELAXED);
  615. size_t assumed_size = page->assumed_size; // take the size before we release it
  616. REFCOUNT expected, desired;
  617. expected = __atomic_load_n(&page->refcount, __ATOMIC_RELAXED);
  618. size_t spins = 0;
  619. bool delete_it;
  620. do {
  621. spins++;
  622. internal_fatal(expected < 1,
  623. "DBENGINE CACHE: page to be deleted should be acquired by the caller.");
  624. if (expected == 1) {
  625. // we are the only one having this page referenced
  626. desired = REFCOUNT_DELETING;
  627. delete_it = true;
  628. }
  629. else {
  630. // this page cannot be deleted
  631. desired = expected - 1;
  632. delete_it = false;
  633. }
  634. } while(!__atomic_compare_exchange_n(&page->refcount, &expected, desired, false, __ATOMIC_RELEASE, __ATOMIC_RELAXED));
  635. if(delete_it) {
  636. PGC_REFERENCED_PAGES_MINUS1(cache, assumed_size);
  637. // we can delete this page
  638. internal_fatal(page_flag_check(page, PGC_PAGE_IS_BEING_DELETED),
  639. "DBENGINE CACHE: page is already being deleted");
  640. page_flag_set(page, PGC_PAGE_IS_BEING_DELETED);
  641. }
  642. if(unlikely(spins > 1))
  643. __atomic_add_fetch(&cache->stats.delete_spins, spins - 1, __ATOMIC_RELAXED);
  644. return delete_it;
  645. }
  646. // ----------------------------------------------------------------------------
  647. // Indexing
  648. static inline void free_this_page(PGC *cache, PGC_PAGE *page, size_t partition __maybe_unused) {
  649. // call the callback to free the user supplied memory
  650. cache->config.pgc_free_clean_cb(cache, (PGC_ENTRY){
  651. .section = page->section,
  652. .metric_id = page->metric_id,
  653. .start_time_s = page->start_time_s,
  654. .end_time_s = __atomic_load_n(&page->end_time_s, __ATOMIC_RELAXED),
  655. .update_every_s = page->update_every_s,
  656. .size = page_size_from_assumed_size(cache, page->assumed_size),
  657. .hot = (is_page_hot(page)) ? true : false,
  658. .data = page->data,
  659. .custom_data = (cache->config.additional_bytes_per_page) ? page->custom_data : NULL,
  660. });
  661. // update statistics
  662. __atomic_add_fetch(&cache->stats.removed_entries, 1, __ATOMIC_RELAXED);
  663. __atomic_add_fetch(&cache->stats.removed_size, page->assumed_size, __ATOMIC_RELAXED);
  664. __atomic_sub_fetch(&cache->stats.entries, 1, __ATOMIC_RELAXED);
  665. __atomic_sub_fetch(&cache->stats.size, page->assumed_size, __ATOMIC_RELAXED);
  666. // free our memory
  667. #ifdef PGC_WITH_ARAL
  668. aral_freez(cache->aral[partition], page);
  669. #else
  670. freez(page);
  671. #endif
  672. }
  673. static void remove_this_page_from_index_unsafe(PGC *cache, PGC_PAGE *page, size_t partition) {
  674. // remove it from the Judy arrays
  675. pointer_check(cache, page);
  676. internal_fatal(page_flag_check(page, PGC_PAGE_HOT | PGC_PAGE_DIRTY | PGC_PAGE_CLEAN),
  677. "DBENGINE CACHE: page to be removed from the cache is still in the linked-list");
  678. internal_fatal(!page_flag_check(page, PGC_PAGE_IS_BEING_DELETED),
  679. "DBENGINE CACHE: page to be removed from the index, is not marked for deletion");
  680. internal_fatal(partition != pgc_indexing_partition(cache, page->metric_id),
  681. "DBENGINE CACHE: attempted to remove this page from the wrong partition of the cache");
  682. Pvoid_t *metrics_judy_pptr = JudyLGet(cache->index[partition].sections_judy, page->section, PJE0);
  683. if(unlikely(!metrics_judy_pptr))
  684. fatal("DBENGINE CACHE: section '%lu' should exist, but it does not.", page->section);
  685. Pvoid_t *pages_judy_pptr = JudyLGet(*metrics_judy_pptr, page->metric_id, PJE0);
  686. if(unlikely(!pages_judy_pptr))
  687. fatal("DBENGINE CACHE: metric '%lu' in section '%lu' should exist, but it does not.",
  688. page->metric_id, page->section);
  689. Pvoid_t *page_ptr = JudyLGet(*pages_judy_pptr, page->start_time_s, PJE0);
  690. if(unlikely(!page_ptr))
  691. fatal("DBENGINE CACHE: page with start time '%ld' of metric '%lu' in section '%lu' should exist, but it does not.",
  692. page->start_time_s, page->metric_id, page->section);
  693. PGC_PAGE *found_page = *page_ptr;
  694. if(unlikely(found_page != page))
  695. fatal("DBENGINE CACHE: page with start time '%ld' of metric '%lu' in section '%lu' should exist, but the index returned a different address.",
  696. page->start_time_s, page->metric_id, page->section);
  697. size_t mem_before_judyl = 0, mem_after_judyl = 0;
  698. mem_before_judyl += JudyLMemUsed(*pages_judy_pptr);
  699. if(unlikely(!JudyLDel(pages_judy_pptr, page->start_time_s, PJE0)))
  700. fatal("DBENGINE CACHE: page with start time '%ld' of metric '%lu' in section '%lu' exists, but cannot be deleted.",
  701. page->start_time_s, page->metric_id, page->section);
  702. mem_after_judyl += JudyLMemUsed(*pages_judy_pptr);
  703. mem_before_judyl += JudyLMemUsed(*metrics_judy_pptr);
  704. if(!*pages_judy_pptr && !JudyLDel(metrics_judy_pptr, page->metric_id, PJE0))
  705. fatal("DBENGINE CACHE: metric '%lu' in section '%lu' exists and is empty, but cannot be deleted.",
  706. page->metric_id, page->section);
  707. mem_after_judyl += JudyLMemUsed(*metrics_judy_pptr);
  708. mem_before_judyl += JudyLMemUsed(cache->index[partition].sections_judy);
  709. if(!*metrics_judy_pptr && !JudyLDel(&cache->index[partition].sections_judy, page->section, PJE0))
  710. fatal("DBENGINE CACHE: section '%lu' exists and is empty, but cannot be deleted.", page->section);
  711. mem_after_judyl += JudyLMemUsed(cache->index[partition].sections_judy);
  712. pgc_stats_index_judy_change(cache, mem_before_judyl, mem_after_judyl);
  713. pointer_del(cache, page);
  714. }
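/*
 * The index walked above is a three-level JudyL tree per partition
 * (see struct pgc_index):
 *
 *     sections_judy[section]        -> JudyL of metrics
 *       metrics_judy[metric_id]     -> JudyL of pages
 *         pages_judy[start_time_s]  -> PGC_PAGE *
 *
 * remove_this_page_from_index_unsafe() deletes the leaf entry and then prunes
 * any inner JudyL that becomes empty, charging the memory deltas to the cache
 * statistics via pgc_stats_index_judy_change().
 */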
  715. static inline void remove_and_free_page_not_in_any_queue_and_acquired_for_deletion(PGC *cache, PGC_PAGE *page) {
  716. size_t partition = pgc_indexing_partition(cache, page->metric_id);
  717. pgc_index_write_lock(cache, partition);
  718. remove_this_page_from_index_unsafe(cache, page, partition);
  719. pgc_index_write_unlock(cache, partition);
  720. free_this_page(cache, page, partition);
  721. }
  722. static inline bool make_acquired_page_clean_and_evict_or_page_release(PGC *cache, PGC_PAGE *page) {
  723. pointer_check(cache, page);
  724. page_transition_lock(cache, page);
  725. pgc_ll_lock(cache, &cache->clean);
  726. // make it clean - it does not have any accesses, so it will be prepended
  727. page_set_clean(cache, page, true, true);
  728. if(!acquired_page_get_for_deletion_or_release_it(cache, page)) {
  729. pgc_ll_unlock(cache, &cache->clean);
  730. page_transition_unlock(cache, page);
  731. return false;
  732. }
  733. // remove it from the linked list
  734. pgc_ll_del(cache, &cache->clean, page, true);
  735. pgc_ll_unlock(cache, &cache->clean);
  736. page_transition_unlock(cache, page);
  737. remove_and_free_page_not_in_any_queue_and_acquired_for_deletion(cache, page);
  738. return true;
  739. }
  740. // returns true, when there is more work to do
  741. static bool evict_pages_with_filter(PGC *cache, size_t max_skip, size_t max_evict, bool wait, bool all_of_them, evict_filter filter, void *data) {
  742. size_t per1000 = cache_usage_per1000(cache, NULL);
  743. if(!all_of_them && per1000 < cache->config.healthy_size_per1000)
  744. // don't bother - not enough to do anything
  745. return false;
  746. size_t workers_running = __atomic_add_fetch(&cache->stats.workers_evict, 1, __ATOMIC_RELAXED);
  747. if(!wait && !all_of_them && workers_running > cache->config.max_workers_evict_inline && per1000 < cache->config.severe_pressure_per1000) {
  748. __atomic_sub_fetch(&cache->stats.workers_evict, 1, __ATOMIC_RELAXED);
  749. return false;
  750. }
  751. internal_fatal(cache->clean.linked_list_in_sections_judy,
  752. "wrong clean pages configuration - clean pages need to have a linked list, not a judy array");
  753. if(unlikely(!max_skip))
  754. max_skip = SIZE_MAX;
  755. else if(unlikely(max_skip < 2))
  756. max_skip = 2;
  757. if(unlikely(!max_evict))
  758. max_evict = SIZE_MAX;
  759. else if(unlikely(max_evict < 2))
  760. max_evict = 2;
  761. size_t total_pages_evicted = 0;
  762. size_t total_pages_skipped = 0;
  763. bool stopped_before_finishing = false;
  764. size_t spins = 0;
  765. do {
  766. if(++spins > 1)
  767. __atomic_add_fetch(&cache->stats.evict_spins, 1, __ATOMIC_RELAXED);
  768. bool batch;
  769. size_t max_size_to_evict = 0;
  770. if (unlikely(all_of_them)) {
  771. max_size_to_evict = SIZE_MAX;
  772. batch = true;
  773. }
  774. else if(unlikely(wait)) {
  775. per1000 = cache_usage_per1000(cache, &max_size_to_evict);
  776. batch = (wait && per1000 > cache->config.severe_pressure_per1000) ? true : false;
  777. }
  778. else {
  779. batch = false;
  780. max_size_to_evict = (cache_above_healthy_limit(cache)) ? 1 : 0;
  781. }
  782. if (!max_size_to_evict)
  783. break;
  784. // check if we have to stop
  785. if(total_pages_evicted >= max_evict && !all_of_them) {
  786. stopped_before_finishing = true;
  787. break;
  788. }
  789. if(!all_of_them && !wait) {
  790. if(!pgc_ll_trylock(cache, &cache->clean)) {
  791. stopped_before_finishing = true;
  792. goto premature_exit;
  793. }
  794. // at this point we have the clean lock
  795. }
  796. else
  797. pgc_ll_lock(cache, &cache->clean);
  798. // find a page to evict
  799. PGC_PAGE *pages_to_evict = NULL;
  800. size_t pages_to_evict_size = 0;
  801. for(PGC_PAGE *page = cache->clean.base, *next = NULL, *first_page_we_relocated = NULL; page ; page = next) {
  802. next = page->link.next;
  803. if(unlikely(page == first_page_we_relocated))
  804. // we did a complete loop on all pages
  805. break;
  806. if(unlikely(page_flag_check(page, PGC_PAGE_HAS_BEEN_ACCESSED | PGC_PAGE_HAS_NO_DATA_IGNORE_ACCESSES) == PGC_PAGE_HAS_BEEN_ACCESSED)) {
  807. DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(cache->clean.base, page, link.prev, link.next);
  808. DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(cache->clean.base, page, link.prev, link.next);
  809. page_flag_clear(page, PGC_PAGE_HAS_BEEN_ACCESSED);
  810. continue;
  811. }
  812. if(unlikely(filter && !filter(page, data)))
  813. continue;
  814. if(non_acquired_page_get_for_deletion___while_having_clean_locked(cache, page)) {
  815. // we can delete this page
  816. // remove it from the clean list
  817. pgc_ll_del(cache, &cache->clean, page, true);
  818. __atomic_add_fetch(&cache->stats.evicting_entries, 1, __ATOMIC_RELAXED);
  819. __atomic_add_fetch(&cache->stats.evicting_size, page->assumed_size, __ATOMIC_RELAXED);
  820. DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(pages_to_evict, page, link.prev, link.next);
  821. pages_to_evict_size += page->assumed_size;
  822. if(unlikely(all_of_them || (batch && pages_to_evict_size < max_size_to_evict)))
  823. // get more pages
  824. ;
  825. else
  826. // one page at a time
  827. break;
  828. }
  829. else {
  830. // we can't delete this page
  831. if(!first_page_we_relocated)
  832. first_page_we_relocated = page;
  833. DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(cache->clean.base, page, link.prev, link.next);
  834. DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(cache->clean.base, page, link.prev, link.next);
  835. // check if we have to stop
  836. if(unlikely(++total_pages_skipped >= max_skip && !all_of_them)) {
  837. stopped_before_finishing = true;
  838. break;
  839. }
  840. }
  841. }
  842. pgc_ll_unlock(cache, &cache->clean);
  843. if(likely(pages_to_evict)) {
  844. // remove them from the index
  845. if(unlikely(pages_to_evict->link.next)) {
  846. // we have many pages, let's minimize the index locks we are going to get
  847. PGC_PAGE *pages_per_partition[cache->config.partitions];
  848. memset(pages_per_partition, 0, sizeof(PGC_PAGE *) * cache->config.partitions);
  849. // sort them by partition
  850. for (PGC_PAGE *page = pages_to_evict, *next = NULL; page; page = next) {
  851. next = page->link.next;
  852. size_t partition = pgc_indexing_partition(cache, page->metric_id);
  853. DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(pages_to_evict, page, link.prev, link.next);
  854. DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(pages_per_partition[partition], page, link.prev, link.next);
  855. }
  856. // remove them from the index
  857. for (size_t partition = 0; partition < cache->config.partitions; partition++) {
  858. if (!pages_per_partition[partition]) continue;
  859. pgc_index_write_lock(cache, partition);
  860. for (PGC_PAGE *page = pages_per_partition[partition]; page; page = page->link.next)
  861. remove_this_page_from_index_unsafe(cache, page, partition);
  862. pgc_index_write_unlock(cache, partition);
  863. }
  864. // free them
  865. for (size_t partition = 0; partition < cache->config.partitions; partition++) {
  866. if (!pages_per_partition[partition]) continue;
  867. for (PGC_PAGE *page = pages_per_partition[partition], *next = NULL; page; page = next) {
  868. next = page->link.next;
  869. size_t page_size = page->assumed_size;
  870. free_this_page(cache, page, partition);
  871. __atomic_sub_fetch(&cache->stats.evicting_entries, 1, __ATOMIC_RELAXED);
  872. __atomic_sub_fetch(&cache->stats.evicting_size, page_size, __ATOMIC_RELAXED);
  873. total_pages_evicted++;
  874. }
  875. }
  876. }
  877. else {
  878. // just one page to be evicted
  879. PGC_PAGE *page = pages_to_evict;
  880. size_t page_size = page->assumed_size;
  881. size_t partition = pgc_indexing_partition(cache, page->metric_id);
  882. pgc_index_write_lock(cache, partition);
  883. remove_this_page_from_index_unsafe(cache, page, partition);
  884. pgc_index_write_unlock(cache, partition);
  885. free_this_page(cache, page, partition);
  886. __atomic_sub_fetch(&cache->stats.evicting_entries, 1, __ATOMIC_RELAXED);
  887. __atomic_sub_fetch(&cache->stats.evicting_size, page_size, __ATOMIC_RELAXED);
  888. total_pages_evicted++;
  889. }
  890. }
  891. else
  892. break;
  893. } while(all_of_them || (total_pages_evicted < max_evict && total_pages_skipped < max_skip));
  894. if(all_of_them && !filter) {
  895. pgc_ll_lock(cache, &cache->clean);
  896. if(cache->clean.stats->entries) {
  897. error_limit_static_global_var(erl, 1, 0);
  898. error_limit(&erl, "DBENGINE CACHE: cannot free all clean pages, %zu are still in the clean queue",
  899. cache->clean.stats->entries);
  900. }
  901. pgc_ll_unlock(cache, &cache->clean);
  902. }
  903. premature_exit:
  904. if(unlikely(total_pages_skipped))
  905. __atomic_add_fetch(&cache->stats.evict_skipped, total_pages_skipped, __ATOMIC_RELAXED);
  906. __atomic_sub_fetch(&cache->stats.workers_evict, 1, __ATOMIC_RELAXED);
  907. return stopped_before_finishing;
  908. }
  909. static PGC_PAGE *page_add(PGC *cache, PGC_ENTRY *entry, bool *added) {
  910. __atomic_add_fetch(&cache->stats.workers_add, 1, __ATOMIC_RELAXED);
  911. size_t partition = pgc_indexing_partition(cache, entry->metric_id);
  912. #ifdef PGC_WITH_ARAL
  913. PGC_PAGE *allocation = aral_mallocz(cache->aral[partition]);
  914. #endif
  915. PGC_PAGE *page;
  916. size_t spins = 0;
  917. do {
  918. if(++spins > 1)
  919. __atomic_add_fetch(&cache->stats.insert_spins, 1, __ATOMIC_RELAXED);
  920. pgc_index_write_lock(cache, partition);
  921. size_t mem_before_judyl = 0, mem_after_judyl = 0;
  922. mem_before_judyl += JudyLMemUsed(cache->index[partition].sections_judy);
  923. Pvoid_t *metrics_judy_pptr = JudyLIns(&cache->index[partition].sections_judy, entry->section, PJE0);
  924. if(unlikely(!metrics_judy_pptr || metrics_judy_pptr == PJERR))
  925. fatal("DBENGINE CACHE: corrupted sections judy array");
  926. mem_after_judyl += JudyLMemUsed(cache->index[partition].sections_judy);
  927. mem_before_judyl += JudyLMemUsed(*metrics_judy_pptr);
  928. Pvoid_t *pages_judy_pptr = JudyLIns(metrics_judy_pptr, entry->metric_id, PJE0);
  929. if(unlikely(!pages_judy_pptr || pages_judy_pptr == PJERR))
  930. fatal("DBENGINE CACHE: corrupted pages judy array");
  931. mem_after_judyl += JudyLMemUsed(*metrics_judy_pptr);
  932. mem_before_judyl += JudyLMemUsed(*pages_judy_pptr);
  933. Pvoid_t *page_ptr = JudyLIns(pages_judy_pptr, entry->start_time_s, PJE0);
  934. if(unlikely(!page_ptr || page_ptr == PJERR))
  935. fatal("DBENGINE CACHE: corrupted page in judy array");
  936. mem_after_judyl += JudyLMemUsed(*pages_judy_pptr);
  937. pgc_stats_index_judy_change(cache, mem_before_judyl, mem_after_judyl);
  938. page = *page_ptr;
  939. if (likely(!page)) {
  940. #ifdef PGC_WITH_ARAL
  941. page = allocation;
  942. allocation = NULL;
  943. #else
  944. page = mallocz(sizeof(PGC_PAGE) + cache->config.additional_bytes_per_page);
  945. #endif
  946. page->refcount = 1;
  947. page->accesses = (entry->hot) ? 0 : 1;
  948. page->flags = 0;
  949. page->section = entry->section;
  950. page->metric_id = entry->metric_id;
  951. page->start_time_s = entry->start_time_s;
  952. page->end_time_s = entry->end_time_s;
  953. page->update_every_s = entry->update_every_s;
  954. page->data = entry->data;
  955. page->assumed_size = page_assumed_size(cache, entry->size);
  956. netdata_spinlock_init(&page->transition_spinlock);
  957. page->link.prev = NULL;
  958. page->link.next = NULL;
  959. if(cache->config.additional_bytes_per_page) {
  960. if(entry->custom_data)
  961. memcpy(page->custom_data, entry->custom_data, cache->config.additional_bytes_per_page);
  962. else
  963. memset(page->custom_data, 0, cache->config.additional_bytes_per_page);
  964. }
  965. // put it in the index
  966. *page_ptr = page;
  967. pointer_add(cache, page);
  968. pgc_index_write_unlock(cache, partition);
  969. if (entry->hot)
  970. page_set_hot(cache, page);
  971. else
  972. page_set_clean(cache, page, false, false);
  973. PGC_REFERENCED_PAGES_PLUS1(cache, page);
  974. // update statistics
  975. __atomic_add_fetch(&cache->stats.added_entries, 1, __ATOMIC_RELAXED);
  976. __atomic_add_fetch(&cache->stats.added_size, page->assumed_size, __ATOMIC_RELAXED);
  977. __atomic_add_fetch(&cache->stats.entries, 1, __ATOMIC_RELAXED);
  978. __atomic_add_fetch(&cache->stats.size, page->assumed_size, __ATOMIC_RELAXED);
  979. if(added)
  980. *added = true;
  981. }
  982. else {
  983. if (!page_acquire(cache, page))
  984. page = NULL;
  985. else if(added)
  986. *added = false;
  987. pgc_index_write_unlock(cache, partition);
  988. if(unlikely(!page)) {
  989. // now that we don't have the lock,
  990. // give it some time for the old page to go away
  991. struct timespec ns = { .tv_sec = 0, .tv_nsec = 1 };
  992. nanosleep(&ns, NULL);
  993. }
  994. }
  995. } while(!page);
  996. #ifdef PGC_WITH_ARAL
  997. if(allocation)
  998. aral_freez(cache->aral[partition], allocation);
  999. #endif
  1000. __atomic_sub_fetch(&cache->stats.workers_add, 1, __ATOMIC_RELAXED);
  1001. if(!entry->hot)
  1002. evict_on_clean_page_added(cache);
  1003. if((cache->config.options & PGC_OPTIONS_FLUSH_PAGES_INLINE) || flushing_critical(cache)) {
  1004. flush_pages(cache, cache->config.max_flushes_inline, PGC_SECTION_ALL,
  1005. false, false);
  1006. }
  1007. return page;
  1008. }
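// page_find_and_acquire() looks a page up in the index and acquires a reference on it.
// PGC_SEARCH_EXACT needs an exact start_time_s match. PGC_SEARCH_CLOSEST first tries an
// exact match, then the last page starting at or before the timestamp (accepted only if
// the timestamp is not past its end_time_s), and finally the first page starting after
// it. FIRST, NEXT, LAST and PREV walk the metric's judy array relative to start_time_s.
// Hits and misses are counted separately for exact and closest searches.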
  1009. static PGC_PAGE *page_find_and_acquire(PGC *cache, Word_t section, Word_t metric_id, time_t start_time_s, PGC_SEARCH method) {
  1010. __atomic_add_fetch(&cache->stats.workers_search, 1, __ATOMIC_RELAXED);
  1011. size_t *stats_hit_ptr, *stats_miss_ptr;
  1012. if(method == PGC_SEARCH_CLOSEST) {
  1013. __atomic_add_fetch(&cache->stats.searches_closest, 1, __ATOMIC_RELAXED);
  1014. stats_hit_ptr = &cache->stats.searches_closest_hits;
  1015. stats_miss_ptr = &cache->stats.searches_closest_misses;
  1016. }
  1017. else {
  1018. __atomic_add_fetch(&cache->stats.searches_exact, 1, __ATOMIC_RELAXED);
  1019. stats_hit_ptr = &cache->stats.searches_exact_hits;
  1020. stats_miss_ptr = &cache->stats.searches_exact_misses;
  1021. }
  1022. PGC_PAGE *page = NULL;
  1023. size_t partition = pgc_indexing_partition(cache, metric_id);
  1024. pgc_index_read_lock(cache, partition);
  1025. Pvoid_t *metrics_judy_pptr = JudyLGet(cache->index[partition].sections_judy, section, PJE0);
  1026. if(unlikely(metrics_judy_pptr == PJERR))
  1027. fatal("DBENGINE CACHE: corrupted sections judy array");
  1028. if(unlikely(!metrics_judy_pptr)) {
  1029. // section does not exist
  1030. goto cleanup;
  1031. }
  1032. Pvoid_t *pages_judy_pptr = JudyLGet(*metrics_judy_pptr, metric_id, PJE0);
  1033. if(unlikely(pages_judy_pptr == PJERR))
  1034. fatal("DBENGINE CACHE: corrupted pages judy array");
  1035. if(unlikely(!pages_judy_pptr)) {
  1036. // metric does not exist
  1037. goto cleanup;
  1038. }
  1039. switch(method) {
  1040. default:
  1041. case PGC_SEARCH_CLOSEST: {
  1042. Pvoid_t *page_ptr = JudyLGet(*pages_judy_pptr, start_time_s, PJE0);
  1043. if (unlikely(page_ptr == PJERR))
  1044. fatal("DBENGINE CACHE: corrupted page in pages judy array");
  1045. if (page_ptr)
  1046. page = *page_ptr;
  1047. else {
  1048. Word_t time = start_time_s;
  1049. // find the previous page
  1050. page_ptr = JudyLLast(*pages_judy_pptr, &time, PJE0);
  1051. if(unlikely(page_ptr == PJERR))
  1052. fatal("DBENGINE CACHE: corrupted page in pages judy array #2");
  1053. if(page_ptr) {
  1054. // found a page starting before our timestamp
  1055. // check if our timestamp is included
  1056. page = *page_ptr;
  1057. if(start_time_s > page->end_time_s)
  1058. // it is not good for us
  1059. page = NULL;
  1060. }
  1061. if(!page) {
  1062. // find the next page then...
  1063. time = start_time_s;
  1064. page_ptr = JudyLNext(*pages_judy_pptr, &time, PJE0);
  1065. if(page_ptr)
  1066. page = *page_ptr;
  1067. }
  1068. }
  1069. }
  1070. break;
  1071. case PGC_SEARCH_EXACT: {
  1072. Pvoid_t *page_ptr = JudyLGet(*pages_judy_pptr, start_time_s, PJE0);
  1073. if (unlikely(page_ptr == PJERR))
  1074. fatal("DBENGINE CACHE: corrupted page in pages judy array");
  1075. if (page_ptr)
  1076. page = *page_ptr;
  1077. }
  1078. break;
  1079. case PGC_SEARCH_FIRST: {
  1080. Word_t time = start_time_s;
  1081. Pvoid_t *page_ptr = JudyLFirst(*pages_judy_pptr, &time, PJE0);
  1082. if (unlikely(page_ptr == PJERR))
  1083. fatal("DBENGINE CACHE: corrupted page in pages judy array");
  1084. if (page_ptr)
  1085. page = *page_ptr;
  1086. }
  1087. break;
  1088. case PGC_SEARCH_NEXT: {
  1089. Word_t time = start_time_s;
  1090. Pvoid_t *page_ptr = JudyLNext(*pages_judy_pptr, &time, PJE0);
  1091. if (unlikely(page_ptr == PJERR))
  1092. fatal("DBENGINE CACHE: corrupted page in pages judy array");
  1093. if (page_ptr)
  1094. page = *page_ptr;
  1095. }
  1096. break;
  1097. case PGC_SEARCH_LAST: {
  1098. Word_t time = start_time_s;
  1099. Pvoid_t *page_ptr = JudyLLast(*pages_judy_pptr, &time, PJE0);
  1100. if (unlikely(page_ptr == PJERR))
  1101. fatal("DBENGINE CACHE: corrupted page in pages judy array");
  1102. if (page_ptr)
  1103. page = *page_ptr;
  1104. }
  1105. break;
  1106. case PGC_SEARCH_PREV: {
  1107. Word_t time = start_time_s;
  1108. Pvoid_t *page_ptr = JudyLPrev(*pages_judy_pptr, &time, PJE0);
  1109. if (unlikely(page_ptr == PJERR))
  1110. fatal("DBENGINE CACHE: corrupted page in pages judy array");
  1111. if (page_ptr)
  1112. page = *page_ptr;
  1113. }
  1114. break;
  1115. }
  1116. if(page) {
  1117. pointer_check(cache, page);
  1118. if(!page_acquire(cache, page)) {
  1119. // this page is not good to use
  1120. page = NULL;
  1121. }
  1122. }
  1123. cleanup:
  1124. pgc_index_read_unlock(cache, partition);
  1125. if(page) {
  1126. __atomic_add_fetch(stats_hit_ptr, 1, __ATOMIC_RELAXED);
  1127. page_has_been_accessed(cache, page);
  1128. }
  1129. else
  1130. __atomic_add_fetch(stats_miss_ptr, 1, __ATOMIC_RELAXED);
  1131. __atomic_sub_fetch(&cache->stats.workers_search, 1, __ATOMIC_RELAXED);
  1132. return page;
  1133. }
  1134. static void all_hot_pages_to_dirty(PGC *cache, Word_t section) {
  1135. pgc_ll_lock(cache, &cache->hot);
  1136. bool first = true;
  1137. Word_t last_section = (section == PGC_SECTION_ALL) ? 0 : section;
  1138. Pvoid_t *section_pages_pptr;
  1139. while ((section_pages_pptr = JudyLFirstThenNext(cache->hot.sections_judy, &last_section, &first))) {
  1140. if(section != PGC_SECTION_ALL && last_section != section)
  1141. break;
  1142. struct section_pages *sp = *section_pages_pptr;
  1143. PGC_PAGE *page = sp->base;
  1144. while(page) {
  1145. PGC_PAGE *next = page->link.next;
  1146. if(page_acquire(cache, page)) {
  1147. page_set_dirty(cache, page, true);
  1148. page_release(cache, page, false);
  1149. // page ptr may be invalid now
  1150. }
  1151. page = next;
  1152. }
  1153. }
  1154. pgc_ll_unlock(cache, &cache->hot);
  1155. }
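// flush_pages() collects up to max_dirty_pages_per_call dirty pages per section
// (skipping sections that do not yet have a full batch, unless all_of_them is set),
// removes them from the dirty queue, releases the dirty lock and calls
// pgc_save_dirty_cb to persist them, then marks them clean. When wait is false the
// dirty queue is only try-locked, so data collection threads never block here.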
  1156. // returns true when there is more work to do
  1157. static bool flush_pages(PGC *cache, size_t max_flushes, Word_t section, bool wait, bool all_of_them) {
  1158. internal_fatal(!cache->dirty.linked_list_in_sections_judy,
  1159. "wrong dirty pages configuration - dirty pages need to have a judy array, not a linked list");
  1160. if(!all_of_them && !wait) {
  1161. // we have been called from a data collection thread
  1162. // let's not waste its time...
  1163. if(!pgc_ll_trylock(cache, &cache->dirty)) {
  1164. // we would block, so give up...
  1165. return true;
  1166. }
  1167. // we got the lock at this point
  1168. }
  1169. else
  1170. pgc_ll_lock(cache, &cache->dirty);
  1171. size_t optimal_flush_size = cache->config.max_dirty_pages_per_call;
  1172. size_t dirty_version_at_entry = cache->dirty.version;
  1173. if(!all_of_them && (cache->dirty.stats->entries < optimal_flush_size || cache->dirty.last_version_checked == dirty_version_at_entry)) {
  1174. pgc_ll_unlock(cache, &cache->dirty);
  1175. return false;
  1176. }
  1177. __atomic_add_fetch(&cache->stats.workers_flush, 1, __ATOMIC_RELAXED);
  1178. bool have_dirty_lock = true;
  1179. if(all_of_them || !max_flushes)
  1180. max_flushes = SIZE_MAX;
  1181. Word_t last_section = (section == PGC_SECTION_ALL) ? 0 : section;
  1182. size_t flushes_so_far = 0;
  1183. Pvoid_t *section_pages_pptr;
  1184. bool stopped_before_finishing = false;
  1185. size_t spins = 0;
  1186. bool first = true;
  1187. while (have_dirty_lock && (section_pages_pptr = JudyLFirstThenNext(cache->dirty.sections_judy, &last_section, &first))) {
  1188. if(section != PGC_SECTION_ALL && last_section != section)
  1189. break;
  1190. struct section_pages *sp = *section_pages_pptr;
  1191. if(!all_of_them && sp->entries < optimal_flush_size)
  1192. continue;
  1193. if(!all_of_them && flushes_so_far > max_flushes) {
  1194. stopped_before_finishing = true;
  1195. break;
  1196. }
  1197. if(++spins > 1)
  1198. __atomic_add_fetch(&cache->stats.flush_spins, 1, __ATOMIC_RELAXED);
  1199. PGC_ENTRY array[optimal_flush_size];
  1200. PGC_PAGE *pages[optimal_flush_size];
  1201. size_t pages_added = 0, pages_added_size = 0;
  1202. size_t pages_removed_dirty = 0, pages_removed_dirty_size = 0;
  1203. size_t pages_cancelled = 0, pages_cancelled_size = 0;
  1204. size_t pages_made_clean = 0, pages_made_clean_size = 0;
  1205. PGC_PAGE *page = sp->base;
  1206. while (page && pages_added < optimal_flush_size) {
  1207. PGC_PAGE *next = page->link.next;
  1208. internal_fatal(page_get_status_flags(page) != PGC_PAGE_DIRTY,
  1209. "DBENGINE CACHE: page should be in the dirty list before saved");
  1210. if (page_acquire(cache, page)) {
  1211. internal_fatal(page_get_status_flags(page) != PGC_PAGE_DIRTY,
  1212. "DBENGINE CACHE: page should be in the dirty list before saved");
  1213. internal_fatal(page->section != last_section,
  1214. "DBENGINE CACHE: dirty page is not in the right section (tier)");
  1215. if(!page_transition_trylock(cache, page)) {
  1216. page_release(cache, page, false);
  1217. // page ptr may be invalid now
  1218. }
  1219. else {
  1220. pages[pages_added] = page;
  1221. array[pages_added] = (PGC_ENTRY) {
  1222. .section = page->section,
  1223. .metric_id = page->metric_id,
  1224. .start_time_s = page->start_time_s,
  1225. .end_time_s = __atomic_load_n(&page->end_time_s, __ATOMIC_RELAXED),
  1226. .update_every_s = page->update_every_s,
  1227. .size = page_size_from_assumed_size(cache, page->assumed_size),
  1228. .data = page->data,
  1229. .custom_data = (cache->config.additional_bytes_per_page) ? page->custom_data : NULL,
  1230. .hot = false,
  1231. };
  1232. pages_added_size += page->assumed_size;
  1233. pages_added++;
  1234. }
  1235. }
  1236. page = next;
  1237. }
  1238. // do we have enough to save?
  1239. if(all_of_them || pages_added == optimal_flush_size) {
  1240. // we should do it
  1241. for (size_t i = 0; i < pages_added; i++) {
  1242. PGC_PAGE *tpg = pages[i];
  1243. internal_fatal(page_get_status_flags(tpg) != PGC_PAGE_DIRTY,
  1244. "DBENGINE CACHE: page should be in the dirty list before saved");
  1245. __atomic_add_fetch(&cache->stats.flushing_entries, 1, __ATOMIC_RELAXED);
  1246. __atomic_add_fetch(&cache->stats.flushing_size, tpg->assumed_size, __ATOMIC_RELAXED);
  1247. // remove it from the dirty list
  1248. pgc_ll_del(cache, &cache->dirty, tpg, true);
  1249. pages_removed_dirty_size += tpg->assumed_size;
  1250. pages_removed_dirty++;
  1251. }
  1252. // next time, repeat the same section (tier)
  1253. first = true;
  1254. }
  1255. else {
  1256. // we can't do it
  1257. for (size_t i = 0; i < pages_added; i++) {
  1258. PGC_PAGE *tpg = pages[i];
  1259. internal_fatal(page_get_status_flags(tpg) != PGC_PAGE_DIRTY,
  1260. "DBENGINE CACHE: page should be in the dirty list before saved");
  1261. pages_cancelled_size += tpg->assumed_size;
  1262. pages_cancelled++;
  1263. page_transition_unlock(cache, tpg);
  1264. page_release(cache, tpg, false);
  1265. // page ptr may be invalid now
  1266. }
  1267. __atomic_add_fetch(&cache->stats.flushes_cancelled, pages_cancelled, __ATOMIC_RELAXED);
  1268. __atomic_add_fetch(&cache->stats.flushes_cancelled_size, pages_cancelled_size, __ATOMIC_RELAXED);
  1269. internal_fatal(pages_added != pages_cancelled || pages_added_size != pages_cancelled_size,
  1270. "DBENGINE CACHE: flushing cancel pages mismatch");
  1271. // next time, continue to the next section (tier)
  1272. first = false;
  1273. continue;
  1274. }
  1275. if(cache->config.pgc_save_init_cb)
  1276. cache->config.pgc_save_init_cb(cache, last_section);
  1277. pgc_ll_unlock(cache, &cache->dirty);
  1278. have_dirty_lock = false;
  1279. // call the callback to save them
  1280. // it may take some time, so let's release the lock
  1281. cache->config.pgc_save_dirty_cb(cache, array, pages, pages_added);
  1282. flushes_so_far++;
  1283. __atomic_add_fetch(&cache->stats.flushes_completed, pages_added, __ATOMIC_RELAXED);
  1284. __atomic_add_fetch(&cache->stats.flushes_completed_size, pages_added_size, __ATOMIC_RELAXED);
  1285. size_t pages_to_evict = 0; (void)pages_to_evict;
  1286. for (size_t i = 0; i < pages_added; i++) {
  1287. PGC_PAGE *tpg = pages[i];
  1288. internal_fatal(page_get_status_flags(tpg) != 0,
  1289. "DBENGINE CACHE: page should not be in any list while it is being saved");
  1290. __atomic_sub_fetch(&cache->stats.flushing_entries, 1, __ATOMIC_RELAXED);
  1291. __atomic_sub_fetch(&cache->stats.flushing_size, tpg->assumed_size, __ATOMIC_RELAXED);
  1292. pages_made_clean_size += tpg->assumed_size;
  1293. pages_made_clean++;
  1294. if(!tpg->accesses)
  1295. pages_to_evict++;
  1296. page_set_clean(cache, tpg, true, false);
  1297. page_transition_unlock(cache, tpg);
  1298. page_release(cache, tpg, false);
  1299. // tpg ptr may be invalid now
  1300. }
  1301. internal_fatal(pages_added != pages_made_clean || pages_added != pages_removed_dirty ||
  1302. pages_added_size != pages_made_clean_size || pages_added_size != pages_removed_dirty_size
  1303. , "DBENGINE CACHE: flushing pages mismatch");
  1304. if(!all_of_them && !wait) {
  1305. if(pgc_ll_trylock(cache, &cache->dirty))
  1306. have_dirty_lock = true;
  1307. else {
  1308. stopped_before_finishing = true;
  1309. have_dirty_lock = false;
  1310. }
  1311. }
  1312. else {
  1313. pgc_ll_lock(cache, &cache->dirty);
  1314. have_dirty_lock = true;
  1315. }
  1316. }
  1317. if(have_dirty_lock) {
  1318. if(!stopped_before_finishing && dirty_version_at_entry > cache->dirty.last_version_checked)
  1319. cache->dirty.last_version_checked = dirty_version_at_entry;
  1320. pgc_ll_unlock(cache, &cache->dirty);
  1321. }
  1322. __atomic_sub_fetch(&cache->stats.workers_flush, 1, __ATOMIC_RELAXED);
  1323. return stopped_before_finishing;
  1324. }
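// Evict everything that can be evicted from the clean queue, ignoring the per-call
// limits; pages that are still referenced remain cached and are reported by the
// eviction code above.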
  1325. void free_all_unreferenced_clean_pages(PGC *cache) {
  1326. evict_pages(cache, 0, 0, true, true);
  1327. }
  1328. // ----------------------------------------------------------------------------
  1329. // public API
  1330. PGC *pgc_create(const char *name,
  1331. size_t clean_size_bytes, free_clean_page_callback pgc_free_cb,
  1332. size_t max_dirty_pages_per_flush,
  1333. save_dirty_init_callback pgc_save_init_cb,
  1334. save_dirty_page_callback pgc_save_dirty_cb,
  1335. size_t max_pages_per_inline_eviction, size_t max_inline_evictors,
  1336. size_t max_skip_pages_per_inline_eviction,
  1337. size_t max_flushes_inline,
  1338. PGC_OPTIONS options, size_t partitions, size_t additional_bytes_per_page) {
  1339. if(max_pages_per_inline_eviction < 2)
  1340. max_pages_per_inline_eviction = 2;
  1341. if(max_dirty_pages_per_flush < 1)
  1342. max_dirty_pages_per_flush = 1;
  1343. if(max_flushes_inline * max_dirty_pages_per_flush < 2)
  1344. max_flushes_inline = 2;
  1345. PGC *cache = callocz(1, sizeof(PGC));
  1346. strncpyz(cache->config.name, name, PGC_NAME_MAX);
  1347. cache->config.options = options;
  1348. cache->config.clean_size = (clean_size_bytes < 1 * 1024 * 1024) ? 1 * 1024 * 1024 : clean_size_bytes;
  1349. cache->config.pgc_free_clean_cb = pgc_free_cb;
  1350. cache->config.max_dirty_pages_per_call = max_dirty_pages_per_flush;
  1351. cache->config.pgc_save_init_cb = pgc_save_init_cb;
  1352. cache->config.pgc_save_dirty_cb = pgc_save_dirty_cb;
  1353. cache->config.max_pages_per_inline_eviction = (max_pages_per_inline_eviction < 2) ? 2 : max_pages_per_inline_eviction;
  1354. cache->config.max_skip_pages_per_inline_eviction = (max_skip_pages_per_inline_eviction < 2) ? 2 : max_skip_pages_per_inline_eviction;
  1355. cache->config.max_flushes_inline = (max_flushes_inline < 1) ? 1 : max_flushes_inline;
  1356. cache->config.partitions = partitions < 1 ? (size_t)get_netdata_cpus() : partitions;
  1357. cache->config.additional_bytes_per_page = additional_bytes_per_page;
  1358. cache->config.max_workers_evict_inline = max_inline_evictors;
  1359. cache->config.severe_pressure_per1000 = 1010;
  1360. cache->config.aggressive_evict_per1000 = 990;
  1361. cache->config.healthy_size_per1000 = 980;
  1362. cache->config.evict_low_threshold_per1000 = 970;
  1363. cache->index = callocz(cache->config.partitions, sizeof(struct pgc_index));
  1364. for(size_t part = 0; part < cache->config.partitions ; part++)
  1365. netdata_rwlock_init(&cache->index[part].rwlock);
  1366. netdata_spinlock_init(&cache->hot.spinlock);
  1367. netdata_spinlock_init(&cache->dirty.spinlock);
  1368. netdata_spinlock_init(&cache->clean.spinlock);
  1369. cache->hot.flags = PGC_PAGE_HOT;
  1370. cache->hot.linked_list_in_sections_judy = true;
  1371. cache->hot.stats = &cache->stats.queues.hot;
  1372. cache->dirty.flags = PGC_PAGE_DIRTY;
  1373. cache->dirty.linked_list_in_sections_judy = true;
  1374. cache->dirty.stats = &cache->stats.queues.dirty;
  1375. cache->clean.flags = PGC_PAGE_CLEAN;
  1376. cache->clean.linked_list_in_sections_judy = false;
  1377. cache->clean.stats = &cache->stats.queues.clean;
  1378. pgc_section_pages_static_aral_init();
  1379. #ifdef PGC_WITH_ARAL
  1380. cache->aral = callocz(cache->config.partitions, sizeof(ARAL *));
  1381. for(size_t part = 0; part < cache->config.partitions ; part++) {
  1382. char buf[100 +1];
  1383. snprintfz(buf, 100, "%s[%zu]", name, part);
  1384. cache->aral[part] = aral_create(
  1385. buf,
  1386. sizeof(PGC_PAGE) + cache->config.additional_bytes_per_page,
  1387. 0,
  1388. 16384,
  1389. aral_statistics(pgc_section_pages_aral),
  1390. NULL, NULL, false, false);
  1391. }
  1392. #endif
  1393. pointer_index_init(cache);
  1394. return cache;
  1395. }
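// Usage sketch (illustrative only, not part of the original code): creating a small
// cache the same way pgc_unittest() below does, adding one clean page and tearing the
// cache down. my_free_clean_cb and my_save_dirty_cb are hypothetical callbacks with
// the free_clean_page_callback and save_dirty_page_callback signatures; all sizes are
// arbitrary example values.
//
//     PGC *cache = pgc_create("example", 32 * 1024 * 1024, my_free_clean_cb,
//                             64, NULL, my_save_dirty_cb,
//                             10, 10, 1000, 10,
//                             PGC_OPTIONS_DEFAULT, 1, 0);
//
//     PGC_PAGE *page = pgc_page_add_and_acquire(cache, (PGC_ENTRY){
//         .section = 1, .metric_id = 1,
//         .start_time_s = 100, .end_time_s = 200, .update_every_s = 1,
//         .size = 4096, .data = NULL, .hot = false,
//     }, NULL);
//     pgc_page_release(cache, page);
//
//     pgc_destroy(cache);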
  1396. struct aral_statistics *pgc_aral_statistics(void) {
  1397. return aral_statistics(pgc_section_pages_aral);
  1398. }
  1399. size_t pgc_aral_structures(void) {
  1400. return aral_structures(pgc_section_pages_aral);
  1401. }
  1402. size_t pgc_aral_overhead(void) {
  1403. return aral_overhead(pgc_section_pages_aral);
  1404. }
  1405. void pgc_flush_all_hot_and_dirty_pages(PGC *cache, Word_t section) {
  1406. all_hot_pages_to_dirty(cache, section);
  1407. // save all dirty pages to make them clean
  1408. flush_pages(cache, 0, section, true, true);
  1409. }
  1410. void pgc_destroy(PGC *cache) {
  1411. // convert all hot pages to dirty
  1412. all_hot_pages_to_dirty(cache, PGC_SECTION_ALL);
  1413. // save all dirty pages to make them clean
  1414. flush_pages(cache, 0, PGC_SECTION_ALL, true, true);
  1415. // free all unreferenced clean pages
  1416. free_all_unreferenced_clean_pages(cache);
  1417. if(PGC_REFERENCED_PAGES(cache))
  1418. error("DBENGINE CACHE: there are %zu referenced cache pages - leaving the cache allocated", PGC_REFERENCED_PAGES(cache));
  1419. else {
  1420. pointer_destroy_index(cache);
  1421. for(size_t part = 0; part < cache->config.partitions ; part++)
  1422. netdata_rwlock_destroy(&cache->index[part].rwlock);
  1423. #ifdef PGC_WITH_ARAL
  1424. for(size_t part = 0; part < cache->config.partitions ; part++)
  1425. aral_destroy(cache->aral[part]);
  1426. freez(cache->aral);
  1427. #endif
  1428. freez(cache);
  1429. }
  1430. }
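// Public wrapper around page_add(). The returned page is acquired by the caller; when
// added is not NULL it reports whether a new entry was inserted (true) or an existing
// page with the same section/metric/start_time was returned (false).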
  1431. PGC_PAGE *pgc_page_add_and_acquire(PGC *cache, PGC_ENTRY entry, bool *added) {
  1432. return page_add(cache, &entry, added);
  1433. }
  1434. PGC_PAGE *pgc_page_dup(PGC *cache, PGC_PAGE *page) {
  1435. if(!page_acquire(cache, page))
  1436. fatal("DBENGINE CACHE: tried to dup a page that is not acquired!");
  1437. return page;
  1438. }
  1439. void pgc_page_release(PGC *cache, PGC_PAGE *page) {
  1440. page_release(cache, page, is_page_clean(page));
  1441. }
  1442. void pgc_page_hot_to_dirty_and_release(PGC *cache, PGC_PAGE *page) {
  1443. __atomic_add_fetch(&cache->stats.workers_hot2dirty, 1, __ATOMIC_RELAXED);
  1444. //#ifdef NETDATA_INTERNAL_CHECKS
  1445. // page_transition_lock(cache, page);
  1446. // internal_fatal(!is_page_hot(page), "DBENGINE CACHE: called %s() but page is not hot", __FUNCTION__ );
  1447. // page_transition_unlock(cache, page);
  1448. //#endif
  1449. // make page dirty
  1450. page_set_dirty(cache, page, false);
  1451. // release the page
  1452. page_release(cache, page, true);
  1453. // page ptr may be invalid now
  1454. __atomic_sub_fetch(&cache->stats.workers_hot2dirty, 1, __ATOMIC_RELAXED);
  1455. // flush, if we have to
  1456. if((cache->config.options & PGC_OPTIONS_FLUSH_PAGES_INLINE) || flushing_critical(cache)) {
  1457. flush_pages(cache, cache->config.max_flushes_inline, PGC_SECTION_ALL,
  1458. false, false);
  1459. }
  1460. }
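// Collector-side sketch (illustrative only, not part of the original code): a hot page
// is added when collection of a metric starts, its end_time_s is advanced on every
// sample, and once the page is complete it is handed over to the dirty queue for
// flushing. "cache", "id", "now_s" and "buffer" are hypothetical caller-side variables.
//
//     bool added = false;
//     PGC_PAGE *page = pgc_page_add_and_acquire(cache, (PGC_ENTRY){
//         .section = 1, .metric_id = id,
//         .start_time_s = now_s, .end_time_s = now_s, .update_every_s = 1,
//         .size = 4096, .data = buffer, .hot = true,
//     }, &added);
//
//     // on every collected point:
//     pgc_page_hot_set_end_time_s(cache, page, now_s);
//
//     // when the page is full:
//     pgc_page_hot_to_dirty_and_release(cache, page);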
  1461. bool pgc_page_to_clean_evict_or_release(PGC *cache, PGC_PAGE *page) {
  1462. bool ret;
  1463. __atomic_add_fetch(&cache->stats.workers_hot2dirty, 1, __ATOMIC_RELAXED);
  1464. // prevent accesses from increasing the accesses counter
  1465. page_flag_set(page, PGC_PAGE_HAS_NO_DATA_IGNORE_ACCESSES);
  1466. // zero the accesses counter
  1467. __atomic_store_n(&page->accesses, 0, __ATOMIC_RELEASE);
  1468. // if there are no other references to it, evict it immediately
  1469. if(make_acquired_page_clean_and_evict_or_page_release(cache, page)) {
  1470. __atomic_add_fetch(&cache->stats.hot_empty_pages_evicted_immediately, 1, __ATOMIC_RELAXED);
  1471. ret = true;
  1472. }
  1473. else {
  1474. __atomic_add_fetch(&cache->stats.hot_empty_pages_evicted_later, 1, __ATOMIC_RELAXED);
  1475. ret = false;
  1476. }
  1477. __atomic_sub_fetch(&cache->stats.workers_hot2dirty, 1, __ATOMIC_RELAXED);
  1478. return ret;
  1479. }
  1480. Word_t pgc_page_section(PGC_PAGE *page) {
  1481. return page->section;
  1482. }
  1483. Word_t pgc_page_metric(PGC_PAGE *page) {
  1484. return page->metric_id;
  1485. }
  1486. time_t pgc_page_start_time_s(PGC_PAGE *page) {
  1487. return page->start_time_s;
  1488. }
  1489. time_t pgc_page_end_time_s(PGC_PAGE *page) {
  1490. return page->end_time_s;
  1491. }
  1492. time_t pgc_page_update_every_s(PGC_PAGE *page) {
  1493. return page->update_every_s;
  1494. }
  1495. time_t pgc_page_fix_update_every(PGC_PAGE *page, time_t update_every_s) {
  1496. if(page->update_every_s == 0)
  1497. page->update_every_s = update_every_s;
  1498. return page->update_every_s;
  1499. }
  1500. time_t pgc_page_fix_end_time_s(PGC_PAGE *page, time_t end_time_s) {
  1501. page->end_time_s = end_time_s;
  1502. return page->end_time_s;
  1503. }
  1504. void *pgc_page_data(PGC_PAGE *page) {
  1505. return page->data;
  1506. }
  1507. void *pgc_page_custom_data(PGC *cache, PGC_PAGE *page) {
  1508. if(cache->config.additional_bytes_per_page)
  1509. return page->custom_data;
  1510. return NULL;
  1511. }
  1512. size_t pgc_page_data_size(PGC *cache, PGC_PAGE *page) {
  1513. return page_size_from_assumed_size(cache, page->assumed_size);
  1514. }
  1515. bool pgc_is_page_hot(PGC_PAGE *page) {
  1516. return is_page_hot(page);
  1517. }
  1518. bool pgc_is_page_dirty(PGC_PAGE *page) {
  1519. return is_page_dirty(page);
  1520. }
  1521. bool pgc_is_page_clean(PGC_PAGE *page) {
  1522. return is_page_clean(page);
  1523. }
  1524. void pgc_reset_hot_max(PGC *cache) {
  1525. size_t entries = __atomic_load_n(&cache->hot.stats->entries, __ATOMIC_RELAXED);
  1526. size_t size = __atomic_load_n(&cache->hot.stats->size, __ATOMIC_RELAXED);
  1527. __atomic_store_n(&cache->hot.stats->max_entries, entries, __ATOMIC_RELAXED);
  1528. __atomic_store_n(&cache->hot.stats->max_size, size, __ATOMIC_RELAXED);
  1529. size_t size_to_evict = 0;
  1530. cache_usage_per1000(cache, &size_to_evict);
  1531. evict_pages(cache, 0, 0, true, false);
  1532. }
  1533. void pgc_set_dynamic_target_cache_size_callback(PGC *cache, dynamic_target_cache_size_callback callback) {
  1534. cache->config.dynamic_target_size_cb = callback;
  1535. size_t size_to_evict = 0;
  1536. cache_usage_per1000(cache, &size_to_evict);
  1537. evict_pages(cache, 0, 0, true, false);
  1538. }
  1539. size_t pgc_get_current_cache_size(PGC *cache) {
  1540. cache_usage_per1000(cache, NULL);
  1541. return __atomic_load_n(&cache->stats.current_cache_size, __ATOMIC_RELAXED);
  1542. }
  1543. size_t pgc_get_wanted_cache_size(PGC *cache) {
  1544. cache_usage_per1000(cache, NULL);
  1545. return __atomic_load_n(&cache->stats.wanted_cache_size, __ATOMIC_RELAXED);
  1546. }
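// pgc_evict_pages() and pgc_flush_pages() below are meant to be driven periodically by
// a service thread (see unittest_stress_test_service() in the unit tests). When the
// cache is under pressure the caller-provided limits are replaced with 0, so the
// per-call limits do not throttle the work (flush_pages() treats 0 as unlimited).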
  1547. bool pgc_evict_pages(PGC *cache, size_t max_skip, size_t max_evict) {
  1548. bool under_pressure = cache_needs_space_aggressively(cache);
  1549. return evict_pages(cache,
  1550. under_pressure ? 0 : max_skip,
  1551. under_pressure ? 0 : max_evict,
  1552. true, false);
  1553. }
  1554. bool pgc_flush_pages(PGC *cache, size_t max_flushes) {
  1555. bool under_pressure = flushing_critical(cache);
  1556. return flush_pages(cache, under_pressure ? 0 : max_flushes, PGC_SECTION_ALL, true, false);
  1557. }
  1558. void pgc_page_hot_set_end_time_s(PGC *cache __maybe_unused, PGC_PAGE *page, time_t end_time_s) {
  1559. internal_fatal(!is_page_hot(page),
  1560. "DBENGINE CACHE: end_time_s update on non-hot page");
  1561. internal_fatal(end_time_s < __atomic_load_n(&page->end_time_s, __ATOMIC_RELAXED),
  1562. "DBENGINE CACHE: end_time_s is not bigger than existing");
  1563. __atomic_store_n(&page->end_time_s, end_time_s, __ATOMIC_RELAXED);
  1564. #ifdef PGC_COUNT_POINTS_COLLECTED
  1565. __atomic_add_fetch(&cache->stats.points_collected, 1, __ATOMIC_RELAXED);
  1566. #endif
  1567. }
  1568. PGC_PAGE *pgc_page_get_and_acquire(PGC *cache, Word_t section, Word_t metric_id, time_t start_time_s, PGC_SEARCH method) {
  1569. return page_find_and_acquire(cache, section, metric_id, start_time_s, method);
  1570. }
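// Query-side sketch (illustrative only, not part of the original code): acquire the
// page covering a timestamp, read from it, and release it so it can be evicted later.
// "cache", "section", "metric_id" and "wanted_time_s" are hypothetical caller-side
// variables.
//
//     PGC_PAGE *page = pgc_page_get_and_acquire(cache, section, metric_id,
//                                                wanted_time_s, PGC_SEARCH_CLOSEST);
//     if(page) {
//         void *data = pgc_page_data(page);
//         // ... read the points from data ...
//         pgc_page_release(cache, page);
//     }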
  1571. struct pgc_statistics pgc_get_statistics(PGC *cache) {
  1572. // FIXME - get the statistics atomically
  1573. return cache->stats;
  1574. }
  1575. size_t pgc_hot_and_dirty_entries(PGC *cache) {
  1576. size_t entries = 0;
  1577. entries += __atomic_load_n(&cache->hot.stats->entries, __ATOMIC_RELAXED);
  1578. entries += __atomic_load_n(&cache->dirty.stats->entries, __ATOMIC_RELAXED);
  1579. entries += __atomic_load_n(&cache->stats.flushing_entries, __ATOMIC_RELAXED);
  1580. entries += __atomic_load_n(&cache->stats.hot2dirty_entries, __ATOMIC_RELAXED);
  1581. return entries;
  1582. }
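// Walks the hot pages of the given section that belong to datafile_fileno, indexes
// them by extent position and by metric into temporary judy arrays, and passes both
// to the callback so it can build the journal v2 file. Afterwards every migrated page
// is turned dirty and released, so the normal flushing path completes its transition.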
  1583. void pgc_open_cache_to_journal_v2(PGC *cache, Word_t section, unsigned datafile_fileno, uint8_t type, migrate_to_v2_callback cb, void *data) {
  1584. __atomic_add_fetch(&rrdeng_cache_efficiency_stats.journal_v2_indexing_started, 1, __ATOMIC_RELAXED);
  1585. __atomic_add_fetch(&cache->stats.workers_jv2_flush, 1, __ATOMIC_RELAXED);
  1586. pgc_ll_lock(cache, &cache->hot);
  1587. Pvoid_t JudyL_metrics = NULL;
  1588. Pvoid_t JudyL_extents_pos = NULL;
  1589. size_t count_of_unique_extents = 0;
  1590. size_t count_of_unique_metrics = 0;
  1591. size_t count_of_unique_pages = 0;
  1592. size_t master_extent_index_id = 0;
  1593. Pvoid_t *section_pages_pptr = JudyLGet(cache->hot.sections_judy, section, PJE0);
  1594. if(!section_pages_pptr) {
  1595. pgc_ll_unlock(cache, &cache->hot);
  1596. return;
  1597. }
  1598. struct section_pages *sp = *section_pages_pptr;
  1599. if(!netdata_spinlock_trylock(&sp->migration_to_v2_spinlock)) {
  1600. internal_fatal(true, "DBENGINE: migration to journal v2 is already running for this section");
  1601. pgc_ll_unlock(cache, &cache->hot);
  1602. return;
  1603. }
  1604. ARAL *ar_mi = aral_by_size_acquire(sizeof(struct jv2_metrics_info));
  1605. ARAL *ar_pi = aral_by_size_acquire(sizeof(struct jv2_page_info));
  1606. ARAL *ar_ei = aral_by_size_acquire(sizeof(struct jv2_extents_info));
  1607. for(PGC_PAGE *page = sp->base; page ; page = page->link.next) {
  1608. struct extent_io_data *xio = (struct extent_io_data *)page->custom_data;
  1609. if(xio->fileno != datafile_fileno) continue;
  1610. if(page_flag_check(page, PGC_PAGE_IS_BEING_MIGRATED_TO_V2)) {
  1611. internal_fatal(true, "Migration to journal v2: page has already been migrated to v2");
  1612. continue;
  1613. }
  1614. if(!page_transition_trylock(cache, page)) {
  1615. internal_fatal(true, "Migration to journal v2: cannot get page transition lock");
  1616. continue;
  1617. }
  1618. if(!page_acquire(cache, page)) {
  1619. internal_fatal(true, "Migration to journal v2: cannot acquire page for migration to v2");
  1620. continue;
  1621. }
  1622. page_flag_set(page, PGC_PAGE_IS_BEING_MIGRATED_TO_V2);
  1623. pgc_ll_unlock(cache, &cache->hot);
  1624. // update the extents JudyL
  1625. size_t current_extent_index_id;
  1626. Pvoid_t *PValue = JudyLIns(&JudyL_extents_pos, xio->pos, PJE0);
  1627. if(!PValue || *PValue == PJERR)
  1628. fatal("Corrupted JudyL extents pos");
  1629. struct jv2_extents_info *ei;
  1630. if(!*PValue) {
  1631. ei = aral_mallocz(ar_ei); // callocz(1, sizeof(struct jv2_extents_info));
  1632. ei->pos = xio->pos;
  1633. ei->bytes = xio->bytes;
  1634. ei->number_of_pages = 1;
  1635. ei->index = master_extent_index_id++;
  1636. *PValue = ei;
  1637. count_of_unique_extents++;
  1638. }
  1639. else {
  1640. ei = *PValue;
  1641. ei->number_of_pages++;
  1642. }
  1643. current_extent_index_id = ei->index;
  1644. // update the metrics JudyL
  1645. PValue = JudyLIns(&JudyL_metrics, page->metric_id, PJE0);
  1646. if(!PValue || *PValue == PJERR)
  1647. fatal("Corrupted JudyL metrics");
  1648. struct jv2_metrics_info *mi;
  1649. if(!*PValue) {
  1650. mi = aral_mallocz(ar_mi); // callocz(1, sizeof(struct jv2_metrics_info));
  1651. mi->uuid = mrg_metric_uuid(main_mrg, (METRIC *)page->metric_id);
  1652. mi->first_time_s = page->start_time_s;
  1653. mi->last_time_s = page->end_time_s;
  1654. mi->number_of_pages = 1;
  1655. mi->page_list_header = 0;
  1656. mi->JudyL_pages_by_start_time = NULL;
  1657. *PValue = mi;
  1658. count_of_unique_metrics++;
  1659. }
  1660. else {
  1661. mi = *PValue;
  1662. mi->number_of_pages++;
  1663. if(page->start_time_s < mi->first_time_s)
  1664. mi->first_time_s = page->start_time_s;
  1665. if(page->end_time_s > mi->last_time_s)
  1666. mi->last_time_s = page->end_time_s;
  1667. }
  1668. PValue = JudyLIns(&mi->JudyL_pages_by_start_time, page->start_time_s, PJE0);
  1669. if(!PValue || *PValue == PJERR)
  1670. fatal("Corrupted JudyL metric pages");
  1671. if(!*PValue) {
  1672. struct jv2_page_info *pi = aral_mallocz(ar_pi); // callocz(1, (sizeof(struct jv2_page_info)));
  1673. pi->start_time_s = page->start_time_s;
  1674. pi->end_time_s = page->end_time_s;
  1675. pi->update_every_s = page->update_every_s;
  1676. pi->page_length = page_size_from_assumed_size(cache, page->assumed_size);
  1677. pi->page = page;
  1678. pi->extent_index = current_extent_index_id;
  1679. pi->custom_data = (cache->config.additional_bytes_per_page) ? page->custom_data : NULL;
  1680. *PValue = pi;
  1681. count_of_unique_pages++;
  1682. }
  1683. else {
  1684. // impossible situation
  1685. internal_fatal(true, "Page is already in JudyL metric pages");
  1686. page_flag_clear(page, PGC_PAGE_IS_BEING_MIGRATED_TO_V2);
  1687. page_transition_unlock(cache, page);
  1688. page_release(cache, page, false);
  1689. }
  1690. pgc_ll_lock(cache, &cache->hot);
  1691. }
  1692. netdata_spinlock_unlock(&sp->migration_to_v2_spinlock);
  1693. pgc_ll_unlock(cache, &cache->hot);
  1694. // callback
  1695. cb(section, datafile_fileno, type, JudyL_metrics, JudyL_extents_pos, count_of_unique_extents, count_of_unique_metrics, count_of_unique_pages, data);
  1696. {
  1697. Pvoid_t *PValue1;
  1698. bool metric_id_first = true;
  1699. Word_t metric_id = 0;
  1700. while ((PValue1 = JudyLFirstThenNext(JudyL_metrics, &metric_id, &metric_id_first))) {
  1701. struct jv2_metrics_info *mi = *PValue1;
  1702. Pvoid_t *PValue2;
  1703. bool start_time_first = true;
  1704. Word_t start_time = 0;
  1705. while ((PValue2 = JudyLFirstThenNext(mi->JudyL_pages_by_start_time, &start_time, &start_time_first))) {
  1706. struct jv2_page_info *pi = *PValue2;
  1707. page_transition_unlock(cache, pi->page);
  1708. pgc_page_hot_to_dirty_and_release(cache, pi->page);
  1709. // make_acquired_page_clean_and_evict_or_page_release(cache, pi->page);
  1710. aral_freez(ar_pi, pi);
  1711. }
  1712. JudyLFreeArray(&mi->JudyL_pages_by_start_time, PJE0);
  1713. aral_freez(ar_mi, mi);
  1714. }
  1715. JudyLFreeArray(&JudyL_metrics, PJE0);
  1716. }
  1717. {
  1718. Pvoid_t *PValue;
  1719. bool extent_pos_first = true;
  1720. Word_t extent_pos = 0;
  1721. while ((PValue = JudyLFirstThenNext(JudyL_extents_pos, &extent_pos, &extent_pos_first))) {
  1722. struct jv2_extents_info *ei = *PValue;
  1723. aral_freez(ar_ei, ei);
  1724. }
  1725. JudyLFreeArray(&JudyL_extents_pos, PJE0);
  1726. }
  1727. aral_by_size_release(ar_ei);
  1728. aral_by_size_release(ar_pi);
  1729. aral_by_size_release(ar_mi);
  1730. __atomic_sub_fetch(&cache->stats.workers_jv2_flush, 1, __ATOMIC_RELAXED);
  1731. }
  1732. static bool match_page_data(PGC_PAGE *page, void *data) {
  1733. return (page->data == data);
  1734. }
  1735. void pgc_open_evict_clean_pages_of_datafile(PGC *cache, struct rrdengine_datafile *datafile) {
  1736. evict_pages_with_filter(cache, 0, 0, true, true, match_page_data, datafile);
  1737. }
  1738. size_t pgc_count_clean_pages_having_data_ptr(PGC *cache, Word_t section, void *ptr) {
  1739. size_t found = 0;
  1740. pgc_ll_lock(cache, &cache->clean);
  1741. for(PGC_PAGE *page = cache->clean.base; page ;page = page->link.next)
  1742. found += (page->data == ptr && page->section == section) ? 1 : 0;
  1743. pgc_ll_unlock(cache, &cache->clean);
  1744. return found;
  1745. }
  1746. size_t pgc_count_hot_pages_having_data_ptr(PGC *cache, Word_t section, void *ptr) {
  1747. size_t found = 0;
  1748. pgc_ll_lock(cache, &cache->hot);
  1749. Pvoid_t *section_pages_pptr = JudyLGet(cache->hot.sections_judy, section, PJE0);
  1750. if(section_pages_pptr) {
  1751. struct section_pages *sp = *section_pages_pptr;
  1752. for(PGC_PAGE *page = sp->base; page ;page = page->link.next)
  1753. found += (page->data == ptr) ? 1 : 0;
  1754. }
  1755. pgc_ll_unlock(cache, &cache->hot);
  1756. return found;
  1757. }
  1758. // ----------------------------------------------------------------------------
  1759. // unittest
  1760. static void unittest_free_clean_page_callback(PGC *cache __maybe_unused, PGC_ENTRY entry __maybe_unused) {
  1761. ;
  1762. }
  1763. static void unittest_save_dirty_page_callback(PGC *cache __maybe_unused, PGC_ENTRY *entries_array __maybe_unused, PGC_PAGE **pages_array __maybe_unused, size_t entries __maybe_unused) {
  1764. ;
  1765. }
  1766. #ifdef PGC_STRESS_TEST
  1767. struct {
  1768. bool stop;
  1769. PGC *cache;
  1770. PGC_PAGE **metrics;
  1771. size_t clean_metrics;
  1772. size_t hot_metrics;
  1773. time_t first_time_t;
  1774. time_t last_time_t;
  1775. size_t cache_size;
  1776. size_t query_threads;
  1777. size_t collect_threads;
  1778. size_t partitions;
  1779. size_t points_per_page;
  1780. time_t time_per_collection_ut;
  1781. time_t time_per_query_ut;
  1782. time_t time_per_flush_ut;
  1783. PGC_OPTIONS options;
  1784. char rand_statebufs[1024];
  1785. struct random_data *random_data;
  1786. } pgc_uts = {
  1787. .stop = false,
  1788. .metrics = NULL,
  1789. .clean_metrics = 100000,
  1790. .hot_metrics = 1000000,
  1791. .first_time_t = 100000000,
  1792. .last_time_t = 0,
1793. .cache_size = 0, // get the default (pgc_create() enforces a 1MB minimum)
  1794. .collect_threads = 16,
  1795. .query_threads = 16,
  1796. .partitions = 0, // get the default (system cpus)
  1797. .options = PGC_OPTIONS_AUTOSCALE,/* PGC_OPTIONS_FLUSH_PAGES_INLINE | PGC_OPTIONS_EVICT_PAGES_INLINE,*/
  1798. .points_per_page = 10,
  1799. .time_per_collection_ut = 1000000,
  1800. .time_per_query_ut = 250,
  1801. .time_per_flush_ut = 100,
  1802. .rand_statebufs = {},
  1803. .random_data = NULL,
  1804. };
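// Each collector thread owns a distinct slice of the hot metrics: it adds one hot page
// per metric, advances end_time_s on every heartbeat until the page holds
// points_per_page points, then releases most pages to the dirty queue (every 10th one
// is evicted immediately via pgc_page_to_clean_evict_or_release) and starts over.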
  1805. void *unittest_stress_test_collector(void *ptr) {
  1806. size_t id = *((size_t *)ptr);
  1807. size_t metric_start = pgc_uts.clean_metrics;
  1808. size_t metric_end = pgc_uts.clean_metrics + pgc_uts.hot_metrics;
  1809. size_t number_of_metrics = metric_end - metric_start;
  1810. size_t per_collector_metrics = number_of_metrics / pgc_uts.collect_threads;
  1811. metric_start = metric_start + per_collector_metrics * id + 1;
  1812. metric_end = metric_start + per_collector_metrics - 1;
  1813. time_t start_time_t = pgc_uts.first_time_t + 1;
  1814. heartbeat_t hb;
  1815. heartbeat_init(&hb);
  1816. while(!__atomic_load_n(&pgc_uts.stop, __ATOMIC_RELAXED)) {
  1817. // info("COLLECTOR %zu: collecting metrics %zu to %zu, from %ld to %lu", id, metric_start, metric_end, start_time_t, start_time_t + pgc_uts.points_per_page);
  1818. netdata_thread_disable_cancelability();
  1819. for (size_t i = metric_start; i < metric_end; i++) {
  1820. bool added;
  1821. pgc_uts.metrics[i] = pgc_page_add_and_acquire(pgc_uts.cache, (PGC_ENTRY) {
  1822. .section = 1,
  1823. .metric_id = i,
1824. .start_time_s = start_time_t,
1825. .end_time_s = start_time_t,
1826. .update_every_s = 1,
  1827. .size = 4096,
  1828. .data = NULL,
  1829. .hot = true,
  1830. }, &added);
  1831. if(!pgc_is_page_hot(pgc_uts.metrics[i]) || !added) {
  1832. pgc_page_release(pgc_uts.cache, pgc_uts.metrics[i]);
  1833. pgc_uts.metrics[i] = NULL;
  1834. }
  1835. }
  1836. time_t end_time_t = start_time_t + (time_t)pgc_uts.points_per_page;
  1837. while(++start_time_t <= end_time_t && !__atomic_load_n(&pgc_uts.stop, __ATOMIC_RELAXED)) {
  1838. heartbeat_next(&hb, pgc_uts.time_per_collection_ut);
  1839. for (size_t i = metric_start; i < metric_end; i++) {
  1840. if(pgc_uts.metrics[i])
1841. pgc_page_hot_set_end_time_s(pgc_uts.cache, pgc_uts.metrics[i], start_time_t);
  1842. }
  1843. __atomic_store_n(&pgc_uts.last_time_t, start_time_t, __ATOMIC_RELAXED);
  1844. }
  1845. for (size_t i = metric_start; i < metric_end; i++) {
  1846. if (pgc_uts.metrics[i]) {
  1847. if(i % 10 == 0)
  1848. pgc_page_to_clean_evict_or_release(pgc_uts.cache, pgc_uts.metrics[i]);
  1849. else
  1850. pgc_page_hot_to_dirty_and_release(pgc_uts.cache, pgc_uts.metrics[i]);
  1851. }
  1852. }
  1853. netdata_thread_enable_cancelability();
  1854. }
  1855. return ptr;
  1856. }
  1857. void *unittest_stress_test_queries(void *ptr) {
  1858. size_t id = *((size_t *)ptr);
  1859. struct random_data *random_data = &pgc_uts.random_data[id];
  1860. size_t start = 0;
  1861. size_t end = pgc_uts.clean_metrics + pgc_uts.hot_metrics;
  1862. while(!__atomic_load_n(&pgc_uts.stop, __ATOMIC_RELAXED)) {
  1863. netdata_thread_disable_cancelability();
  1864. int32_t random_number;
  1865. random_r(random_data, &random_number);
  1866. size_t metric_id = random_number % (end - start);
  1867. time_t start_time_t = pgc_uts.first_time_t;
  1868. time_t end_time_t = __atomic_load_n(&pgc_uts.last_time_t, __ATOMIC_RELAXED);
  1869. if(end_time_t <= start_time_t)
  1870. end_time_t = start_time_t + 1;
  1871. size_t pages = (end_time_t - start_time_t) / pgc_uts.points_per_page + 1;
  1872. PGC_PAGE *array[pages];
  1873. for(size_t i = 0; i < pages ;i++)
  1874. array[i] = NULL;
  1875. // find the pages the cache has
  1876. for(size_t i = 0; i < pages ;i++) {
  1877. time_t page_start_time = start_time_t + (time_t)(i * pgc_uts.points_per_page);
  1878. array[i] = pgc_page_get_and_acquire(pgc_uts.cache, 1, metric_id,
  1879. page_start_time, (i < pages - 1)?PGC_SEARCH_EXACT:PGC_SEARCH_CLOSEST);
  1880. }
  1881. // load the rest of the pages
  1882. for(size_t i = 0; i < pages ;i++) {
  1883. if(array[i]) continue;
  1884. time_t page_start_time = start_time_t + (time_t)(i * pgc_uts.points_per_page);
  1885. array[i] = pgc_page_add_and_acquire(pgc_uts.cache, (PGC_ENTRY) {
  1886. .section = 1,
  1887. .metric_id = metric_id,
1888. .start_time_s = page_start_time,
1889. .end_time_s = page_start_time + (time_t)pgc_uts.points_per_page,
1890. .update_every_s = 1,
  1891. .size = 4096,
  1892. .data = NULL,
  1893. .hot = false,
  1894. }, NULL);
  1895. }
  1896. // do the query
  1897. // ...
  1898. struct timespec work_duration = {.tv_sec = 0, .tv_nsec = pgc_uts.time_per_query_ut * NSEC_PER_USEC };
  1899. nanosleep(&work_duration, NULL);
  1900. // release the pages
  1901. for(size_t i = 0; i < pages ;i++) {
  1902. if(!array[i]) continue;
  1903. pgc_page_release(pgc_uts.cache, array[i]);
  1904. array[i] = NULL;
  1905. }
  1906. netdata_thread_enable_cancelability();
  1907. }
  1908. return ptr;
  1909. }
  1910. void *unittest_stress_test_service(void *ptr) {
  1911. heartbeat_t hb;
  1912. heartbeat_init(&hb);
  1913. while(!__atomic_load_n(&pgc_uts.stop, __ATOMIC_RELAXED)) {
  1914. heartbeat_next(&hb, 1 * USEC_PER_SEC);
  1915. pgc_flush_pages(pgc_uts.cache, 1000);
  1916. pgc_evict_pages(pgc_uts.cache, 0, 0);
  1917. }
  1918. return ptr;
  1919. }
  1920. static void unittest_stress_test_save_dirty_page_callback(PGC *cache __maybe_unused, PGC_ENTRY *entries_array __maybe_unused, PGC_PAGE **pages_array __maybe_unused, size_t entries __maybe_unused) {
  1921. // info("SAVE %zu pages", entries);
  1922. if(!pgc_uts.stop) {
  1923. usec_t t = pgc_uts.time_per_flush_ut;
  1924. if(t > 0) {
  1925. struct timespec work_duration = {
  1926. .tv_sec = t / USEC_PER_SEC,
  1927. .tv_nsec = (long) ((t % USEC_PER_SEC) * NSEC_PER_USEC)
  1928. };
  1929. nanosleep(&work_duration, NULL);
  1930. }
  1931. }
  1932. }
  1933. void unittest_stress_test(void) {
1934. pgc_uts.cache = pgc_create("test", pgc_uts.cache_size * 1024 * 1024,
1935. unittest_free_clean_page_callback,
1936. 64, NULL, unittest_stress_test_save_dirty_page_callback,
1937. 1000, 10, 10000, 1, // the "test" name, the NULL save-init callback and max_inline_evictors (10) are assumed; the original call predates these parameters
1938. pgc_uts.options, pgc_uts.partitions, 0);
  1939. pgc_uts.metrics = callocz(pgc_uts.clean_metrics + pgc_uts.hot_metrics, sizeof(PGC_PAGE *));
  1940. pthread_t service_thread;
  1941. netdata_thread_create(&service_thread, "SERVICE",
  1942. NETDATA_THREAD_OPTION_JOINABLE | NETDATA_THREAD_OPTION_DONT_LOG,
  1943. unittest_stress_test_service, NULL);
  1944. pthread_t collect_threads[pgc_uts.collect_threads];
  1945. size_t collect_thread_ids[pgc_uts.collect_threads];
  1946. for(size_t i = 0; i < pgc_uts.collect_threads ;i++) {
  1947. collect_thread_ids[i] = i;
  1948. char buffer[100 + 1];
  1949. snprintfz(buffer, 100, "COLLECT_%zu", i);
  1950. netdata_thread_create(&collect_threads[i], buffer,
  1951. NETDATA_THREAD_OPTION_JOINABLE | NETDATA_THREAD_OPTION_DONT_LOG,
  1952. unittest_stress_test_collector, &collect_thread_ids[i]);
  1953. }
  1954. pthread_t queries_threads[pgc_uts.query_threads];
  1955. size_t query_thread_ids[pgc_uts.query_threads];
  1956. pgc_uts.random_data = callocz(pgc_uts.query_threads, sizeof(struct random_data));
  1957. for(size_t i = 0; i < pgc_uts.query_threads ;i++) {
  1958. query_thread_ids[i] = i;
  1959. char buffer[100 + 1];
  1960. snprintfz(buffer, 100, "QUERY_%zu", i);
  1961. initstate_r(1, pgc_uts.rand_statebufs, 1024, &pgc_uts.random_data[i]);
  1962. netdata_thread_create(&queries_threads[i], buffer,
  1963. NETDATA_THREAD_OPTION_JOINABLE | NETDATA_THREAD_OPTION_DONT_LOG,
  1964. unittest_stress_test_queries, &query_thread_ids[i]);
  1965. }
  1966. heartbeat_t hb;
  1967. heartbeat_init(&hb);
  1968. struct {
  1969. size_t entries;
  1970. size_t added;
  1971. size_t deleted;
  1972. size_t referenced;
  1973. size_t hot_entries;
  1974. size_t hot_added;
  1975. size_t hot_deleted;
  1976. size_t dirty_entries;
  1977. size_t dirty_added;
  1978. size_t dirty_deleted;
  1979. size_t clean_entries;
  1980. size_t clean_added;
  1981. size_t clean_deleted;
  1982. size_t searches_exact;
  1983. size_t searches_exact_hits;
  1984. size_t searches_closest;
  1985. size_t searches_closest_hits;
  1986. size_t collections;
  1987. size_t events_cache_under_severe_pressure;
  1988. size_t events_cache_needs_space_90;
  1989. size_t events_flush_critical;
  1990. } stats = {}, old_stats = {};
  1991. for(int i = 0; i < 86400 ;i++) {
  1992. heartbeat_next(&hb, 1 * USEC_PER_SEC);
  1993. old_stats = stats;
  1994. stats.entries = __atomic_load_n(&pgc_uts.cache->stats.entries, __ATOMIC_RELAXED);
  1995. stats.added = __atomic_load_n(&pgc_uts.cache->stats.added_entries, __ATOMIC_RELAXED);
  1996. stats.deleted = __atomic_load_n(&pgc_uts.cache->stats.removed_entries, __ATOMIC_RELAXED);
  1997. stats.referenced = __atomic_load_n(&pgc_uts.cache->stats.referenced_entries, __ATOMIC_RELAXED);
  1998. stats.hot_entries = __atomic_load_n(&pgc_uts.cache->hot.stats->entries, __ATOMIC_RELAXED);
  1999. stats.hot_added = __atomic_load_n(&pgc_uts.cache->hot.stats->added_entries, __ATOMIC_RELAXED);
  2000. stats.hot_deleted = __atomic_load_n(&pgc_uts.cache->hot.stats->removed_entries, __ATOMIC_RELAXED);
  2001. stats.dirty_entries = __atomic_load_n(&pgc_uts.cache->dirty.stats->entries, __ATOMIC_RELAXED);
  2002. stats.dirty_added = __atomic_load_n(&pgc_uts.cache->dirty.stats->added_entries, __ATOMIC_RELAXED);
  2003. stats.dirty_deleted = __atomic_load_n(&pgc_uts.cache->dirty.stats->removed_entries, __ATOMIC_RELAXED);
  2004. stats.clean_entries = __atomic_load_n(&pgc_uts.cache->clean.stats->entries, __ATOMIC_RELAXED);
  2005. stats.clean_added = __atomic_load_n(&pgc_uts.cache->clean.stats->added_entries, __ATOMIC_RELAXED);
  2006. stats.clean_deleted = __atomic_load_n(&pgc_uts.cache->clean.stats->removed_entries, __ATOMIC_RELAXED);
  2007. stats.searches_exact = __atomic_load_n(&pgc_uts.cache->stats.searches_exact, __ATOMIC_RELAXED);
  2008. stats.searches_exact_hits = __atomic_load_n(&pgc_uts.cache->stats.searches_exact_hits, __ATOMIC_RELAXED);
  2009. stats.searches_closest = __atomic_load_n(&pgc_uts.cache->stats.searches_closest, __ATOMIC_RELAXED);
  2010. stats.searches_closest_hits = __atomic_load_n(&pgc_uts.cache->stats.searches_closest_hits, __ATOMIC_RELAXED);
  2011. stats.events_cache_under_severe_pressure = __atomic_load_n(&pgc_uts.cache->stats.events_cache_under_severe_pressure, __ATOMIC_RELAXED);
  2012. stats.events_cache_needs_space_90 = __atomic_load_n(&pgc_uts.cache->stats.events_cache_needs_space_aggressively, __ATOMIC_RELAXED);
  2013. stats.events_flush_critical = __atomic_load_n(&pgc_uts.cache->stats.events_flush_critical, __ATOMIC_RELAXED);
  2014. size_t searches_exact = stats.searches_exact - old_stats.searches_exact;
  2015. size_t searches_closest = stats.searches_closest - old_stats.searches_closest;
  2016. size_t hit_exact = stats.searches_exact_hits - old_stats.searches_exact_hits;
  2017. size_t hit_closest = stats.searches_closest_hits - old_stats.searches_closest_hits;
  2018. double hit_exact_pc = (searches_exact > 0) ? (double)hit_exact * 100.0 / (double)searches_exact : 0.0;
  2019. double hit_closest_pc = (searches_closest > 0) ? (double)hit_closest * 100.0 / (double)searches_closest : 0.0;
  2020. #ifdef PGC_COUNT_POINTS_COLLECTED
  2021. stats.collections = __atomic_load_n(&pgc_uts.cache->stats.points_collected, __ATOMIC_RELAXED);
  2022. #endif
  2023. char *cache_status = "N";
  2024. if(stats.events_cache_under_severe_pressure > old_stats.events_cache_under_severe_pressure)
  2025. cache_status = "F";
  2026. else if(stats.events_cache_needs_space_90 > old_stats.events_cache_needs_space_90)
  2027. cache_status = "f";
  2028. char *flushing_status = "N";
  2029. if(stats.events_flush_critical > old_stats.events_flush_critical)
  2030. flushing_status = "F";
  2031. info("PGS %5zuk +%4zuk/-%4zuk "
  2032. "| RF %5zuk "
  2033. "| HOT %5zuk +%4zuk -%4zuk "
  2034. "| DRT %s %5zuk +%4zuk -%4zuk "
  2035. "| CLN %s %5zuk +%4zuk -%4zuk "
  2036. "| SRCH %4zuk %4zuk, HIT %4.1f%% %4.1f%% "
  2037. #ifdef PGC_COUNT_POINTS_COLLECTED
  2038. "| CLCT %8.4f Mps"
  2039. #endif
  2040. , stats.entries / 1000
  2041. , (stats.added - old_stats.added) / 1000, (stats.deleted - old_stats.deleted) / 1000
  2042. , stats.referenced / 1000
  2043. , stats.hot_entries / 1000, (stats.hot_added - old_stats.hot_added) / 1000, (stats.hot_deleted - old_stats.hot_deleted) / 1000
  2044. , flushing_status
  2045. , stats.dirty_entries / 1000
  2046. , (stats.dirty_added - old_stats.dirty_added) / 1000, (stats.dirty_deleted - old_stats.dirty_deleted) / 1000
  2047. , cache_status
  2048. , stats.clean_entries / 1000
  2049. , (stats.clean_added - old_stats.clean_added) / 1000, (stats.clean_deleted - old_stats.clean_deleted) / 1000
  2050. , searches_exact / 1000, searches_closest / 1000
  2051. , hit_exact_pc, hit_closest_pc
  2052. #ifdef PGC_COUNT_POINTS_COLLECTED
  2053. , (double)(stats.collections - old_stats.collections) / 1000.0 / 1000.0
  2054. #endif
  2055. );
  2056. }
  2057. info("Waiting for threads to stop...");
  2058. __atomic_store_n(&pgc_uts.stop, true, __ATOMIC_RELAXED);
  2059. netdata_thread_join(service_thread, NULL);
  2060. for(size_t i = 0; i < pgc_uts.collect_threads ;i++)
  2061. netdata_thread_join(collect_threads[i],NULL);
  2062. for(size_t i = 0; i < pgc_uts.query_threads ;i++)
  2063. netdata_thread_join(queries_threads[i],NULL);
  2064. pgc_destroy(pgc_uts.cache);
  2065. freez(pgc_uts.metrics);
  2066. freez(pgc_uts.random_data);
  2067. }
  2068. #endif
  2069. int pgc_unittest(void) {
  2070. PGC *cache = pgc_create("test",
  2071. 32 * 1024 * 1024, unittest_free_clean_page_callback,
  2072. 64, NULL, unittest_save_dirty_page_callback,
  2073. 10, 10, 1000, 10,
  2074. PGC_OPTIONS_DEFAULT, 1, 11);
  2075. // FIXME - unit tests
  2076. // - add clean page
  2077. // - add clean page again (should not add it)
  2078. // - release page (should decrement counters)
  2079. // - add hot page
  2080. // - add hot page again (should not add it)
  2081. // - turn hot page to dirty, with and without a reference counter to it
  2082. // - dirty pages are saved once there are enough of them
  2083. // - find page exact
  2084. // - find page (should return last)
  2085. // - find page (should return next)
  2086. // - page cache full (should evict)
  2087. // - on destroy, turn hot pages to dirty and save them
  2088. PGC_PAGE *page1 = pgc_page_add_and_acquire(cache, (PGC_ENTRY){
  2089. .section = 1,
  2090. .metric_id = 10,
  2091. .start_time_s = 100,
  2092. .end_time_s = 1000,
  2093. .size = 4096,
  2094. .data = NULL,
  2095. .hot = false,
  2096. .custom_data = (uint8_t *)"0123456789",
  2097. }, NULL);
  2098. if(strcmp(pgc_page_custom_data(cache, page1), "0123456789") != 0)
  2099. fatal("custom data do not work");
  2100. memcpy(pgc_page_custom_data(cache, page1), "ABCDEFGHIJ", 11);
  2101. if(strcmp(pgc_page_custom_data(cache, page1), "ABCDEFGHIJ") != 0)
  2102. fatal("custom data do not work");
  2103. pgc_page_release(cache, page1);
  2104. PGC_PAGE *page2 = pgc_page_add_and_acquire(cache, (PGC_ENTRY){
  2105. .section = 2,
  2106. .metric_id = 10,
  2107. .start_time_s = 1001,
  2108. .end_time_s = 2000,
  2109. .size = 4096,
  2110. .data = NULL,
  2111. .hot = true,
  2112. }, NULL);
  2113. pgc_page_hot_set_end_time_s(cache, page2, 2001);
  2114. pgc_page_hot_to_dirty_and_release(cache, page2);
  2115. PGC_PAGE *page3 = pgc_page_add_and_acquire(cache, (PGC_ENTRY){
  2116. .section = 3,
  2117. .metric_id = 10,
  2118. .start_time_s = 1001,
  2119. .end_time_s = 2000,
  2120. .size = 4096,
  2121. .data = NULL,
  2122. .hot = true,
  2123. }, NULL);
  2124. pgc_page_hot_set_end_time_s(cache, page3, 2001);
  2125. pgc_page_hot_to_dirty_and_release(cache, page3);
  2126. pgc_destroy(cache);
  2127. #ifdef PGC_STRESS_TEST
  2128. unittest_stress_test();
  2129. #endif
  2130. return 0;
  2131. }