// cache.c

#include "cache.h"

/* STATES AND TRANSITIONS
 *
 *   entry     |       entry
 *     v       |         v
 *    HOT -> DIRTY --> CLEAN --> EVICT
 *     v       |         v
 *   flush     |       evict
 *     v       |         v
 *   save      |       free
 *   callback  |       callback
 *
 */
typedef int32_t REFCOUNT;
#define REFCOUNT_DELETING (-100)

// to use ARAL uncomment the following line:
#define PGC_WITH_ARAL 1

typedef enum __attribute__ ((__packed__)) {
    // mutually exclusive flags
    PGC_PAGE_CLEAN                       = (1 << 0), // none of the following
    PGC_PAGE_DIRTY                       = (1 << 1), // contains unsaved data
    PGC_PAGE_HOT                         = (1 << 2), // currently being collected

    // flags related to various actions on each page
    PGC_PAGE_IS_BEING_DELETED            = (1 << 3),
    PGC_PAGE_IS_BEING_MIGRATED_TO_V2     = (1 << 4),
    PGC_PAGE_HAS_NO_DATA_IGNORE_ACCESSES = (1 << 5),
    PGC_PAGE_HAS_BEEN_ACCESSED           = (1 << 6),
} PGC_PAGE_FLAGS;

#define page_flag_check(page, flag) (__atomic_load_n(&((page)->flags), __ATOMIC_ACQUIRE) & (flag))
#define page_flag_set(page, flag)   __atomic_or_fetch(&((page)->flags), flag, __ATOMIC_RELEASE)
#define page_flag_clear(page, flag) __atomic_and_fetch(&((page)->flags), ~(flag), __ATOMIC_RELEASE)

#define page_get_status_flags(page) page_flag_check(page, PGC_PAGE_HOT | PGC_PAGE_DIRTY | PGC_PAGE_CLEAN)
#define is_page_hot(page)   (page_get_status_flags(page) == PGC_PAGE_HOT)
#define is_page_dirty(page) (page_get_status_flags(page) == PGC_PAGE_DIRTY)
#define is_page_clean(page) (page_get_status_flags(page) == PGC_PAGE_CLEAN)
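
// The three status flags are mutually exclusive: page_get_status_flags() masks
// out everything else, so is_page_hot()/is_page_dirty()/is_page_clean() compare
// against exactly one bit. Illustrative sketch (hypothetical helper, not used below):
//
//     static inline const char *page_state_name(PGC_PAGE *page) {
//         switch(page_get_status_flags(page)) {
//             case PGC_PAGE_HOT:   return "hot";
//             case PGC_PAGE_DIRTY: return "dirty";
//             case PGC_PAGE_CLEAN: return "clean";
//             default:             return "in transition";
//         }
//     }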
struct pgc_page {
    // indexing data
    Word_t section;
    Word_t metric_id;
    time_t start_time_s;
    time_t end_time_s;
    uint32_t update_every_s;

    uint32_t assumed_size;

    REFCOUNT refcount;
    uint16_t accesses;              // counts the number of accesses on this page
    PGC_PAGE_FLAGS flags;
    SPINLOCK transition_spinlock;   // when the page changes between HOT, DIRTY, CLEAN, we have to get this lock

    struct {
        struct pgc_page *next;
        struct pgc_page *prev;
    } link;

    void *data;
    uint8_t custom_data[];

    // IMPORTANT!
    // THIS STRUCTURE NEEDS TO BE INITIALIZED BY HAND!
};
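
// custom_data is a flexible array member: each page is allocated with
// sizeof(PGC_PAGE) + config.additional_bytes_per_page bytes (see page_add()
// below), and every field must be set explicitly after allocation, exactly as
// the warning above says. Illustrative allocation sketch:
//
//     PGC_PAGE *page = mallocz(sizeof(PGC_PAGE) + cache->config.additional_bytes_per_page);
//     page->refcount = 1;          // the creator holds the first reference
//     page->flags = 0;             // no state yet; pgc_ll_add() sets HOT/DIRTY/CLEAN
//     spinlock_init(&page->transition_spinlock);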
struct pgc_linked_list {
    SPINLOCK spinlock;
    union {
        PGC_PAGE *base;
        Pvoid_t sections_judy;
    };
    PGC_PAGE_FLAGS flags;
    size_t version;
    size_t last_version_checked;
    bool linked_list_in_sections_judy;  // when true, we use 'sections_judy', otherwise we use 'base'
    struct pgc_queue_statistics *stats;
};
struct pgc {
    struct {
        char name[PGC_NAME_MAX + 1];

        size_t partitions;
        size_t clean_size;
        size_t max_dirty_pages_per_call;
        size_t max_pages_per_inline_eviction;
        size_t max_skip_pages_per_inline_eviction;
        size_t max_flushes_inline;
        size_t max_workers_evict_inline;
        size_t additional_bytes_per_page;
        free_clean_page_callback pgc_free_clean_cb;
        save_dirty_page_callback pgc_save_dirty_cb;
        save_dirty_init_callback pgc_save_init_cb;
        PGC_OPTIONS options;

        size_t severe_pressure_per1000;
        size_t aggressive_evict_per1000;
        size_t healthy_size_per1000;
        size_t evict_low_threshold_per1000;

        dynamic_target_cache_size_callback dynamic_target_size_cb;
    } config;

#ifdef PGC_WITH_ARAL
    ARAL **aral;
#endif

    PGC_CACHE_LINE_PADDING(0);

    struct pgc_index {
        RW_SPINLOCK rw_spinlock;
        Pvoid_t sections_judy;
        PGC_CACHE_LINE_PADDING(0);
    } *index;

    PGC_CACHE_LINE_PADDING(1);

    struct {
        SPINLOCK spinlock;
        size_t per1000;
    } usage;

    PGC_CACHE_LINE_PADDING(2);

    struct pgc_linked_list clean;   // LRU is applied here to free memory from the cache
    PGC_CACHE_LINE_PADDING(3);
    struct pgc_linked_list dirty;   // in the dirty list, pages are ordered the way they were marked dirty
    PGC_CACHE_LINE_PADDING(4);
    struct pgc_linked_list hot;     // in the hot list, pages are ordered the way they were marked hot
    PGC_CACHE_LINE_PADDING(5);

    struct pgc_statistics stats;    // statistics

#ifdef NETDATA_PGC_POINTER_CHECK
    PGC_CACHE_LINE_PADDING(6);
    netdata_mutex_t global_pointer_registry_mutex;
    Pvoid_t global_pointer_registry;
#endif
};
// ----------------------------------------------------------------------------
// validate each pointer is indexed once - internal checks only

static inline void pointer_index_init(PGC *cache __maybe_unused) {
#ifdef NETDATA_PGC_POINTER_CHECK
    netdata_mutex_init(&cache->global_pointer_registry_mutex);
#else
    ;
#endif
}

static inline void pointer_destroy_index(PGC *cache __maybe_unused) {
#ifdef NETDATA_PGC_POINTER_CHECK
    netdata_mutex_lock(&cache->global_pointer_registry_mutex);
    JudyHSFreeArray(&cache->global_pointer_registry, PJE0);
    netdata_mutex_unlock(&cache->global_pointer_registry_mutex);
#else
    ;
#endif
}

static inline void pointer_add(PGC *cache __maybe_unused, PGC_PAGE *page __maybe_unused) {
#ifdef NETDATA_PGC_POINTER_CHECK
    netdata_mutex_lock(&cache->global_pointer_registry_mutex);
    Pvoid_t *PValue = JudyHSIns(&cache->global_pointer_registry, &page, sizeof(void *), PJE0);
    if(*PValue != NULL)
        fatal("pointer already exists in registry");
    *PValue = page;
    netdata_mutex_unlock(&cache->global_pointer_registry_mutex);
#else
    ;
#endif
}

static inline void pointer_check(PGC *cache __maybe_unused, PGC_PAGE *page __maybe_unused) {
#ifdef NETDATA_PGC_POINTER_CHECK
    netdata_mutex_lock(&cache->global_pointer_registry_mutex);
    Pvoid_t *PValue = JudyHSGet(cache->global_pointer_registry, &page, sizeof(void *));
    if(PValue == NULL)
        fatal("pointer is not found in registry");
    netdata_mutex_unlock(&cache->global_pointer_registry_mutex);
#else
    ;
#endif
}

static inline void pointer_del(PGC *cache __maybe_unused, PGC_PAGE *page __maybe_unused) {
#ifdef NETDATA_PGC_POINTER_CHECK
    netdata_mutex_lock(&cache->global_pointer_registry_mutex);
    int ret = JudyHSDel(&cache->global_pointer_registry, &page, sizeof(void *), PJE0);
    if(!ret)
        fatal("pointer to be deleted does not exist in registry");
    netdata_mutex_unlock(&cache->global_pointer_registry_mutex);
#else
    ;
#endif
}
// ----------------------------------------------------------------------------
// locking

static inline size_t pgc_indexing_partition(PGC *cache, Word_t metric_id) {
    static __thread Word_t last_metric_id = 0;
    static __thread size_t last_partition = 0;

    if(metric_id == last_metric_id || cache->config.partitions == 1)
        return last_partition;

    last_metric_id = metric_id;
    last_partition = indexing_partition(metric_id, cache->config.partitions);

    return last_partition;
}
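
// Partitioning note: the thread-local pair above memoizes the last
// (metric_id -> partition) mapping computed on this thread, so repeated
// operations on the same metric avoid recomputing the hash; single-partition
// caches also take the fast path.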
static inline void pgc_index_read_lock(PGC *cache, size_t partition) {
    rw_spinlock_read_lock(&cache->index[partition].rw_spinlock);
}
static inline void pgc_index_read_unlock(PGC *cache, size_t partition) {
    rw_spinlock_read_unlock(&cache->index[partition].rw_spinlock);
}
static inline void pgc_index_write_lock(PGC *cache, size_t partition) {
    rw_spinlock_write_lock(&cache->index[partition].rw_spinlock);
}
static inline void pgc_index_write_unlock(PGC *cache, size_t partition) {
    rw_spinlock_write_unlock(&cache->index[partition].rw_spinlock);
}

static inline bool pgc_ll_trylock(PGC *cache __maybe_unused, struct pgc_linked_list *ll) {
    return spinlock_trylock(&ll->spinlock);
}
static inline void pgc_ll_lock(PGC *cache __maybe_unused, struct pgc_linked_list *ll) {
    spinlock_lock(&ll->spinlock);
}
static inline void pgc_ll_unlock(PGC *cache __maybe_unused, struct pgc_linked_list *ll) {
    spinlock_unlock(&ll->spinlock);
}

static inline bool page_transition_trylock(PGC *cache __maybe_unused, PGC_PAGE *page) {
    return spinlock_trylock(&page->transition_spinlock);
}
static inline void page_transition_lock(PGC *cache __maybe_unused, PGC_PAGE *page) {
    spinlock_lock(&page->transition_spinlock);
}
static inline void page_transition_unlock(PGC *cache __maybe_unused, PGC_PAGE *page) {
    spinlock_unlock(&page->transition_spinlock);
}
// ----------------------------------------------------------------------------
// evictions control

static inline size_t cache_usage_per1000(PGC *cache, size_t *size_to_evict) {

    if(size_to_evict)
        spinlock_lock(&cache->usage.spinlock);

    else if(!spinlock_trylock(&cache->usage.spinlock))
        return __atomic_load_n(&cache->usage.per1000, __ATOMIC_RELAXED);

    size_t current_cache_size;
    size_t wanted_cache_size;
    size_t per1000;

    size_t dirty = __atomic_load_n(&cache->dirty.stats->size, __ATOMIC_RELAXED);
    size_t hot = __atomic_load_n(&cache->hot.stats->size, __ATOMIC_RELAXED);

    if(cache->config.options & PGC_OPTIONS_AUTOSCALE) {
        size_t dirty_max = __atomic_load_n(&cache->dirty.stats->max_size, __ATOMIC_RELAXED);
        size_t hot_max = __atomic_load_n(&cache->hot.stats->max_size, __ATOMIC_RELAXED);

        // our promise to users
        size_t max_size1 = MAX(hot_max, hot) * 2;

        // protection against slow flushing
        size_t max_size2 = hot_max + ((dirty_max < hot_max / 2) ? hot_max / 2 : dirty_max * 2);

        // the final wanted cache size
        wanted_cache_size = MIN(max_size1, max_size2);

        if(cache->config.dynamic_target_size_cb) {
            size_t wanted_cache_size_cb = cache->config.dynamic_target_size_cb();
            if(wanted_cache_size_cb > wanted_cache_size)
                wanted_cache_size = wanted_cache_size_cb;
        }

        if (wanted_cache_size < hot + dirty + cache->config.clean_size)
            wanted_cache_size = hot + dirty + cache->config.clean_size;
    }
    else
        wanted_cache_size = hot + dirty + cache->config.clean_size;

    // protection against huge queries
    // if huge queries are running, or huge amounts need to be saved,
    // allow the cache to grow more (hot pages in main cache are also referenced)
    size_t referenced_size = __atomic_load_n(&cache->stats.referenced_size, __ATOMIC_RELAXED);
    if(unlikely(wanted_cache_size < referenced_size * 2 / 3))
        wanted_cache_size = referenced_size * 2 / 3;

    current_cache_size = __atomic_load_n(&cache->stats.size, __ATOMIC_RELAXED); // + pgc_aral_overhead();

    per1000 = (size_t)((unsigned long long)current_cache_size * 1000ULL / (unsigned long long)wanted_cache_size);

    __atomic_store_n(&cache->usage.per1000, per1000, __ATOMIC_RELAXED);
    __atomic_store_n(&cache->stats.wanted_cache_size, wanted_cache_size, __ATOMIC_RELAXED);
    __atomic_store_n(&cache->stats.current_cache_size, current_cache_size, __ATOMIC_RELAXED);

    spinlock_unlock(&cache->usage.spinlock);

    if(size_to_evict) {
        size_t target = (size_t)((unsigned long long)wanted_cache_size * (unsigned long long)cache->config.evict_low_threshold_per1000 / 1000ULL);
        if(current_cache_size > target)
            *size_to_evict = current_cache_size - target;
        else
            *size_to_evict = 0;
    }

    if(per1000 >= cache->config.severe_pressure_per1000)
        __atomic_add_fetch(&cache->stats.events_cache_under_severe_pressure, 1, __ATOMIC_RELAXED);

    else if(per1000 >= cache->config.aggressive_evict_per1000)
        __atomic_add_fetch(&cache->stats.events_cache_needs_space_aggressively, 1, __ATOMIC_RELAXED);

    return per1000;
}
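
// Worked example (illustrative numbers, autoscaling enabled): with
// hot = 600 MiB, hot_max = 700 MiB, dirty = 100 MiB, dirty_max = 200 MiB:
//   max_size1 = MAX(700, 600) * 2      = 1400 MiB   (promise to users)
//   max_size2 = 700 + 700 / 2          = 1050 MiB   (dirty_max < hot_max / 2, slow-flushing guard)
//   wanted_cache_size = MIN(1400, 1050) = 1050 MiB
// A current cache size of 945 MiB then yields per1000 = 945 * 1000 / 1050 = 900,
// i.e. the cache sits at 90% of its target size.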
static inline bool cache_pressure(PGC *cache, size_t limit) {
    return (cache_usage_per1000(cache, NULL) >= limit);
}

#define cache_under_severe_pressure(cache) cache_pressure(cache, (cache)->config.severe_pressure_per1000)
#define cache_needs_space_aggressively(cache) cache_pressure(cache, (cache)->config.aggressive_evict_per1000)
#define cache_above_healthy_limit(cache) cache_pressure(cache, (cache)->config.healthy_size_per1000)

typedef bool (*evict_filter)(PGC_PAGE *page, void *data);
static bool evict_pages_with_filter(PGC *cache, size_t max_skip, size_t max_evict, bool wait, bool all_of_them, evict_filter filter, void *data);
#define evict_pages(cache, max_skip, max_evict, wait, all_of_them) evict_pages_with_filter(cache, max_skip, max_evict, wait, all_of_them, NULL, NULL)

static inline void evict_on_clean_page_added(PGC *cache __maybe_unused) {
    if((cache->config.options & PGC_OPTIONS_EVICT_PAGES_INLINE) || cache_needs_space_aggressively(cache)) {
        evict_pages(cache,
                    cache->config.max_skip_pages_per_inline_eviction,
                    cache->config.max_pages_per_inline_eviction,
                    false, false);
    }
}

static inline void evict_on_page_release_when_permitted(PGC *cache __maybe_unused) {
    if ((cache->config.options & PGC_OPTIONS_EVICT_PAGES_INLINE) || cache_under_severe_pressure(cache)) {
        evict_pages(cache,
                    cache->config.max_skip_pages_per_inline_eviction,
                    cache->config.max_pages_per_inline_eviction,
                    false, false);
    }
}
// ----------------------------------------------------------------------------
// flushing control

static bool flush_pages(PGC *cache, size_t max_flushes, Word_t section, bool wait, bool all_of_them);

static inline bool flushing_critical(PGC *cache) {
    if(unlikely(__atomic_load_n(&cache->dirty.stats->size, __ATOMIC_RELAXED) > __atomic_load_n(&cache->hot.stats->max_size, __ATOMIC_RELAXED))) {
        __atomic_add_fetch(&cache->stats.events_flush_critical, 1, __ATOMIC_RELAXED);
        return true;
    }

    return false;
}

// ----------------------------------------------------------------------------
// helpers

static inline size_t page_assumed_size(PGC *cache, size_t size) {
    return size + (sizeof(PGC_PAGE) + cache->config.additional_bytes_per_page + sizeof(Word_t) * 3);
}

static inline size_t page_size_from_assumed_size(PGC *cache, size_t assumed_size) {
    return assumed_size - (sizeof(PGC_PAGE) + cache->config.additional_bytes_per_page + sizeof(Word_t) * 3);
}
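
// Example of the overhead accounting (illustrative sizes): on a 64-bit build
// where sizeof(PGC_PAGE) is, say, 80 bytes, additional_bytes_per_page is 8 and
// sizeof(Word_t) is 8, a 4096-byte page is accounted as
//   4096 + (80 + 8 + 8 * 3) = 4208 bytes,
// and page_size_from_assumed_size() subtracts the same constant, so the pair
// round-trips exactly.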
// ----------------------------------------------------------------------------
// Linked list management

static inline void atomic_set_max(size_t *max, size_t desired) {
    size_t expected;

    expected = __atomic_load_n(max, __ATOMIC_RELAXED);

    do {
        if(expected >= desired)
            return;
    } while(!__atomic_compare_exchange_n(max, &expected, desired,
                                         false, __ATOMIC_RELAXED, __ATOMIC_RELAXED));
}
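
// atomic_set_max() is a lock-free "raise the high-water mark" helper: the CAS
// succeeds only if nobody raised *max concurrently; on failure, expected is
// refreshed and the loop retries, or exits early once another thread has
// already stored a value >= desired. Illustrative use, mirroring pgc_ll_add():
//
//     size_t entries = __atomic_add_fetch(&ll->stats->entries, 1, __ATOMIC_RELAXED);
//     atomic_set_max(&ll->stats->max_entries, entries);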
struct section_pages {
    SPINLOCK migration_to_v2_spinlock;
    size_t entries;
    size_t size;
    PGC_PAGE *base;
};

static ARAL *pgc_section_pages_aral = NULL;
static void pgc_section_pages_static_aral_init(void) {
    static SPINLOCK spinlock = NETDATA_SPINLOCK_INITIALIZER;

    if(unlikely(!pgc_section_pages_aral)) {
        spinlock_lock(&spinlock);

        // we have to check again
        if(!pgc_section_pages_aral)
            pgc_section_pages_aral = aral_create(
                    "pgc_section",
                    sizeof(struct section_pages),
                    0,
                    65536, NULL,
                    NULL, NULL, false, false);

        spinlock_unlock(&spinlock);
    }
}
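
// pgc_section_pages_static_aral_init() uses double-checked locking: the fast
// path reads the pointer without taking the spinlock, and the check is
// repeated under the lock so that only the first caller creates the ARAL;
// later callers either skip the lock entirely or find it already created.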
static inline void pgc_stats_ll_judy_change(PGC *cache, struct pgc_linked_list *ll, size_t mem_before_judyl, size_t mem_after_judyl) {
    if(mem_after_judyl > mem_before_judyl) {
        __atomic_add_fetch(&ll->stats->size, mem_after_judyl - mem_before_judyl, __ATOMIC_RELAXED);
        __atomic_add_fetch(&cache->stats.size, mem_after_judyl - mem_before_judyl, __ATOMIC_RELAXED);
    }
    else if(mem_after_judyl < mem_before_judyl) {
        __atomic_sub_fetch(&ll->stats->size, mem_before_judyl - mem_after_judyl, __ATOMIC_RELAXED);
        __atomic_sub_fetch(&cache->stats.size, mem_before_judyl - mem_after_judyl, __ATOMIC_RELAXED);
    }
}

static inline void pgc_stats_index_judy_change(PGC *cache, size_t mem_before_judyl, size_t mem_after_judyl) {
    if(mem_after_judyl > mem_before_judyl) {
        __atomic_add_fetch(&cache->stats.size, mem_after_judyl - mem_before_judyl, __ATOMIC_RELAXED);
    }
    else if(mem_after_judyl < mem_before_judyl) {
        __atomic_sub_fetch(&cache->stats.size, mem_before_judyl - mem_after_judyl, __ATOMIC_RELAXED);
    }
}
static void pgc_ll_add(PGC *cache __maybe_unused, struct pgc_linked_list *ll, PGC_PAGE *page, bool having_lock) {
    if(!having_lock)
        pgc_ll_lock(cache, ll);

    internal_fatal(page_get_status_flags(page) != 0,
                   "DBENGINE CACHE: invalid page flags, the page has %d, but it should be %d",
                   page_get_status_flags(page),
                   0);

    if(ll->linked_list_in_sections_judy) {
        size_t mem_before_judyl, mem_after_judyl;

        mem_before_judyl = JudyLMemUsed(ll->sections_judy);
        Pvoid_t *section_pages_pptr = JudyLIns(&ll->sections_judy, page->section, PJE0);
        mem_after_judyl = JudyLMemUsed(ll->sections_judy);

        struct section_pages *sp = *section_pages_pptr;
        if(!sp) {
            // sp = callocz(1, sizeof(struct section_pages));
            sp = aral_mallocz(pgc_section_pages_aral);
            memset(sp, 0, sizeof(struct section_pages));

            *section_pages_pptr = sp;
            mem_after_judyl += sizeof(struct section_pages);
        }
        pgc_stats_ll_judy_change(cache, ll, mem_before_judyl, mem_after_judyl);

        sp->entries++;
        sp->size += page->assumed_size;
        DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(sp->base, page, link.prev, link.next);

        if((sp->entries % cache->config.max_dirty_pages_per_call) == 0)
            ll->version++;
    }
    else {
        // CLEAN pages end up here.
        // - New pages created as CLEAN always have 1 access.
        // - DIRTY pages made CLEAN are appended (accesses > 0) or prepended (accesses = 0), depending on their accesses.

        if(page->accesses || page_flag_check(page, PGC_PAGE_HAS_BEEN_ACCESSED | PGC_PAGE_HAS_NO_DATA_IGNORE_ACCESSES) == PGC_PAGE_HAS_BEEN_ACCESSED) {
            DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(ll->base, page, link.prev, link.next);
            page_flag_clear(page, PGC_PAGE_HAS_BEEN_ACCESSED);
        }
        else
            DOUBLE_LINKED_LIST_PREPEND_ITEM_UNSAFE(ll->base, page, link.prev, link.next);

        ll->version++;
    }

    page_flag_set(page, ll->flags);

    if(!having_lock)
        pgc_ll_unlock(cache, ll);

    size_t entries = __atomic_add_fetch(&ll->stats->entries, 1, __ATOMIC_RELAXED);
    size_t size    = __atomic_add_fetch(&ll->stats->size, page->assumed_size, __ATOMIC_RELAXED);

    __atomic_add_fetch(&ll->stats->added_entries, 1, __ATOMIC_RELAXED);
    __atomic_add_fetch(&ll->stats->added_size, page->assumed_size, __ATOMIC_RELAXED);

    atomic_set_max(&ll->stats->max_entries, entries);
    atomic_set_max(&ll->stats->max_size, size);
}
static void pgc_ll_del(PGC *cache __maybe_unused, struct pgc_linked_list *ll, PGC_PAGE *page, bool having_lock) {
    __atomic_sub_fetch(&ll->stats->entries, 1, __ATOMIC_RELAXED);
    __atomic_sub_fetch(&ll->stats->size, page->assumed_size, __ATOMIC_RELAXED);

    __atomic_add_fetch(&ll->stats->removed_entries, 1, __ATOMIC_RELAXED);
    __atomic_add_fetch(&ll->stats->removed_size, page->assumed_size, __ATOMIC_RELAXED);

    if(!having_lock)
        pgc_ll_lock(cache, ll);

    internal_fatal(page_get_status_flags(page) != ll->flags,
                   "DBENGINE CACHE: invalid page flags, the page has %d, but it should be %d",
                   page_get_status_flags(page),
                   ll->flags);

    page_flag_clear(page, ll->flags);

    if(ll->linked_list_in_sections_judy) {
        Pvoid_t *section_pages_pptr = JudyLGet(ll->sections_judy, page->section, PJE0);
        internal_fatal(!section_pages_pptr, "DBENGINE CACHE: page should be in Judy LL, but it is not");

        struct section_pages *sp = *section_pages_pptr;
        sp->entries--;
        sp->size -= page->assumed_size;
        DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(sp->base, page, link.prev, link.next);

        if(!sp->base) {
            size_t mem_before_judyl, mem_after_judyl;

            mem_before_judyl = JudyLMemUsed(ll->sections_judy);
            int rc = JudyLDel(&ll->sections_judy, page->section, PJE0);
            mem_after_judyl = JudyLMemUsed(ll->sections_judy);

            if(!rc)
                fatal("DBENGINE CACHE: cannot delete section from Judy LL");

            // freez(sp);
            aral_freez(pgc_section_pages_aral, sp);

            mem_after_judyl -= sizeof(struct section_pages);
            pgc_stats_ll_judy_change(cache, ll, mem_before_judyl, mem_after_judyl);
        }
    }
    else {
        DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(ll->base, page, link.prev, link.next);
        ll->version++;
    }

    if(!having_lock)
        pgc_ll_unlock(cache, ll);
}
static inline void page_has_been_accessed(PGC *cache, PGC_PAGE *page) {
    PGC_PAGE_FLAGS flags = page_flag_check(page, PGC_PAGE_CLEAN | PGC_PAGE_HAS_NO_DATA_IGNORE_ACCESSES);

    if (!(flags & PGC_PAGE_HAS_NO_DATA_IGNORE_ACCESSES)) {
        __atomic_add_fetch(&page->accesses, 1, __ATOMIC_RELAXED);

        if (flags & PGC_PAGE_CLEAN) {
            if(pgc_ll_trylock(cache, &cache->clean)) {
                DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(cache->clean.base, page, link.prev, link.next);
                DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(cache->clean.base, page, link.prev, link.next);
                pgc_ll_unlock(cache, &cache->clean);
                page_flag_clear(page, PGC_PAGE_HAS_BEEN_ACCESSED);
            }
            else
                page_flag_set(page, PGC_PAGE_HAS_BEEN_ACCESSED);
        }
    }
}
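
// Access tracking is deliberately lazy: a clean page is moved to the tail of
// the LRU only if the clean-list lock can be taken without waiting; otherwise
// the page is just flagged PGC_PAGE_HAS_BEEN_ACCESSED and the eviction loop
// performs the relocation later, while it already holds the clean lock.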
// ----------------------------------------------------------------------------
// state transitions

static inline void page_set_clean(PGC *cache, PGC_PAGE *page, bool having_transition_lock, bool having_clean_lock) {
    if(!having_transition_lock)
        page_transition_lock(cache, page);

    PGC_PAGE_FLAGS flags = page_get_status_flags(page);

    if(flags & PGC_PAGE_CLEAN) {
        if(!having_transition_lock)
            page_transition_unlock(cache, page);
        return;
    }

    if(flags & PGC_PAGE_HOT)
        pgc_ll_del(cache, &cache->hot, page, false);

    if(flags & PGC_PAGE_DIRTY)
        pgc_ll_del(cache, &cache->dirty, page, false);

    // first add to the linked list, then set the flag (required for move_page_last())
    pgc_ll_add(cache, &cache->clean, page, having_clean_lock);

    if(!having_transition_lock)
        page_transition_unlock(cache, page);
}
static inline void page_set_dirty(PGC *cache, PGC_PAGE *page, bool having_hot_lock) {
    if(!having_hot_lock)
        // to avoid deadlocks, we have to get the hot lock before the page transition,
        // since this is what all_hot_to_dirty() does
        pgc_ll_lock(cache, &cache->hot);

    page_transition_lock(cache, page);

    PGC_PAGE_FLAGS flags = page_get_status_flags(page);

    if(flags & PGC_PAGE_DIRTY) {
        page_transition_unlock(cache, page);

        if(!having_hot_lock)
            // we don't need the hot lock anymore
            pgc_ll_unlock(cache, &cache->hot);

        return;
    }

    __atomic_add_fetch(&cache->stats.hot2dirty_entries, 1, __ATOMIC_RELAXED);
    __atomic_add_fetch(&cache->stats.hot2dirty_size, page->assumed_size, __ATOMIC_RELAXED);

    if(likely(flags & PGC_PAGE_HOT))
        pgc_ll_del(cache, &cache->hot, page, true);

    if(!having_hot_lock)
        // we don't need the hot lock anymore
        pgc_ll_unlock(cache, &cache->hot);

    if(unlikely(flags & PGC_PAGE_CLEAN))
        pgc_ll_del(cache, &cache->clean, page, false);

    // first add to the linked list, then set the flag (required for move_page_last())
    pgc_ll_add(cache, &cache->dirty, page, false);

    __atomic_sub_fetch(&cache->stats.hot2dirty_entries, 1, __ATOMIC_RELAXED);
    __atomic_sub_fetch(&cache->stats.hot2dirty_size, page->assumed_size, __ATOMIC_RELAXED);

    page_transition_unlock(cache, page);
}
static inline void page_set_hot(PGC *cache, PGC_PAGE *page) {
    page_transition_lock(cache, page);

    PGC_PAGE_FLAGS flags = page_get_status_flags(page);

    if(flags & PGC_PAGE_HOT) {
        page_transition_unlock(cache, page);
        return;
    }

    if(flags & PGC_PAGE_DIRTY)
        pgc_ll_del(cache, &cache->dirty, page, false);

    if(flags & PGC_PAGE_CLEAN)
        pgc_ll_del(cache, &cache->clean, page, false);

    // first add to the linked list, then set the flag (required for move_page_last())
    pgc_ll_add(cache, &cache->hot, page, false);

    page_transition_unlock(cache, page);
}
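
// Lifecycle sketch, matching the diagram at the top of this file (the public
// API drives these transitions through its own wrappers; this is only the
// internal order of operations):
//
//     page_set_hot(cache, page);                   // page is being collected
//     page_set_dirty(cache, page, false);          // collection done, data not saved yet
//     page_set_clean(cache, page, false, false);   // saved by the flusher, now evictable
//     // eviction later removes it from the index and calls pgc_free_clean_cb()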
// ----------------------------------------------------------------------------
// Referencing

static inline size_t PGC_REFERENCED_PAGES(PGC *cache) {
    return __atomic_load_n(&cache->stats.referenced_entries, __ATOMIC_RELAXED);
}

static inline void PGC_REFERENCED_PAGES_PLUS1(PGC *cache, PGC_PAGE *page) {
    __atomic_add_fetch(&cache->stats.referenced_entries, 1, __ATOMIC_RELAXED);
    __atomic_add_fetch(&cache->stats.referenced_size, page->assumed_size, __ATOMIC_RELAXED);
}

static inline void PGC_REFERENCED_PAGES_MINUS1(PGC *cache, size_t assumed_size) {
    __atomic_sub_fetch(&cache->stats.referenced_entries, 1, __ATOMIC_RELAXED);
    __atomic_sub_fetch(&cache->stats.referenced_size, assumed_size, __ATOMIC_RELAXED);
}

// If the page is not already acquired,
// YOU HAVE TO HAVE THE QUEUE (hot, dirty, clean) THE PAGE IS IN, L O C K E D !
// If you don't have it locked, NOTHING PREVENTS THIS PAGE FROM VANISHING WHILE THIS IS CALLED!
static inline bool page_acquire(PGC *cache, PGC_PAGE *page) {
    __atomic_add_fetch(&cache->stats.acquires, 1, __ATOMIC_RELAXED);

    REFCOUNT expected, desired;

    expected = __atomic_load_n(&page->refcount, __ATOMIC_RELAXED);
    size_t spins = 0;

    do {
        spins++;

        if(unlikely(expected < 0))
            return false;

        desired = expected + 1;

    } while(!__atomic_compare_exchange_n(&page->refcount, &expected, desired, false, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED));

    if(unlikely(spins > 1))
        __atomic_add_fetch(&cache->stats.acquire_spins, spins - 1, __ATOMIC_RELAXED);

    if(desired == 1)
        PGC_REFERENCED_PAGES_PLUS1(cache, page);

    return true;
}
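
// Reference counting convention: refcount > 0 means the page is acquired that
// many times, 0 means it is only referenced by the queues and the index, and
// the negative REFCOUNT_DELETING sentinel marks a page being torn down.
// page_acquire() therefore fails (returns false) as soon as it observes a
// negative value, without modifying the counter in that case.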
static inline void page_release(PGC *cache, PGC_PAGE *page, bool evict_if_necessary) {
    __atomic_add_fetch(&cache->stats.releases, 1, __ATOMIC_RELAXED);

    size_t assumed_size = page->assumed_size; // take the size before we release it

    REFCOUNT expected, desired;

    expected = __atomic_load_n(&page->refcount, __ATOMIC_RELAXED);
    size_t spins = 0;

    do {
        spins++;

        internal_fatal(expected <= 0,
                       "DBENGINE CACHE: trying to release a page with reference counter %d", expected);

        desired = expected - 1;

    } while(!__atomic_compare_exchange_n(&page->refcount, &expected, desired, false, __ATOMIC_RELEASE, __ATOMIC_RELAXED));

    if(unlikely(spins > 1))
        __atomic_add_fetch(&cache->stats.release_spins, spins - 1, __ATOMIC_RELAXED);

    if(desired == 0) {
        PGC_REFERENCED_PAGES_MINUS1(cache, assumed_size);

        if(evict_if_necessary)
            evict_on_page_release_when_permitted(cache);
    }
}
static inline bool non_acquired_page_get_for_deletion___while_having_clean_locked(PGC *cache __maybe_unused, PGC_PAGE *page) {
    __atomic_add_fetch(&cache->stats.acquires_for_deletion, 1, __ATOMIC_RELAXED);

    internal_fatal(!is_page_clean(page),
                   "DBENGINE CACHE: only clean pages can be deleted");

    REFCOUNT expected, desired;

    expected = __atomic_load_n(&page->refcount, __ATOMIC_RELAXED);
    size_t spins = 0;
    bool delete_it;

    do {
        spins++;

        if (expected == 0) {
            desired = REFCOUNT_DELETING;
            delete_it = true;
        }
        else {
            delete_it = false;
            break;
        }

    } while(!__atomic_compare_exchange_n(&page->refcount, &expected, desired, false, __ATOMIC_RELEASE, __ATOMIC_RELAXED));

    if(delete_it) {
        // we can delete this page
        internal_fatal(page_flag_check(page, PGC_PAGE_IS_BEING_DELETED),
                       "DBENGINE CACHE: page is already being deleted");

        page_flag_set(page, PGC_PAGE_IS_BEING_DELETED);
    }

    if(unlikely(spins > 1))
        __atomic_add_fetch(&cache->stats.delete_spins, spins - 1, __ATOMIC_RELAXED);

    return delete_it;
}
static inline bool acquired_page_get_for_deletion_or_release_it(PGC *cache __maybe_unused, PGC_PAGE *page) {
    __atomic_add_fetch(&cache->stats.acquires_for_deletion, 1, __ATOMIC_RELAXED);

    size_t assumed_size = page->assumed_size; // take the size before we release it

    REFCOUNT expected, desired;

    expected = __atomic_load_n(&page->refcount, __ATOMIC_RELAXED);
    size_t spins = 0;
    bool delete_it;

    do {
        spins++;

        internal_fatal(expected < 1,
                       "DBENGINE CACHE: page to be deleted should be acquired by the caller.");

        if (expected == 1) {
            // we are the only one having this page referenced
            desired = REFCOUNT_DELETING;
            delete_it = true;
        }
        else {
            // this page cannot be deleted
            desired = expected - 1;
            delete_it = false;
        }

    } while(!__atomic_compare_exchange_n(&page->refcount, &expected, desired, false, __ATOMIC_RELEASE, __ATOMIC_RELAXED));

    if(delete_it) {
        PGC_REFERENCED_PAGES_MINUS1(cache, assumed_size);

        // we can delete this page
        internal_fatal(page_flag_check(page, PGC_PAGE_IS_BEING_DELETED),
                       "DBENGINE CACHE: page is already being deleted");

        page_flag_set(page, PGC_PAGE_IS_BEING_DELETED);
    }

    if(unlikely(spins > 1))
        __atomic_add_fetch(&cache->stats.delete_spins, spins - 1, __ATOMIC_RELAXED);

    return delete_it;
}
// ----------------------------------------------------------------------------
// Indexing

static inline void free_this_page(PGC *cache, PGC_PAGE *page, size_t partition __maybe_unused) {
    // call the callback to free the user supplied memory
    cache->config.pgc_free_clean_cb(cache, (PGC_ENTRY){
            .section = page->section,
            .metric_id = page->metric_id,
            .start_time_s = page->start_time_s,
            .end_time_s = __atomic_load_n(&page->end_time_s, __ATOMIC_RELAXED),
            .update_every_s = page->update_every_s,
            .size = page_size_from_assumed_size(cache, page->assumed_size),
            .hot = (is_page_hot(page)) ? true : false,
            .data = page->data,
            .custom_data = (cache->config.additional_bytes_per_page) ? page->custom_data : NULL,
    });

    // update statistics
    __atomic_add_fetch(&cache->stats.removed_entries, 1, __ATOMIC_RELAXED);
    __atomic_add_fetch(&cache->stats.removed_size, page->assumed_size, __ATOMIC_RELAXED);

    __atomic_sub_fetch(&cache->stats.entries, 1, __ATOMIC_RELAXED);
    __atomic_sub_fetch(&cache->stats.size, page->assumed_size, __ATOMIC_RELAXED);

    // free our memory
#ifdef PGC_WITH_ARAL
    aral_freez(cache->aral[partition], page);
#else
    freez(page);
#endif
}
static void remove_this_page_from_index_unsafe(PGC *cache, PGC_PAGE *page, size_t partition) {
    // remove it from the Judy arrays

    pointer_check(cache, page);

    internal_fatal(page_flag_check(page, PGC_PAGE_HOT | PGC_PAGE_DIRTY | PGC_PAGE_CLEAN),
                   "DBENGINE CACHE: page to be removed from the cache is still in the linked-list");

    internal_fatal(!page_flag_check(page, PGC_PAGE_IS_BEING_DELETED),
                   "DBENGINE CACHE: page to be removed from the index, is not marked for deletion");

    internal_fatal(partition != pgc_indexing_partition(cache, page->metric_id),
                   "DBENGINE CACHE: attempted to remove this page from the wrong partition of the cache");

    Pvoid_t *metrics_judy_pptr = JudyLGet(cache->index[partition].sections_judy, page->section, PJE0);
    if(unlikely(!metrics_judy_pptr))
        fatal("DBENGINE CACHE: section '%lu' should exist, but it does not.", page->section);

    Pvoid_t *pages_judy_pptr = JudyLGet(*metrics_judy_pptr, page->metric_id, PJE0);
    if(unlikely(!pages_judy_pptr))
        fatal("DBENGINE CACHE: metric '%lu' in section '%lu' should exist, but it does not.",
              page->metric_id, page->section);

    Pvoid_t *page_ptr = JudyLGet(*pages_judy_pptr, page->start_time_s, PJE0);
    if(unlikely(!page_ptr))
        fatal("DBENGINE CACHE: page with start time '%ld' of metric '%lu' in section '%lu' should exist, but it does not.",
              page->start_time_s, page->metric_id, page->section);

    PGC_PAGE *found_page = *page_ptr;
    if(unlikely(found_page != page))
        fatal("DBENGINE CACHE: page with start time '%ld' of metric '%lu' in section '%lu' should exist, but the index returned a different address.",
              page->start_time_s, page->metric_id, page->section);

    size_t mem_before_judyl = 0, mem_after_judyl = 0;

    mem_before_judyl += JudyLMemUsed(*pages_judy_pptr);
    if(unlikely(!JudyLDel(pages_judy_pptr, page->start_time_s, PJE0)))
        fatal("DBENGINE CACHE: page with start time '%ld' of metric '%lu' in section '%lu' exists, but cannot be deleted.",
              page->start_time_s, page->metric_id, page->section);
    mem_after_judyl += JudyLMemUsed(*pages_judy_pptr);

    mem_before_judyl += JudyLMemUsed(*metrics_judy_pptr);
    if(!*pages_judy_pptr && !JudyLDel(metrics_judy_pptr, page->metric_id, PJE0))
        fatal("DBENGINE CACHE: metric '%lu' in section '%lu' exists and is empty, but cannot be deleted.",
              page->metric_id, page->section);
    mem_after_judyl += JudyLMemUsed(*metrics_judy_pptr);

    mem_before_judyl += JudyLMemUsed(cache->index[partition].sections_judy);
    if(!*metrics_judy_pptr && !JudyLDel(&cache->index[partition].sections_judy, page->section, PJE0))
        fatal("DBENGINE CACHE: section '%lu' exists and is empty, but cannot be deleted.", page->section);
    mem_after_judyl += JudyLMemUsed(cache->index[partition].sections_judy);

    pgc_stats_index_judy_change(cache, mem_before_judyl, mem_after_judyl);

    pointer_del(cache, page);
}
static inline void remove_and_free_page_not_in_any_queue_and_acquired_for_deletion(PGC *cache, PGC_PAGE *page) {
    size_t partition = pgc_indexing_partition(cache, page->metric_id);
    pgc_index_write_lock(cache, partition);
    remove_this_page_from_index_unsafe(cache, page, partition);
    pgc_index_write_unlock(cache, partition);
    free_this_page(cache, page, partition);
}

static inline bool make_acquired_page_clean_and_evict_or_page_release(PGC *cache, PGC_PAGE *page) {
    pointer_check(cache, page);

    page_transition_lock(cache, page);
    pgc_ll_lock(cache, &cache->clean);

    // make it clean - it does not have any accesses, so it will be prepended
    page_set_clean(cache, page, true, true);

    if(!acquired_page_get_for_deletion_or_release_it(cache, page)) {
        pgc_ll_unlock(cache, &cache->clean);
        page_transition_unlock(cache, page);
        return false;
    }

    // remove it from the linked list
    pgc_ll_del(cache, &cache->clean, page, true);
    pgc_ll_unlock(cache, &cache->clean);
    page_transition_unlock(cache, page);

    remove_and_free_page_not_in_any_queue_and_acquired_for_deletion(cache, page);

    return true;
}
// returns true when there is more work to do
static bool evict_pages_with_filter(PGC *cache, size_t max_skip, size_t max_evict, bool wait, bool all_of_them, evict_filter filter, void *data) {
    size_t per1000 = cache_usage_per1000(cache, NULL);

    if(!all_of_them && per1000 < cache->config.healthy_size_per1000)
        // don't bother - not enough to do anything
        return false;

    size_t workers_running = __atomic_add_fetch(&cache->stats.workers_evict, 1, __ATOMIC_RELAXED);
    if(!wait && !all_of_them && workers_running > cache->config.max_workers_evict_inline && per1000 < cache->config.severe_pressure_per1000) {
        __atomic_sub_fetch(&cache->stats.workers_evict, 1, __ATOMIC_RELAXED);
        return false;
    }

    internal_fatal(cache->clean.linked_list_in_sections_judy,
                   "wrong clean pages configuration - clean pages need to have a linked list, not a judy array");

    if(unlikely(!max_skip))
        max_skip = SIZE_MAX;
    else if(unlikely(max_skip < 2))
        max_skip = 2;

    if(unlikely(!max_evict))
        max_evict = SIZE_MAX;
    else if(unlikely(max_evict < 2))
        max_evict = 2;

    size_t total_pages_evicted = 0;
    size_t total_pages_skipped = 0;
    bool stopped_before_finishing = false;
    size_t spins = 0;

    do {
        if(++spins > 1)
            __atomic_add_fetch(&cache->stats.evict_spins, 1, __ATOMIC_RELAXED);

        bool batch;
        size_t max_size_to_evict = 0;
        if (unlikely(all_of_them)) {
            max_size_to_evict = SIZE_MAX;
            batch = true;
        }
        else if(unlikely(wait)) {
            per1000 = cache_usage_per1000(cache, &max_size_to_evict);
            batch = (wait && per1000 > cache->config.severe_pressure_per1000) ? true : false;
        }
        else {
            batch = false;
            max_size_to_evict = (cache_above_healthy_limit(cache)) ? 1 : 0;
        }

        if (!max_size_to_evict)
            break;

        // check if we have to stop
        if(total_pages_evicted >= max_evict && !all_of_them) {
            stopped_before_finishing = true;
            break;
        }

        if(!all_of_them && !wait) {
            if(!pgc_ll_trylock(cache, &cache->clean)) {
                stopped_before_finishing = true;
                goto premature_exit;
            }

            // at this point we have the clean lock
        }
        else
            pgc_ll_lock(cache, &cache->clean);

        // find a page to evict
        PGC_PAGE *pages_to_evict = NULL;
        size_t pages_to_evict_size = 0;
        for(PGC_PAGE *page = cache->clean.base, *next = NULL, *first_page_we_relocated = NULL; page ; page = next) {
            next = page->link.next;

            if(unlikely(page == first_page_we_relocated))
                // we did a complete loop on all pages
                break;

            if(unlikely(page_flag_check(page, PGC_PAGE_HAS_BEEN_ACCESSED | PGC_PAGE_HAS_NO_DATA_IGNORE_ACCESSES) == PGC_PAGE_HAS_BEEN_ACCESSED)) {
                DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(cache->clean.base, page, link.prev, link.next);
                DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(cache->clean.base, page, link.prev, link.next);
                page_flag_clear(page, PGC_PAGE_HAS_BEEN_ACCESSED);
                continue;
            }

            if(unlikely(filter && !filter(page, data)))
                continue;

            if(non_acquired_page_get_for_deletion___while_having_clean_locked(cache, page)) {
                // we can delete this page

                // remove it from the clean list
                pgc_ll_del(cache, &cache->clean, page, true);

                __atomic_add_fetch(&cache->stats.evicting_entries, 1, __ATOMIC_RELAXED);
                __atomic_add_fetch(&cache->stats.evicting_size, page->assumed_size, __ATOMIC_RELAXED);

                DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(pages_to_evict, page, link.prev, link.next);

                pages_to_evict_size += page->assumed_size;

                if(unlikely(all_of_them || (batch && pages_to_evict_size < max_size_to_evict)))
                    // get more pages
                    ;
                else
                    // one page at a time
                    break;
            }
            else {
                // we can't delete this page

                if(!first_page_we_relocated)
                    first_page_we_relocated = page;

                DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(cache->clean.base, page, link.prev, link.next);
                DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(cache->clean.base, page, link.prev, link.next);

                // check if we have to stop
                if(unlikely(++total_pages_skipped >= max_skip && !all_of_them)) {
                    stopped_before_finishing = true;
                    break;
                }
            }
        }
        pgc_ll_unlock(cache, &cache->clean);

        if(likely(pages_to_evict)) {
            // remove them from the index

            if(unlikely(pages_to_evict->link.next)) {
                // we have many pages, let's minimize the index locks we are going to get

                PGC_PAGE *pages_per_partition[cache->config.partitions];
                memset(pages_per_partition, 0, sizeof(PGC_PAGE *) * cache->config.partitions);

                // sort them by partition
                for (PGC_PAGE *page = pages_to_evict, *next = NULL; page; page = next) {
                    next = page->link.next;

                    size_t partition = pgc_indexing_partition(cache, page->metric_id);
                    DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(pages_to_evict, page, link.prev, link.next);
                    DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(pages_per_partition[partition], page, link.prev, link.next);
                }

                // remove them from the index
                for (size_t partition = 0; partition < cache->config.partitions; partition++) {
                    if (!pages_per_partition[partition]) continue;

                    pgc_index_write_lock(cache, partition);

                    for (PGC_PAGE *page = pages_per_partition[partition]; page; page = page->link.next)
                        remove_this_page_from_index_unsafe(cache, page, partition);

                    pgc_index_write_unlock(cache, partition);
                }

                // free them
                for (size_t partition = 0; partition < cache->config.partitions; partition++) {
                    if (!pages_per_partition[partition]) continue;

                    for (PGC_PAGE *page = pages_per_partition[partition], *next = NULL; page; page = next) {
                        next = page->link.next;

                        size_t page_size = page->assumed_size;
                        free_this_page(cache, page, partition);

                        __atomic_sub_fetch(&cache->stats.evicting_entries, 1, __ATOMIC_RELAXED);
                        __atomic_sub_fetch(&cache->stats.evicting_size, page_size, __ATOMIC_RELAXED);

                        total_pages_evicted++;
                    }
                }
            }
            else {
                // just one page to be evicted
                PGC_PAGE *page = pages_to_evict;

                size_t page_size = page->assumed_size;

                size_t partition = pgc_indexing_partition(cache, page->metric_id);
                pgc_index_write_lock(cache, partition);
                remove_this_page_from_index_unsafe(cache, page, partition);
                pgc_index_write_unlock(cache, partition);

                free_this_page(cache, page, partition);

                __atomic_sub_fetch(&cache->stats.evicting_entries, 1, __ATOMIC_RELAXED);
                __atomic_sub_fetch(&cache->stats.evicting_size, page_size, __ATOMIC_RELAXED);

                total_pages_evicted++;
            }
        }
        else
            break;

    } while(all_of_them || (total_pages_evicted < max_evict && total_pages_skipped < max_skip));

    if(all_of_them && !filter) {
        pgc_ll_lock(cache, &cache->clean);
        if(cache->clean.stats->entries) {
            error_limit_static_global_var(erl, 1, 0);
            error_limit(&erl, "DBENGINE CACHE: cannot free all clean pages, %zu are still in the clean queue",
                        cache->clean.stats->entries);
        }
        pgc_ll_unlock(cache, &cache->clean);
    }

premature_exit:
    if(unlikely(total_pages_skipped))
        __atomic_add_fetch(&cache->stats.evict_skipped, total_pages_skipped, __ATOMIC_RELAXED);

    __atomic_sub_fetch(&cache->stats.workers_evict, 1, __ATOMIC_RELAXED);

    return stopped_before_finishing;
}
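
// Eviction in a nutshell: measure pressure and bail out early when the cache
// is healthy or enough inline workers are already evicting; then repeatedly
// (a) walk the clean LRU from its head, relocating recently-accessed pages to
// the tail and skipping filtered-out ones, (b) flip each victim's refcount to
// REFCOUNT_DELETING while the clean lock is held, (c) release the clean lock,
// and (d) delete the victims from the per-partition index and free them,
// grouping them per partition so each index write lock is taken only once.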
static PGC_PAGE *page_add(PGC *cache, PGC_ENTRY *entry, bool *added) {
    internal_fatal(entry->start_time_s < 0 || entry->end_time_s < 0,
                   "DBENGINE CACHE: timestamps are negative");

    __atomic_add_fetch(&cache->stats.workers_add, 1, __ATOMIC_RELAXED);

    size_t partition = pgc_indexing_partition(cache, entry->metric_id);

#ifdef PGC_WITH_ARAL
    PGC_PAGE *allocation = aral_mallocz(cache->aral[partition]);
#endif

    PGC_PAGE *page;
    size_t spins = 0;

    if(unlikely(entry->start_time_s < 0))
        entry->start_time_s = 0;

    if(unlikely(entry->end_time_s < 0))
        entry->end_time_s = 0;

    do {
        if(++spins > 1)
            __atomic_add_fetch(&cache->stats.insert_spins, 1, __ATOMIC_RELAXED);

        pgc_index_write_lock(cache, partition);

        size_t mem_before_judyl = 0, mem_after_judyl = 0;

        mem_before_judyl += JudyLMemUsed(cache->index[partition].sections_judy);
        Pvoid_t *metrics_judy_pptr = JudyLIns(&cache->index[partition].sections_judy, entry->section, PJE0);
        if(unlikely(!metrics_judy_pptr || metrics_judy_pptr == PJERR))
            fatal("DBENGINE CACHE: corrupted sections judy array");
        mem_after_judyl += JudyLMemUsed(cache->index[partition].sections_judy);

        mem_before_judyl += JudyLMemUsed(*metrics_judy_pptr);
        Pvoid_t *pages_judy_pptr = JudyLIns(metrics_judy_pptr, entry->metric_id, PJE0);
        if(unlikely(!pages_judy_pptr || pages_judy_pptr == PJERR))
            fatal("DBENGINE CACHE: corrupted pages judy array");
        mem_after_judyl += JudyLMemUsed(*metrics_judy_pptr);

        mem_before_judyl += JudyLMemUsed(*pages_judy_pptr);
        Pvoid_t *page_ptr = JudyLIns(pages_judy_pptr, entry->start_time_s, PJE0);
        if(unlikely(!page_ptr || page_ptr == PJERR))
            fatal("DBENGINE CACHE: corrupted page in judy array");
        mem_after_judyl += JudyLMemUsed(*pages_judy_pptr);

        pgc_stats_index_judy_change(cache, mem_before_judyl, mem_after_judyl);

        page = *page_ptr;

        if (likely(!page)) {
#ifdef PGC_WITH_ARAL
            page = allocation;
            allocation = NULL;
#else
            page = mallocz(sizeof(PGC_PAGE) + cache->config.additional_bytes_per_page);
#endif
            page->refcount = 1;
            page->accesses = (entry->hot) ? 0 : 1;
            page->flags = 0;
            page->section = entry->section;
            page->metric_id = entry->metric_id;
            page->start_time_s = entry->start_time_s;
            page->end_time_s = entry->end_time_s;
            page->update_every_s = entry->update_every_s;
            page->data = entry->data;
            page->assumed_size = page_assumed_size(cache, entry->size);
            spinlock_init(&page->transition_spinlock);
            page->link.prev = NULL;
            page->link.next = NULL;

            if(cache->config.additional_bytes_per_page) {
                if(entry->custom_data)
                    memcpy(page->custom_data, entry->custom_data, cache->config.additional_bytes_per_page);
                else
                    memset(page->custom_data, 0, cache->config.additional_bytes_per_page);
            }

            // put it in the index
            *page_ptr = page;
            pointer_add(cache, page);
            pgc_index_write_unlock(cache, partition);

            if (entry->hot)
                page_set_hot(cache, page);
            else
                page_set_clean(cache, page, false, false);

            PGC_REFERENCED_PAGES_PLUS1(cache, page);

            // update statistics
            __atomic_add_fetch(&cache->stats.added_entries, 1, __ATOMIC_RELAXED);
            __atomic_add_fetch(&cache->stats.added_size, page->assumed_size, __ATOMIC_RELAXED);
            __atomic_add_fetch(&cache->stats.entries, 1, __ATOMIC_RELAXED);
            __atomic_add_fetch(&cache->stats.size, page->assumed_size, __ATOMIC_RELAXED);

            if(added)
                *added = true;
        }
        else {
            if (!page_acquire(cache, page))
                page = NULL;

            else if(added)
                *added = false;

            pgc_index_write_unlock(cache, partition);

            if(unlikely(!page)) {
                // now that we don't have the lock,
                // give it some time for the old page to go away
                struct timespec ns = { .tv_sec = 0, .tv_nsec = 1 };
                nanosleep(&ns, NULL);
            }
        }

    } while(!page);

#ifdef PGC_WITH_ARAL
    if(allocation)
        aral_freez(cache->aral[partition], allocation);
#endif

    __atomic_sub_fetch(&cache->stats.workers_add, 1, __ATOMIC_RELAXED);

    if(!entry->hot)
        evict_on_clean_page_added(cache);

    if((cache->config.options & PGC_OPTIONS_FLUSH_PAGES_INLINE) || flushing_critical(cache)) {
        flush_pages(cache, cache->config.max_flushes_inline, PGC_SECTION_ALL,
                    false, false);
    }

    return page;
}
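// page_find_and_acquire(): look up a page in the index and acquire a reference to it.
// The PGC_SEARCH methods map onto Judy lookups on the per-metric start_time_s array:
//   EXACT   - JudyLGet() on start_time_s only;
//   CLOSEST - exact match first, then the previous page if it still covers start_time_s,
//             otherwise the first page after start_time_s;
//   FIRST / NEXT / LAST / PREV - JudyLFirst() / JudyLNext() / JudyLLast() / JudyLPrev()
//             starting from start_time_s.
// A found page is only returned if page_acquire() succeeds; hits and misses are counted
// separately for exact and closest searches.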
static PGC_PAGE *page_find_and_acquire(PGC *cache, Word_t section, Word_t metric_id, time_t start_time_s, PGC_SEARCH method) {
    __atomic_add_fetch(&cache->stats.workers_search, 1, __ATOMIC_RELAXED);

    size_t *stats_hit_ptr, *stats_miss_ptr;

    if(method == PGC_SEARCH_CLOSEST) {
        __atomic_add_fetch(&cache->stats.searches_closest, 1, __ATOMIC_RELAXED);
        stats_hit_ptr = &cache->stats.searches_closest_hits;
        stats_miss_ptr = &cache->stats.searches_closest_misses;
    }
    else {
        __atomic_add_fetch(&cache->stats.searches_exact, 1, __ATOMIC_RELAXED);
        stats_hit_ptr = &cache->stats.searches_exact_hits;
        stats_miss_ptr = &cache->stats.searches_exact_misses;
    }

    PGC_PAGE *page = NULL;
    size_t partition = pgc_indexing_partition(cache, metric_id);
    pgc_index_read_lock(cache, partition);

    Pvoid_t *metrics_judy_pptr = JudyLGet(cache->index[partition].sections_judy, section, PJE0);
    if(unlikely(metrics_judy_pptr == PJERR))
        fatal("DBENGINE CACHE: corrupted sections judy array");

    if(unlikely(!metrics_judy_pptr)) {
        // section does not exist
        goto cleanup;
    }

    Pvoid_t *pages_judy_pptr = JudyLGet(*metrics_judy_pptr, metric_id, PJE0);
    if(unlikely(pages_judy_pptr == PJERR))
        fatal("DBENGINE CACHE: corrupted pages judy array");

    if(unlikely(!pages_judy_pptr)) {
        // metric does not exist
        goto cleanup;
    }

    switch(method) {
        default:
        case PGC_SEARCH_CLOSEST: {
            Pvoid_t *page_ptr = JudyLGet(*pages_judy_pptr, start_time_s, PJE0);
            if (unlikely(page_ptr == PJERR))
                fatal("DBENGINE CACHE: corrupted page in pages judy array");

            if (page_ptr)
                page = *page_ptr;

            else {
                Word_t time = start_time_s;

                // find the previous page
                page_ptr = JudyLPrev(*pages_judy_pptr, &time, PJE0);
                if(unlikely(page_ptr == PJERR))
                    fatal("DBENGINE CACHE: corrupted page in pages judy array #2");

                if(page_ptr) {
                    // found a page starting before our timestamp
                    // check if our timestamp is included
                    page = *page_ptr;
                    if(start_time_s > page->end_time_s)
                        // it is not good for us
                        page = NULL;
                }

                if(!page) {
                    // find the next page then...
                    time = start_time_s;
                    page_ptr = JudyLNext(*pages_judy_pptr, &time, PJE0);
                    if(page_ptr)
                        page = *page_ptr;
                }
            }
        }
        break;

        case PGC_SEARCH_EXACT: {
            Pvoid_t *page_ptr = JudyLGet(*pages_judy_pptr, start_time_s, PJE0);
            if (unlikely(page_ptr == PJERR))
                fatal("DBENGINE CACHE: corrupted page in pages judy array");

            if (page_ptr)
                page = *page_ptr;
        }
        break;

        case PGC_SEARCH_FIRST: {
            Word_t time = start_time_s;
            Pvoid_t *page_ptr = JudyLFirst(*pages_judy_pptr, &time, PJE0);
            if (unlikely(page_ptr == PJERR))
                fatal("DBENGINE CACHE: corrupted page in pages judy array");

            if (page_ptr)
                page = *page_ptr;
        }
        break;

        case PGC_SEARCH_NEXT: {
            Word_t time = start_time_s;
            Pvoid_t *page_ptr = JudyLNext(*pages_judy_pptr, &time, PJE0);
            if (unlikely(page_ptr == PJERR))
                fatal("DBENGINE CACHE: corrupted page in pages judy array");

            if (page_ptr)
                page = *page_ptr;
        }
        break;

        case PGC_SEARCH_LAST: {
            Word_t time = start_time_s;
            Pvoid_t *page_ptr = JudyLLast(*pages_judy_pptr, &time, PJE0);
            if (unlikely(page_ptr == PJERR))
                fatal("DBENGINE CACHE: corrupted page in pages judy array");

            if (page_ptr)
                page = *page_ptr;
        }
        break;

        case PGC_SEARCH_PREV: {
            Word_t time = start_time_s;
            Pvoid_t *page_ptr = JudyLPrev(*pages_judy_pptr, &time, PJE0);
            if (unlikely(page_ptr == PJERR))
                fatal("DBENGINE CACHE: corrupted page in pages judy array");

            if (page_ptr)
                page = *page_ptr;
        }
        break;
    }

    if(page) {
        pointer_check(cache, page);

        if(!page_acquire(cache, page)) {
            // this page is not good to use
            page = NULL;
        }
    }

cleanup:
    pgc_index_read_unlock(cache, partition);

    if(page) {
        __atomic_add_fetch(stats_hit_ptr, 1, __ATOMIC_RELAXED);
        page_has_been_accessed(cache, page);
    }
    else
        __atomic_add_fetch(stats_miss_ptr, 1, __ATOMIC_RELAXED);

    __atomic_sub_fetch(&cache->stats.workers_search, 1, __ATOMIC_RELAXED);

    return page;
}
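// all_hot_pages_to_dirty(): walk the hot queue (optionally a single section) and move
// every page that can be acquired to the dirty queue, releasing it immediately after.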
static void all_hot_pages_to_dirty(PGC *cache, Word_t section) {
    pgc_ll_lock(cache, &cache->hot);

    bool first = true;
    Word_t last_section = (section == PGC_SECTION_ALL) ? 0 : section;
    Pvoid_t *section_pages_pptr;
    while ((section_pages_pptr = JudyLFirstThenNext(cache->hot.sections_judy, &last_section, &first))) {
        if(section != PGC_SECTION_ALL && last_section != section)
            break;

        struct section_pages *sp = *section_pages_pptr;

        PGC_PAGE *page = sp->base;
        while(page) {
            PGC_PAGE *next = page->link.next;

            if(page_acquire(cache, page)) {
                page_set_dirty(cache, page, true);
                page_release(cache, page, false);
                // page ptr may be invalid now
            }

            page = next;
        }
    }
    pgc_ll_unlock(cache, &cache->hot);
}
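// flush_pages(): collect up to max_dirty_pages_per_call dirty pages of a section into a
// batch, remove them from the dirty queue, release the dirty lock, hand the batch to the
// pgc_save_dirty_cb callback, and then mark the flushed pages clean. When called without
// 'wait' from a data collection thread it only trylocks, so collectors are never blocked.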
// returns true when there is more work to do
static bool flush_pages(PGC *cache, size_t max_flushes, Word_t section, bool wait, bool all_of_them) {
    internal_fatal(!cache->dirty.linked_list_in_sections_judy,
                   "wrong dirty pages configuration - dirty pages need to have a judy array, not a linked list");

    if(!all_of_them && !wait) {
        // we have been called from a data collection thread
        // let's not waste its time...

        if(!pgc_ll_trylock(cache, &cache->dirty)) {
            // we would block, so give up...
            return true;
        }

        // we got the lock at this point
    }
    else
        pgc_ll_lock(cache, &cache->dirty);

    size_t optimal_flush_size = cache->config.max_dirty_pages_per_call;
    size_t dirty_version_at_entry = cache->dirty.version;
    if(!all_of_them && (cache->dirty.stats->entries < optimal_flush_size || cache->dirty.last_version_checked == dirty_version_at_entry)) {
        pgc_ll_unlock(cache, &cache->dirty);
        return false;
    }

    __atomic_add_fetch(&cache->stats.workers_flush, 1, __ATOMIC_RELAXED);

    bool have_dirty_lock = true;

    if(all_of_them || !max_flushes)
        max_flushes = SIZE_MAX;

    Word_t last_section = (section == PGC_SECTION_ALL) ? 0 : section;
    size_t flushes_so_far = 0;
    Pvoid_t *section_pages_pptr;
    bool stopped_before_finishing = false;
    size_t spins = 0;
    bool first = true;

    while (have_dirty_lock && (section_pages_pptr = JudyLFirstThenNext(cache->dirty.sections_judy, &last_section, &first))) {
        if(section != PGC_SECTION_ALL && last_section != section)
            break;

        struct section_pages *sp = *section_pages_pptr;
        if(!all_of_them && sp->entries < optimal_flush_size)
            continue;

        if(!all_of_them && flushes_so_far > max_flushes) {
            stopped_before_finishing = true;
            break;
        }

        if(++spins > 1)
            __atomic_add_fetch(&cache->stats.flush_spins, 1, __ATOMIC_RELAXED);

        PGC_ENTRY array[optimal_flush_size];
        PGC_PAGE *pages[optimal_flush_size];
        size_t pages_added = 0, pages_added_size = 0;
        size_t pages_removed_dirty = 0, pages_removed_dirty_size = 0;
        size_t pages_cancelled = 0, pages_cancelled_size = 0;
        size_t pages_made_clean = 0, pages_made_clean_size = 0;

        PGC_PAGE *page = sp->base;
        while (page && pages_added < optimal_flush_size) {
            PGC_PAGE *next = page->link.next;

            internal_fatal(page_get_status_flags(page) != PGC_PAGE_DIRTY,
                           "DBENGINE CACHE: page should be in the dirty list before saved");

            if (page_acquire(cache, page)) {
                internal_fatal(page_get_status_flags(page) != PGC_PAGE_DIRTY,
                               "DBENGINE CACHE: page should be in the dirty list before saved");

                internal_fatal(page->section != last_section,
                               "DBENGINE CACHE: dirty page is not in the right section (tier)");

                if(!page_transition_trylock(cache, page)) {
                    page_release(cache, page, false);
                    // page ptr may be invalid now
                }
                else {
                    pages[pages_added] = page;
                    array[pages_added] = (PGC_ENTRY) {
                        .section = page->section,
                        .metric_id = page->metric_id,
                        .start_time_s = page->start_time_s,
                        .end_time_s = __atomic_load_n(&page->end_time_s, __ATOMIC_RELAXED),
                        .update_every_s = page->update_every_s,
                        .size = page_size_from_assumed_size(cache, page->assumed_size),
                        .data = page->data,
                        .custom_data = (cache->config.additional_bytes_per_page) ? page->custom_data : NULL,
                        .hot = false,
                    };

                    pages_added_size += page->assumed_size;
                    pages_added++;
                }
            }

            page = next;
        }

        // do we have enough to save?
        if(all_of_them || pages_added == optimal_flush_size) {
            // we should do it

            for (size_t i = 0; i < pages_added; i++) {
                PGC_PAGE *tpg = pages[i];

                internal_fatal(page_get_status_flags(tpg) != PGC_PAGE_DIRTY,
                               "DBENGINE CACHE: page should be in the dirty list before saved");

                __atomic_add_fetch(&cache->stats.flushing_entries, 1, __ATOMIC_RELAXED);
                __atomic_add_fetch(&cache->stats.flushing_size, tpg->assumed_size, __ATOMIC_RELAXED);

                // remove it from the dirty list
                pgc_ll_del(cache, &cache->dirty, tpg, true);

                pages_removed_dirty_size += tpg->assumed_size;
                pages_removed_dirty++;
            }

            // next time, repeat the same section (tier)
            first = true;
        }
        else {
            // we can't do it

            for (size_t i = 0; i < pages_added; i++) {
                PGC_PAGE *tpg = pages[i];

                internal_fatal(page_get_status_flags(tpg) != PGC_PAGE_DIRTY,
                               "DBENGINE CACHE: page should be in the dirty list before saved");

                pages_cancelled_size += tpg->assumed_size;
                pages_cancelled++;

                page_transition_unlock(cache, tpg);
                page_release(cache, tpg, false);
                // page ptr may be invalid now
            }

            __atomic_add_fetch(&cache->stats.flushes_cancelled, pages_cancelled, __ATOMIC_RELAXED);
            __atomic_add_fetch(&cache->stats.flushes_cancelled_size, pages_cancelled_size, __ATOMIC_RELAXED);

            internal_fatal(pages_added != pages_cancelled || pages_added_size != pages_cancelled_size,
                           "DBENGINE CACHE: flushing cancel pages mismatch");

            // next time, continue to the next section (tier)
            first = false;
            continue;
        }

        if(cache->config.pgc_save_init_cb)
            cache->config.pgc_save_init_cb(cache, last_section);

        pgc_ll_unlock(cache, &cache->dirty);
        have_dirty_lock = false;

        // call the callback to save them
        // it may take some time, so let's release the lock
        cache->config.pgc_save_dirty_cb(cache, array, pages, pages_added);
        flushes_so_far++;

        __atomic_add_fetch(&cache->stats.flushes_completed, pages_added, __ATOMIC_RELAXED);
        __atomic_add_fetch(&cache->stats.flushes_completed_size, pages_added_size, __ATOMIC_RELAXED);

        size_t pages_to_evict = 0; (void)pages_to_evict;
        for (size_t i = 0; i < pages_added; i++) {
            PGC_PAGE *tpg = pages[i];

            internal_fatal(page_get_status_flags(tpg) != 0,
                           "DBENGINE CACHE: page should not be in any list while it is being saved");

            __atomic_sub_fetch(&cache->stats.flushing_entries, 1, __ATOMIC_RELAXED);
            __atomic_sub_fetch(&cache->stats.flushing_size, tpg->assumed_size, __ATOMIC_RELAXED);

            pages_made_clean_size += tpg->assumed_size;
            pages_made_clean++;

            if(!tpg->accesses)
                pages_to_evict++;

            page_set_clean(cache, tpg, true, false);
            page_transition_unlock(cache, tpg);
            page_release(cache, tpg, false);
            // tpg ptr may be invalid now
        }

        internal_fatal(pages_added != pages_made_clean || pages_added != pages_removed_dirty ||
                       pages_added_size != pages_made_clean_size || pages_added_size != pages_removed_dirty_size
                       , "DBENGINE CACHE: flushing pages mismatch");

        if(!all_of_them && !wait) {
            if(pgc_ll_trylock(cache, &cache->dirty))
                have_dirty_lock = true;

            else {
                stopped_before_finishing = true;
                have_dirty_lock = false;
            }
        }
        else {
            pgc_ll_lock(cache, &cache->dirty);
            have_dirty_lock = true;
        }
    }

    if(have_dirty_lock) {
        if(!stopped_before_finishing && dirty_version_at_entry > cache->dirty.last_version_checked)
            cache->dirty.last_version_checked = dirty_version_at_entry;

        pgc_ll_unlock(cache, &cache->dirty);
    }

    __atomic_sub_fetch(&cache->stats.workers_flush, 1, __ATOMIC_RELAXED);

    return stopped_before_finishing;
}
void free_all_unreferenced_clean_pages(PGC *cache) {
    evict_pages(cache, 0, 0, true, true);
}

// ----------------------------------------------------------------------------
// public API

PGC *pgc_create(const char *name,
                size_t clean_size_bytes, free_clean_page_callback pgc_free_cb,
                size_t max_dirty_pages_per_flush,
                save_dirty_init_callback pgc_save_init_cb,
                save_dirty_page_callback pgc_save_dirty_cb,
                size_t max_pages_per_inline_eviction, size_t max_inline_evictors,
                size_t max_skip_pages_per_inline_eviction,
                size_t max_flushes_inline,
                PGC_OPTIONS options, size_t partitions, size_t additional_bytes_per_page) {

    if(max_pages_per_inline_eviction < 2)
        max_pages_per_inline_eviction = 2;

    if(max_dirty_pages_per_flush < 1)
        max_dirty_pages_per_flush = 1;

    if(max_flushes_inline * max_dirty_pages_per_flush < 2)
        max_flushes_inline = 2;

    PGC *cache = callocz(1, sizeof(PGC));
    strncpyz(cache->config.name, name, PGC_NAME_MAX);
    cache->config.options = options;
    cache->config.clean_size = (clean_size_bytes < 1 * 1024 * 1024) ? 1 * 1024 * 1024 : clean_size_bytes;
    cache->config.pgc_free_clean_cb = pgc_free_cb;
    cache->config.max_dirty_pages_per_call = max_dirty_pages_per_flush;
    cache->config.pgc_save_init_cb = pgc_save_init_cb;
    cache->config.pgc_save_dirty_cb = pgc_save_dirty_cb;
    cache->config.max_pages_per_inline_eviction = max_pages_per_inline_eviction;
    cache->config.max_skip_pages_per_inline_eviction = (max_skip_pages_per_inline_eviction < 2) ? 2 : max_skip_pages_per_inline_eviction;
    cache->config.max_flushes_inline = (max_flushes_inline < 1) ? 1 : max_flushes_inline;
    cache->config.partitions = partitions < 1 ? (size_t)get_netdata_cpus() : partitions;
    cache->config.additional_bytes_per_page = additional_bytes_per_page;

    cache->config.max_workers_evict_inline = max_inline_evictors;
    cache->config.severe_pressure_per1000 = 1010;
    cache->config.aggressive_evict_per1000 = 990;
    cache->config.healthy_size_per1000 = 980;
    cache->config.evict_low_threshold_per1000 = 970;

    cache->index = callocz(cache->config.partitions, sizeof(struct pgc_index));

    for(size_t part = 0; part < cache->config.partitions ; part++)
        rw_spinlock_init(&cache->index[part].rw_spinlock);

    spinlock_init(&cache->hot.spinlock);
    spinlock_init(&cache->dirty.spinlock);
    spinlock_init(&cache->clean.spinlock);

    cache->hot.flags = PGC_PAGE_HOT;
    cache->hot.linked_list_in_sections_judy = true;
    cache->hot.stats = &cache->stats.queues.hot;

    cache->dirty.flags = PGC_PAGE_DIRTY;
    cache->dirty.linked_list_in_sections_judy = true;
    cache->dirty.stats = &cache->stats.queues.dirty;

    cache->clean.flags = PGC_PAGE_CLEAN;
    cache->clean.linked_list_in_sections_judy = false;
    cache->clean.stats = &cache->stats.queues.clean;

    pgc_section_pages_static_aral_init();

#ifdef PGC_WITH_ARAL
    cache->aral = callocz(cache->config.partitions, sizeof(ARAL *));
    for(size_t part = 0; part < cache->config.partitions ; part++) {
        char buf[100 +1];
        snprintfz(buf, 100, "%s[%zu]", name, part);
        cache->aral[part] = aral_create(
                buf,
                sizeof(PGC_PAGE) + cache->config.additional_bytes_per_page,
                0,
                16384,
                aral_statistics(pgc_section_pages_aral),
                NULL, NULL, false, false);
    }
#endif

    pointer_index_init(cache);

    return cache;
}
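// Illustrative use of the public API (not part of the cache itself; the callbacks and the
// section/metric/timestamp values below are made up for the example, mirroring what
// pgc_unittest() does at the end of this file):
//
//     static void my_free_cb(PGC *cache, PGC_ENTRY entry) { ; }
//     static void my_flush_cb(PGC *cache, PGC_ENTRY *array, PGC_PAGE **pages, size_t n) { ; }
//
//     PGC *cache = pgc_create("example", 32 * 1024 * 1024, my_free_cb,
//                             64, NULL, my_flush_cb,
//                             10, 10, 1000, 10,
//                             PGC_OPTIONS_DEFAULT, 1, 0);
//
//     PGC_PAGE *page = pgc_page_add_and_acquire(cache, (PGC_ENTRY){
//         .section = 1, .metric_id = 1, .start_time_s = 100, .end_time_s = 200,
//         .update_every_s = 1, .size = 4096, .data = NULL, .hot = false,
//     }, NULL);
//
//     pgc_page_release(cache, page);
//     pgc_destroy(cache);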
struct aral_statistics *pgc_aral_statistics(void) {
    return aral_statistics(pgc_section_pages_aral);
}

size_t pgc_aral_structures(void) {
    return aral_structures(pgc_section_pages_aral);
}

size_t pgc_aral_overhead(void) {
    return aral_overhead(pgc_section_pages_aral);
}

void pgc_flush_all_hot_and_dirty_pages(PGC *cache, Word_t section) {
    all_hot_pages_to_dirty(cache, section);

    // save all dirty pages to make them clean
    flush_pages(cache, 0, section, true, true);
}

void pgc_destroy(PGC *cache) {
    // convert all hot pages to dirty
    all_hot_pages_to_dirty(cache, PGC_SECTION_ALL);

    // save all dirty pages to make them clean
    flush_pages(cache, 0, PGC_SECTION_ALL, true, true);

    // free all unreferenced clean pages
    free_all_unreferenced_clean_pages(cache);

    if(PGC_REFERENCED_PAGES(cache))
        netdata_log_error("DBENGINE CACHE: there are %zu referenced cache pages - leaving the cache allocated", PGC_REFERENCED_PAGES(cache));
    else {
        pointer_destroy_index(cache);

//        for(size_t part = 0; part < cache->config.partitions ; part++)
//            netdata_rwlock_destroy(&cache->index[part].rw_spinlock);

#ifdef PGC_WITH_ARAL
        for(size_t part = 0; part < cache->config.partitions ; part++)
            aral_destroy(cache->aral[part]);

        freez(cache->aral);
#endif

        freez(cache);
    }
}
PGC_PAGE *pgc_page_add_and_acquire(PGC *cache, PGC_ENTRY entry, bool *added) {
    return page_add(cache, &entry, added);
}

PGC_PAGE *pgc_page_dup(PGC *cache, PGC_PAGE *page) {
    if(!page_acquire(cache, page))
        fatal("DBENGINE CACHE: tried to dup a page that is not acquired!");

    return page;
}

void pgc_page_release(PGC *cache, PGC_PAGE *page) {
    page_release(cache, page, is_page_clean(page));
}

void pgc_page_hot_to_dirty_and_release(PGC *cache, PGC_PAGE *page) {
    __atomic_add_fetch(&cache->stats.workers_hot2dirty, 1, __ATOMIC_RELAXED);

//#ifdef NETDATA_INTERNAL_CHECKS
//    page_transition_lock(cache, page);
//    internal_fatal(!is_page_hot(page), "DBENGINE CACHE: called %s() but page is not hot", __FUNCTION__ );
//    page_transition_unlock(cache, page);
//#endif

    // make page dirty
    page_set_dirty(cache, page, false);

    // release the page
    page_release(cache, page, true);
    // page ptr may be invalid now

    __atomic_sub_fetch(&cache->stats.workers_hot2dirty, 1, __ATOMIC_RELAXED);

    // flush, if we have to
    if((cache->config.options & PGC_OPTIONS_FLUSH_PAGES_INLINE) || flushing_critical(cache)) {
        flush_pages(cache, cache->config.max_flushes_inline, PGC_SECTION_ALL,
                    false, false);
    }
}
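// pgc_page_to_clean_evict_or_release(): intended for hot pages that ended up with no
// useful data (see the flag and statistics names used below); accesses are zeroed so the
// page is evicted immediately if no one else holds a reference, otherwise it is released
// and evicted later.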
bool pgc_page_to_clean_evict_or_release(PGC *cache, PGC_PAGE *page) {
    bool ret;

    __atomic_add_fetch(&cache->stats.workers_hot2dirty, 1, __ATOMIC_RELAXED);

    // prevent accesses from increasing the accesses counter
    page_flag_set(page, PGC_PAGE_HAS_NO_DATA_IGNORE_ACCESSES);

    // zero the accesses counter
    __atomic_store_n(&page->accesses, 0, __ATOMIC_RELEASE);

    // if there are no other references to it, evict it immediately
    if(make_acquired_page_clean_and_evict_or_page_release(cache, page)) {
        __atomic_add_fetch(&cache->stats.hot_empty_pages_evicted_immediately, 1, __ATOMIC_RELAXED);
        ret = true;
    }
    else {
        __atomic_add_fetch(&cache->stats.hot_empty_pages_evicted_later, 1, __ATOMIC_RELAXED);
        ret = false;
    }

    __atomic_sub_fetch(&cache->stats.workers_hot2dirty, 1, __ATOMIC_RELAXED);

    return ret;
}

Word_t pgc_page_section(PGC_PAGE *page) {
    return page->section;
}

Word_t pgc_page_metric(PGC_PAGE *page) {
    return page->metric_id;
}

time_t pgc_page_start_time_s(PGC_PAGE *page) {
    return page->start_time_s;
}

time_t pgc_page_end_time_s(PGC_PAGE *page) {
    return page->end_time_s;
}

time_t pgc_page_update_every_s(PGC_PAGE *page) {
    return page->update_every_s;
}

time_t pgc_page_fix_update_every(PGC_PAGE *page, time_t update_every_s) {
    if(page->update_every_s == 0)
        page->update_every_s = (uint32_t) update_every_s;

    return page->update_every_s;
}

time_t pgc_page_fix_end_time_s(PGC_PAGE *page, time_t end_time_s) {
    page->end_time_s = end_time_s;
    return page->end_time_s;
}

void *pgc_page_data(PGC_PAGE *page) {
    return page->data;
}

void *pgc_page_custom_data(PGC *cache, PGC_PAGE *page) {
    if(cache->config.additional_bytes_per_page)
        return page->custom_data;

    return NULL;
}

size_t pgc_page_data_size(PGC *cache, PGC_PAGE *page) {
    return page_size_from_assumed_size(cache, page->assumed_size);
}

bool pgc_is_page_hot(PGC_PAGE *page) {
    return is_page_hot(page);
}

bool pgc_is_page_dirty(PGC_PAGE *page) {
    return is_page_dirty(page);
}

bool pgc_is_page_clean(PGC_PAGE *page) {
    return is_page_clean(page);
}

void pgc_reset_hot_max(PGC *cache) {
    size_t entries = __atomic_load_n(&cache->hot.stats->entries, __ATOMIC_RELAXED);
    size_t size = __atomic_load_n(&cache->hot.stats->size, __ATOMIC_RELAXED);

    __atomic_store_n(&cache->hot.stats->max_entries, entries, __ATOMIC_RELAXED);
    __atomic_store_n(&cache->hot.stats->max_size, size, __ATOMIC_RELAXED);

    size_t size_to_evict = 0;
    cache_usage_per1000(cache, &size_to_evict);
    evict_pages(cache, 0, 0, true, false);
}

void pgc_set_dynamic_target_cache_size_callback(PGC *cache, dynamic_target_cache_size_callback callback) {
    cache->config.dynamic_target_size_cb = callback;

    size_t size_to_evict = 0;
    cache_usage_per1000(cache, &size_to_evict);
    evict_pages(cache, 0, 0, true, false);
}

size_t pgc_get_current_cache_size(PGC *cache) {
    cache_usage_per1000(cache, NULL);
    return __atomic_load_n(&cache->stats.current_cache_size, __ATOMIC_RELAXED);
}

size_t pgc_get_wanted_cache_size(PGC *cache) {
    cache_usage_per1000(cache, NULL);
    return __atomic_load_n(&cache->stats.wanted_cache_size, __ATOMIC_RELAXED);
}

bool pgc_evict_pages(PGC *cache, size_t max_skip, size_t max_evict) {
    bool under_pressure = cache_needs_space_aggressively(cache);
    return evict_pages(cache,
                       under_pressure ? 0 : max_skip,
                       under_pressure ? 0 : max_evict,
                       true, false);
}

bool pgc_flush_pages(PGC *cache, size_t max_flushes) {
    bool under_pressure = flushing_critical(cache);
    return flush_pages(cache, under_pressure ? 0 : max_flushes, PGC_SECTION_ALL, true, false);
}

void pgc_page_hot_set_end_time_s(PGC *cache __maybe_unused, PGC_PAGE *page, time_t end_time_s) {
    internal_fatal(!is_page_hot(page),
                   "DBENGINE CACHE: end_time_s update on non-hot page");

    internal_fatal(end_time_s < __atomic_load_n(&page->end_time_s, __ATOMIC_RELAXED),
                   "DBENGINE CACHE: end_time_s is not bigger than existing");

    __atomic_store_n(&page->end_time_s, end_time_s, __ATOMIC_RELAXED);

#ifdef PGC_COUNT_POINTS_COLLECTED
    __atomic_add_fetch(&cache->stats.points_collected, 1, __ATOMIC_RELAXED);
#endif
}

PGC_PAGE *pgc_page_get_and_acquire(PGC *cache, Word_t section, Word_t metric_id, time_t start_time_s, PGC_SEARCH method) {
    return page_find_and_acquire(cache, section, metric_id, start_time_s, method);
}

struct pgc_statistics pgc_get_statistics(PGC *cache) {
    // FIXME - get the statistics atomically
    return cache->stats;
}

size_t pgc_hot_and_dirty_entries(PGC *cache) {
    size_t entries = 0;

    entries += __atomic_load_n(&cache->hot.stats->entries, __ATOMIC_RELAXED);
    entries += __atomic_load_n(&cache->dirty.stats->entries, __ATOMIC_RELAXED);
    entries += __atomic_load_n(&cache->stats.flushing_entries, __ATOMIC_RELAXED);
    entries += __atomic_load_n(&cache->stats.hot2dirty_entries, __ATOMIC_RELAXED);

    return entries;
}
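// pgc_open_cache_to_journal_v2(): index all hot pages of a section that belong to the
// given datafile into temporary Judy arrays (metrics, pages per metric, unique extents),
// pass them to the migrate_to_v2_callback, and afterwards turn each indexed page from hot
// to dirty so it gets flushed. Only one jv2 indexer may run per section at a time,
// guarded by the section's migration_to_v2_spinlock.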
void pgc_open_cache_to_journal_v2(PGC *cache, Word_t section, unsigned datafile_fileno, uint8_t type, migrate_to_v2_callback cb, void *data) {
    __atomic_add_fetch(&rrdeng_cache_efficiency_stats.journal_v2_indexing_started, 1, __ATOMIC_RELAXED);
    __atomic_add_fetch(&cache->stats.workers_jv2_flush, 1, __ATOMIC_RELAXED);

    pgc_ll_lock(cache, &cache->hot);

    Pvoid_t JudyL_metrics = NULL;
    Pvoid_t JudyL_extents_pos = NULL;

    size_t count_of_unique_extents = 0;
    size_t count_of_unique_metrics = 0;
    size_t count_of_unique_pages = 0;

    size_t master_extent_index_id = 0;

    Pvoid_t *section_pages_pptr = JudyLGet(cache->hot.sections_judy, section, PJE0);
    if(!section_pages_pptr) {
        pgc_ll_unlock(cache, &cache->hot);
        return;
    }

    struct section_pages *sp = *section_pages_pptr;
    if(!spinlock_trylock(&sp->migration_to_v2_spinlock)) {
        netdata_log_info("DBENGINE: migration to journal v2 for datafile %u is postponed, another jv2 indexer is already running for this section", datafile_fileno);
        pgc_ll_unlock(cache, &cache->hot);
        return;
    }

    ARAL *ar_mi = aral_by_size_acquire(sizeof(struct jv2_metrics_info));
    ARAL *ar_pi = aral_by_size_acquire(sizeof(struct jv2_page_info));
    ARAL *ar_ei = aral_by_size_acquire(sizeof(struct jv2_extents_info));

    for(PGC_PAGE *page = sp->base; page ; page = page->link.next) {
        struct extent_io_data *xio = (struct extent_io_data *)page->custom_data;
        if(xio->fileno != datafile_fileno) continue;

        if(page_flag_check(page, PGC_PAGE_IS_BEING_MIGRATED_TO_V2)) {
            internal_fatal(true, "Migration to journal v2: page has already been migrated to v2");
            continue;
        }

        if(!page_transition_trylock(cache, page)) {
            internal_fatal(true, "Migration to journal v2: cannot get page transition lock");
            continue;
        }

        if(!page_acquire(cache, page)) {
            internal_fatal(true, "Migration to journal v2: cannot acquire page for migration to v2");
            continue;
        }

        page_flag_set(page, PGC_PAGE_IS_BEING_MIGRATED_TO_V2);

        pgc_ll_unlock(cache, &cache->hot);

        // update the extents JudyL
        size_t current_extent_index_id;
        Pvoid_t *PValue = JudyLIns(&JudyL_extents_pos, xio->pos, PJE0);
        if(!PValue || *PValue == PJERR)
            fatal("Corrupted JudyL extents pos");

        struct jv2_extents_info *ei;
        if(!*PValue) {
            ei = aral_mallocz(ar_ei); // callocz(1, sizeof(struct jv2_extents_info));
            ei->pos = xio->pos;
            ei->bytes = xio->bytes;
            ei->number_of_pages = 1;
            ei->index = master_extent_index_id++;
            *PValue = ei;

            count_of_unique_extents++;
        }
        else {
            ei = *PValue;
            ei->number_of_pages++;
        }

        current_extent_index_id = ei->index;

        // update the metrics JudyL
        PValue = JudyLIns(&JudyL_metrics, page->metric_id, PJE0);
        if(!PValue || *PValue == PJERR)
            fatal("Corrupted JudyL metrics");

        struct jv2_metrics_info *mi;
        if(!*PValue) {
            mi = aral_mallocz(ar_mi); // callocz(1, sizeof(struct jv2_metrics_info));
            mi->uuid = mrg_metric_uuid(main_mrg, (METRIC *)page->metric_id);
            mi->first_time_s = page->start_time_s;
            mi->last_time_s = page->end_time_s;
            mi->number_of_pages = 1;
            mi->page_list_header = 0;
            mi->JudyL_pages_by_start_time = NULL;
            *PValue = mi;

            count_of_unique_metrics++;
        }
        else {
            mi = *PValue;
            mi->number_of_pages++;

            if(page->start_time_s < mi->first_time_s)
                mi->first_time_s = page->start_time_s;

            if(page->end_time_s > mi->last_time_s)
                mi->last_time_s = page->end_time_s;
        }

        PValue = JudyLIns(&mi->JudyL_pages_by_start_time, page->start_time_s, PJE0);
        if(!PValue || *PValue == PJERR)
            fatal("Corrupted JudyL metric pages");

        if(!*PValue) {
            struct jv2_page_info *pi = aral_mallocz(ar_pi); // callocz(1, (sizeof(struct jv2_page_info)));
            pi->start_time_s = page->start_time_s;
            pi->end_time_s = page->end_time_s;
            pi->update_every_s = page->update_every_s;
            pi->page_length = page_size_from_assumed_size(cache, page->assumed_size);
            pi->page = page;
            pi->extent_index = current_extent_index_id;
            pi->custom_data = (cache->config.additional_bytes_per_page) ? page->custom_data : NULL;
            *PValue = pi;

            count_of_unique_pages++;
        }
        else {
            // impossible situation
            internal_fatal(true, "Page is already in JudyL metric pages");
            page_flag_clear(page, PGC_PAGE_IS_BEING_MIGRATED_TO_V2);
            page_transition_unlock(cache, page);
            page_release(cache, page, false);
        }

        pgc_ll_lock(cache, &cache->hot);
    }

    spinlock_unlock(&sp->migration_to_v2_spinlock);
    pgc_ll_unlock(cache, &cache->hot);

    // callback
    cb(section, datafile_fileno, type, JudyL_metrics, JudyL_extents_pos, count_of_unique_extents, count_of_unique_metrics, count_of_unique_pages, data);

    {
        Pvoid_t *PValue1;
        bool metric_id_first = true;
        Word_t metric_id = 0;
        while ((PValue1 = JudyLFirstThenNext(JudyL_metrics, &metric_id, &metric_id_first))) {
            struct jv2_metrics_info *mi = *PValue1;

            Pvoid_t *PValue2;
            bool start_time_first = true;
            Word_t start_time = 0;
            while ((PValue2 = JudyLFirstThenNext(mi->JudyL_pages_by_start_time, &start_time, &start_time_first))) {
                struct jv2_page_info *pi = *PValue2;
                page_transition_unlock(cache, pi->page);
                pgc_page_hot_to_dirty_and_release(cache, pi->page);
                // make_acquired_page_clean_and_evict_or_page_release(cache, pi->page);
                aral_freez(ar_pi, pi);
            }

            JudyLFreeArray(&mi->JudyL_pages_by_start_time, PJE0);
            aral_freez(ar_mi, mi);
        }

        JudyLFreeArray(&JudyL_metrics, PJE0);
    }

    {
        Pvoid_t *PValue;
        bool extent_pos_first = true;
        Word_t extent_pos = 0;
        while ((PValue = JudyLFirstThenNext(JudyL_extents_pos, &extent_pos, &extent_pos_first))) {
            struct jv2_extents_info *ei = *PValue;
            aral_freez(ar_ei, ei);
        }

        JudyLFreeArray(&JudyL_extents_pos, PJE0);
    }

    aral_by_size_release(ar_ei);
    aral_by_size_release(ar_pi);
    aral_by_size_release(ar_mi);

    __atomic_sub_fetch(&cache->stats.workers_jv2_flush, 1, __ATOMIC_RELAXED);
}
static bool match_page_data(PGC_PAGE *page, void *data) {
    return (page->data == data);
}

void pgc_open_evict_clean_pages_of_datafile(PGC *cache, struct rrdengine_datafile *datafile) {
    evict_pages_with_filter(cache, 0, 0, true, true, match_page_data, datafile);
}

size_t pgc_count_clean_pages_having_data_ptr(PGC *cache, Word_t section, void *ptr) {
    size_t found = 0;

    pgc_ll_lock(cache, &cache->clean);
    for(PGC_PAGE *page = cache->clean.base; page ;page = page->link.next)
        found += (page->data == ptr && page->section == section) ? 1 : 0;
    pgc_ll_unlock(cache, &cache->clean);

    return found;
}

size_t pgc_count_hot_pages_having_data_ptr(PGC *cache, Word_t section, void *ptr) {
    size_t found = 0;

    pgc_ll_lock(cache, &cache->hot);
    Pvoid_t *section_pages_pptr = JudyLGet(cache->hot.sections_judy, section, PJE0);
    if(section_pages_pptr) {
        struct section_pages *sp = *section_pages_pptr;
        for(PGC_PAGE *page = sp->base; page ;page = page->link.next)
            found += (page->data == ptr) ? 1 : 0;
    }
    pgc_ll_unlock(cache, &cache->hot);

    return found;
}

// ----------------------------------------------------------------------------
// unittest

static void unittest_free_clean_page_callback(PGC *cache __maybe_unused, PGC_ENTRY entry __maybe_unused) {
    ;
}

static void unittest_save_dirty_page_callback(PGC *cache __maybe_unused, PGC_ENTRY *entries_array __maybe_unused, PGC_PAGE **pages_array __maybe_unused, size_t entries __maybe_unused) {
    ;
}
#ifdef PGC_STRESS_TEST

struct {
    bool stop;
    PGC *cache;
    PGC_PAGE **metrics;
    size_t clean_metrics;
    size_t hot_metrics;
    time_t first_time_t;
    time_t last_time_t;
    size_t cache_size;
    size_t query_threads;
    size_t collect_threads;
    size_t partitions;
    size_t points_per_page;
    time_t time_per_collection_ut;
    time_t time_per_query_ut;
    time_t time_per_flush_ut;
    PGC_OPTIONS options;
    char rand_statebufs[1024];
    struct random_data *random_data;
} pgc_uts = {
    .stop = false,
    .metrics = NULL,
    .clean_metrics = 100000,
    .hot_metrics = 1000000,
    .first_time_t = 100000000,
    .last_time_t = 0,
    .cache_size = 0, // get the default (8MB)
    .collect_threads = 16,
    .query_threads = 16,
    .partitions = 0, // get the default (system cpus)
    .options = PGC_OPTIONS_AUTOSCALE, /* PGC_OPTIONS_FLUSH_PAGES_INLINE | PGC_OPTIONS_EVICT_PAGES_INLINE, */
    .points_per_page = 10,
    .time_per_collection_ut = 1000000,
    .time_per_query_ut = 250,
    .time_per_flush_ut = 100,
    .rand_statebufs = {},
    .random_data = NULL,
};
void *unittest_stress_test_collector(void *ptr) {
    size_t id = *((size_t *)ptr);

    size_t metric_start = pgc_uts.clean_metrics;
    size_t metric_end = pgc_uts.clean_metrics + pgc_uts.hot_metrics;
    size_t number_of_metrics = metric_end - metric_start;
    size_t per_collector_metrics = number_of_metrics / pgc_uts.collect_threads;
    metric_start = metric_start + per_collector_metrics * id + 1;
    metric_end = metric_start + per_collector_metrics - 1;

    time_t start_time_t = pgc_uts.first_time_t + 1;

    heartbeat_t hb;
    heartbeat_init(&hb);

    while(!__atomic_load_n(&pgc_uts.stop, __ATOMIC_RELAXED)) {
        // netdata_log_info("COLLECTOR %zu: collecting metrics %zu to %zu, from %ld to %lu", id, metric_start, metric_end, start_time_t, start_time_t + pgc_uts.points_per_page);

        netdata_thread_disable_cancelability();

        for (size_t i = metric_start; i < metric_end; i++) {
            bool added;

            pgc_uts.metrics[i] = pgc_page_add_and_acquire(pgc_uts.cache, (PGC_ENTRY) {
                .section = 1,
                .metric_id = i,
                .start_time_s = start_time_t,
                .end_time_s = start_time_t,
                .update_every_s = 1,
                .size = 4096,
                .data = NULL,
                .hot = true,
            }, &added);

            if(!pgc_is_page_hot(pgc_uts.metrics[i]) || !added) {
                pgc_page_release(pgc_uts.cache, pgc_uts.metrics[i]);
                pgc_uts.metrics[i] = NULL;
            }
        }

        time_t end_time_t = start_time_t + (time_t)pgc_uts.points_per_page;
        while(++start_time_t <= end_time_t && !__atomic_load_n(&pgc_uts.stop, __ATOMIC_RELAXED)) {
            heartbeat_next(&hb, pgc_uts.time_per_collection_ut);

            for (size_t i = metric_start; i < metric_end; i++) {
                if(pgc_uts.metrics[i])
                    pgc_page_hot_set_end_time_s(pgc_uts.cache, pgc_uts.metrics[i], start_time_t);
            }

            __atomic_store_n(&pgc_uts.last_time_t, start_time_t, __ATOMIC_RELAXED);
        }

        for (size_t i = metric_start; i < metric_end; i++) {
            if (pgc_uts.metrics[i]) {
                if(i % 10 == 0)
                    pgc_page_to_clean_evict_or_release(pgc_uts.cache, pgc_uts.metrics[i]);
                else
                    pgc_page_hot_to_dirty_and_release(pgc_uts.cache, pgc_uts.metrics[i]);
            }
        }

        netdata_thread_enable_cancelability();
    }

    return ptr;
}
void *unittest_stress_test_queries(void *ptr) {
    size_t id = *((size_t *)ptr);
    struct random_data *random_data = &pgc_uts.random_data[id];

    size_t start = 0;
    size_t end = pgc_uts.clean_metrics + pgc_uts.hot_metrics;

    while(!__atomic_load_n(&pgc_uts.stop, __ATOMIC_RELAXED)) {
        netdata_thread_disable_cancelability();

        int32_t random_number;
        random_r(random_data, &random_number);

        size_t metric_id = random_number % (end - start);
        time_t start_time_t = pgc_uts.first_time_t;
        time_t end_time_t = __atomic_load_n(&pgc_uts.last_time_t, __ATOMIC_RELAXED);
        if(end_time_t <= start_time_t)
            end_time_t = start_time_t + 1;
        size_t pages = (end_time_t - start_time_t) / pgc_uts.points_per_page + 1;

        PGC_PAGE *array[pages];
        for(size_t i = 0; i < pages ;i++)
            array[i] = NULL;

        // find the pages the cache has
        for(size_t i = 0; i < pages ;i++) {
            time_t page_start_time = start_time_t + (time_t)(i * pgc_uts.points_per_page);
            array[i] = pgc_page_get_and_acquire(pgc_uts.cache, 1, metric_id,
                                                page_start_time, (i < pages - 1)?PGC_SEARCH_EXACT:PGC_SEARCH_CLOSEST);
        }

        // load the rest of the pages
        for(size_t i = 0; i < pages ;i++) {
            if(array[i]) continue;

            time_t page_start_time = start_time_t + (time_t)(i * pgc_uts.points_per_page);
            array[i] = pgc_page_add_and_acquire(pgc_uts.cache, (PGC_ENTRY) {
                .section = 1,
                .metric_id = metric_id,
                .start_time_s = page_start_time,
                .end_time_s = page_start_time + (time_t)pgc_uts.points_per_page,
                .update_every_s = 1,
                .size = 4096,
                .data = NULL,
                .hot = false,
            }, NULL);
        }

        // do the query
        // ...
        struct timespec work_duration = {.tv_sec = 0, .tv_nsec = pgc_uts.time_per_query_ut * NSEC_PER_USEC };
        nanosleep(&work_duration, NULL);

        // release the pages
        for(size_t i = 0; i < pages ;i++) {
            if(!array[i]) continue;
            pgc_page_release(pgc_uts.cache, array[i]);
            array[i] = NULL;
        }

        netdata_thread_enable_cancelability();
    }

    return ptr;
}
void *unittest_stress_test_service(void *ptr) {
    heartbeat_t hb;
    heartbeat_init(&hb);
    while(!__atomic_load_n(&pgc_uts.stop, __ATOMIC_RELAXED)) {
        heartbeat_next(&hb, 1 * USEC_PER_SEC);

        pgc_flush_pages(pgc_uts.cache, 1000);
        pgc_evict_pages(pgc_uts.cache, 0, 0);
    }
    return ptr;
}

static void unittest_stress_test_save_dirty_page_callback(PGC *cache __maybe_unused, PGC_ENTRY *entries_array __maybe_unused, PGC_PAGE **pages_array __maybe_unused, size_t entries __maybe_unused) {
    // netdata_log_info("SAVE %zu pages", entries);
    if(!pgc_uts.stop) {
        usec_t t = pgc_uts.time_per_flush_ut;

        if(t > 0) {
            struct timespec work_duration = {
                    .tv_sec = t / USEC_PER_SEC,
                    .tv_nsec = (long) ((t % USEC_PER_SEC) * NSEC_PER_USEC)
            };

            nanosleep(&work_duration, NULL);
        }
    }
}
void unittest_stress_test(void) {
    pgc_uts.cache = pgc_create("test",
                               pgc_uts.cache_size * 1024 * 1024, unittest_free_clean_page_callback,
                               64, NULL, unittest_stress_test_save_dirty_page_callback,
                               1000, 10, 10000, 1,
                               pgc_uts.options, pgc_uts.partitions, 0);

    pgc_uts.metrics = callocz(pgc_uts.clean_metrics + pgc_uts.hot_metrics, sizeof(PGC_PAGE *));

    pthread_t service_thread;
    netdata_thread_create(&service_thread, "SERVICE",
                          NETDATA_THREAD_OPTION_JOINABLE | NETDATA_THREAD_OPTION_DONT_LOG,
                          unittest_stress_test_service, NULL);

    pthread_t collect_threads[pgc_uts.collect_threads];
    size_t collect_thread_ids[pgc_uts.collect_threads];
    for(size_t i = 0; i < pgc_uts.collect_threads ;i++) {
        collect_thread_ids[i] = i;
        char buffer[100 + 1];
        snprintfz(buffer, 100, "COLLECT_%zu", i);
        netdata_thread_create(&collect_threads[i], buffer,
                              NETDATA_THREAD_OPTION_JOINABLE | NETDATA_THREAD_OPTION_DONT_LOG,
                              unittest_stress_test_collector, &collect_thread_ids[i]);
    }

    pthread_t queries_threads[pgc_uts.query_threads];
    size_t query_thread_ids[pgc_uts.query_threads];
    pgc_uts.random_data = callocz(pgc_uts.query_threads, sizeof(struct random_data));
    for(size_t i = 0; i < pgc_uts.query_threads ;i++) {
        query_thread_ids[i] = i;
        char buffer[100 + 1];
        snprintfz(buffer, 100, "QUERY_%zu", i);
        initstate_r(1, pgc_uts.rand_statebufs, 1024, &pgc_uts.random_data[i]);
        netdata_thread_create(&queries_threads[i], buffer,
                              NETDATA_THREAD_OPTION_JOINABLE | NETDATA_THREAD_OPTION_DONT_LOG,
                              unittest_stress_test_queries, &query_thread_ids[i]);
    }

    heartbeat_t hb;
    heartbeat_init(&hb);

    struct {
        size_t entries;
        size_t added;
        size_t deleted;
        size_t referenced;

        size_t hot_entries;
        size_t hot_added;
        size_t hot_deleted;

        size_t dirty_entries;
        size_t dirty_added;
        size_t dirty_deleted;

        size_t clean_entries;
        size_t clean_added;
        size_t clean_deleted;

        size_t searches_exact;
        size_t searches_exact_hits;
        size_t searches_closest;
        size_t searches_closest_hits;

        size_t collections;

        size_t events_cache_under_severe_pressure;
        size_t events_cache_needs_space_90;
        size_t events_flush_critical;
    } stats = {}, old_stats = {};

    for(int i = 0; i < 86400 ;i++) {
        heartbeat_next(&hb, 1 * USEC_PER_SEC);

        old_stats = stats;
        stats.entries = __atomic_load_n(&pgc_uts.cache->stats.entries, __ATOMIC_RELAXED);
        stats.added = __atomic_load_n(&pgc_uts.cache->stats.added_entries, __ATOMIC_RELAXED);
        stats.deleted = __atomic_load_n(&pgc_uts.cache->stats.removed_entries, __ATOMIC_RELAXED);
        stats.referenced = __atomic_load_n(&pgc_uts.cache->stats.referenced_entries, __ATOMIC_RELAXED);

        stats.hot_entries = __atomic_load_n(&pgc_uts.cache->hot.stats->entries, __ATOMIC_RELAXED);
        stats.hot_added = __atomic_load_n(&pgc_uts.cache->hot.stats->added_entries, __ATOMIC_RELAXED);
        stats.hot_deleted = __atomic_load_n(&pgc_uts.cache->hot.stats->removed_entries, __ATOMIC_RELAXED);

        stats.dirty_entries = __atomic_load_n(&pgc_uts.cache->dirty.stats->entries, __ATOMIC_RELAXED);
        stats.dirty_added = __atomic_load_n(&pgc_uts.cache->dirty.stats->added_entries, __ATOMIC_RELAXED);
        stats.dirty_deleted = __atomic_load_n(&pgc_uts.cache->dirty.stats->removed_entries, __ATOMIC_RELAXED);

        stats.clean_entries = __atomic_load_n(&pgc_uts.cache->clean.stats->entries, __ATOMIC_RELAXED);
        stats.clean_added = __atomic_load_n(&pgc_uts.cache->clean.stats->added_entries, __ATOMIC_RELAXED);
        stats.clean_deleted = __atomic_load_n(&pgc_uts.cache->clean.stats->removed_entries, __ATOMIC_RELAXED);

        stats.searches_exact = __atomic_load_n(&pgc_uts.cache->stats.searches_exact, __ATOMIC_RELAXED);
        stats.searches_exact_hits = __atomic_load_n(&pgc_uts.cache->stats.searches_exact_hits, __ATOMIC_RELAXED);

        stats.searches_closest = __atomic_load_n(&pgc_uts.cache->stats.searches_closest, __ATOMIC_RELAXED);
        stats.searches_closest_hits = __atomic_load_n(&pgc_uts.cache->stats.searches_closest_hits, __ATOMIC_RELAXED);

        stats.events_cache_under_severe_pressure = __atomic_load_n(&pgc_uts.cache->stats.events_cache_under_severe_pressure, __ATOMIC_RELAXED);
        stats.events_cache_needs_space_90 = __atomic_load_n(&pgc_uts.cache->stats.events_cache_needs_space_aggressively, __ATOMIC_RELAXED);
        stats.events_flush_critical = __atomic_load_n(&pgc_uts.cache->stats.events_flush_critical, __ATOMIC_RELAXED);

        size_t searches_exact = stats.searches_exact - old_stats.searches_exact;
        size_t searches_closest = stats.searches_closest - old_stats.searches_closest;

        size_t hit_exact = stats.searches_exact_hits - old_stats.searches_exact_hits;
        size_t hit_closest = stats.searches_closest_hits - old_stats.searches_closest_hits;

        double hit_exact_pc = (searches_exact > 0) ? (double)hit_exact * 100.0 / (double)searches_exact : 0.0;
        double hit_closest_pc = (searches_closest > 0) ? (double)hit_closest * 100.0 / (double)searches_closest : 0.0;

#ifdef PGC_COUNT_POINTS_COLLECTED
        stats.collections = __atomic_load_n(&pgc_uts.cache->stats.points_collected, __ATOMIC_RELAXED);
#endif

        char *cache_status = "N";
        if(stats.events_cache_under_severe_pressure > old_stats.events_cache_under_severe_pressure)
            cache_status = "F";
        else if(stats.events_cache_needs_space_90 > old_stats.events_cache_needs_space_90)
            cache_status = "f";

        char *flushing_status = "N";
        if(stats.events_flush_critical > old_stats.events_flush_critical)
            flushing_status = "F";

        netdata_log_info("PGS %5zuk +%4zuk/-%4zuk "
                         "| RF %5zuk "
                         "| HOT %5zuk +%4zuk -%4zuk "
                         "| DRT %s %5zuk +%4zuk -%4zuk "
                         "| CLN %s %5zuk +%4zuk -%4zuk "
                         "| SRCH %4zuk %4zuk, HIT %4.1f%% %4.1f%% "
#ifdef PGC_COUNT_POINTS_COLLECTED
                         "| CLCT %8.4f Mps"
#endif
                         , stats.entries / 1000
                         , (stats.added - old_stats.added) / 1000, (stats.deleted - old_stats.deleted) / 1000
                         , stats.referenced / 1000
                         , stats.hot_entries / 1000, (stats.hot_added - old_stats.hot_added) / 1000, (stats.hot_deleted - old_stats.hot_deleted) / 1000
                         , flushing_status
                         , stats.dirty_entries / 1000
                         , (stats.dirty_added - old_stats.dirty_added) / 1000, (stats.dirty_deleted - old_stats.dirty_deleted) / 1000
                         , cache_status
                         , stats.clean_entries / 1000
                         , (stats.clean_added - old_stats.clean_added) / 1000, (stats.clean_deleted - old_stats.clean_deleted) / 1000
                         , searches_exact / 1000, searches_closest / 1000
                         , hit_exact_pc, hit_closest_pc
#ifdef PGC_COUNT_POINTS_COLLECTED
                         , (double)(stats.collections - old_stats.collections) / 1000.0 / 1000.0
#endif
                         );
    }

    netdata_log_info("Waiting for threads to stop...");
    __atomic_store_n(&pgc_uts.stop, true, __ATOMIC_RELAXED);

    netdata_thread_join(service_thread, NULL);

    for(size_t i = 0; i < pgc_uts.collect_threads ;i++)
        netdata_thread_join(collect_threads[i], NULL);

    for(size_t i = 0; i < pgc_uts.query_threads ;i++)
        netdata_thread_join(queries_threads[i], NULL);

    pgc_destroy(pgc_uts.cache);

    freez(pgc_uts.metrics);
    freez(pgc_uts.random_data);
}
#endif
int pgc_unittest(void) {
    PGC *cache = pgc_create("test",
                            32 * 1024 * 1024, unittest_free_clean_page_callback,
                            64, NULL, unittest_save_dirty_page_callback,
                            10, 10, 1000, 10,
                            PGC_OPTIONS_DEFAULT, 1, 11);

    // FIXME - unit tests
    // - add clean page
    // - add clean page again (should not add it)
    // - release page (should decrement counters)
    // - add hot page
    // - add hot page again (should not add it)
    // - turn hot page to dirty, with and without a reference counter to it
    // - dirty pages are saved once there are enough of them
    // - find page exact
    // - find page (should return last)
    // - find page (should return next)
    // - page cache full (should evict)
    // - on destroy, turn hot pages to dirty and save them

    PGC_PAGE *page1 = pgc_page_add_and_acquire(cache, (PGC_ENTRY){
        .section = 1,
        .metric_id = 10,
        .start_time_s = 100,
        .end_time_s = 1000,
        .size = 4096,
        .data = NULL,
        .hot = false,
        .custom_data = (uint8_t *)"0123456789",
    }, NULL);

    if(strcmp(pgc_page_custom_data(cache, page1), "0123456789") != 0)
        fatal("custom data do not work");

    memcpy(pgc_page_custom_data(cache, page1), "ABCDEFGHIJ", 11);
    if(strcmp(pgc_page_custom_data(cache, page1), "ABCDEFGHIJ") != 0)
        fatal("custom data do not work");

    pgc_page_release(cache, page1);

    PGC_PAGE *page2 = pgc_page_add_and_acquire(cache, (PGC_ENTRY){
        .section = 2,
        .metric_id = 10,
        .start_time_s = 1001,
        .end_time_s = 2000,
        .size = 4096,
        .data = NULL,
        .hot = true,
    }, NULL);

    pgc_page_hot_set_end_time_s(cache, page2, 2001);
    pgc_page_hot_to_dirty_and_release(cache, page2);

    PGC_PAGE *page3 = pgc_page_add_and_acquire(cache, (PGC_ENTRY){
        .section = 3,
        .metric_id = 10,
        .start_time_s = 1001,
        .end_time_s = 2000,
        .size = 4096,
        .data = NULL,
        .hot = true,
    }, NULL);

    pgc_page_hot_set_end_time_s(cache, page3, 2001);
    pgc_page_hot_to_dirty_and_release(cache, page3);

    pgc_destroy(cache);

#ifdef PGC_STRESS_TEST
    unittest_stress_test();
#endif

    return 0;
}