#include "../libnetdata.h"
#include "aral.h"

#ifdef NETDATA_TRACE_ALLOCATIONS
#define TRACE_ALLOCATIONS_FUNCTION_DEFINITION_PARAMS , const char *file, const char *function, size_t line
#define TRACE_ALLOCATIONS_FUNCTION_CALL_PARAMS , file, function, line
#else
#define TRACE_ALLOCATIONS_FUNCTION_DEFINITION_PARAMS
#define TRACE_ALLOCATIONS_FUNCTION_CALL_PARAMS
#endif

#define ARAL_FREE_PAGES_DELTA_TO_REARRANGE_LIST 5

// max file size
#define ARAL_MAX_PAGE_SIZE_MMAP (1*1024*1024*1024)

// max malloc size
// optimal at current versions of libc is up to 256k
// ideal to have the same overhead as libc is 4k
#define ARAL_MAX_PAGE_SIZE_MALLOC (65*1024)

typedef struct aral_free {
    size_t size;
    struct aral_free *next;
} ARAL_FREE;

typedef struct aral_page {
    size_t size;                    // the allocation size of the page
    const char *filename;
    uint8_t *data;

    uint32_t free_elements_to_move_first;
    uint32_t max_elements;          // the number of elements that can fit on this page

    struct {
        uint32_t used_elements;     // the number of used elements on this page
        uint32_t free_elements;     // the number of free elements on this page

        struct aral_page *prev;     // the prev page on the list
        struct aral_page *next;     // the next page on the list
    } aral_lock;

    struct {
        SPINLOCK spinlock;
        ARAL_FREE *list;
    } free;
} ARAL_PAGE;
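// Layout of a single slot inside a page's data area, as derived from the
// element size calculation in aral_create() and the back-pointer code in
// aral_mallocz_internal() / aral_ptr_to_page___must_NOT_have_aral_lock():
//
//   |<--------------------- element_size ---------------------->|
//   | user payload (requested_element_size) | (padding) | page* |
//                                                          ^
//                                     page_ptr_offset -----'
//
// While a slot is free, its first bytes are reused in-place as an
// ARAL_FREE node, which is why element_size is never allowed to be
// smaller than sizeof(ARAL_FREE).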
typedef enum {
    ARAL_LOCKLESS = (1 << 0),
    ARAL_DEFRAGMENT = (1 << 1),
    ARAL_ALLOCATED_STATS = (1 << 2),
} ARAL_OPTIONS;

struct aral {
    struct {
        char name[ARAL_MAX_NAME + 1];

        ARAL_OPTIONS options;

        size_t element_size;            // calculated to take into account ARAL overheads
        size_t max_allocation_size;     // calculated in bytes
        size_t max_page_elements;       // calculated
        size_t page_ptr_offset;         // calculated
        size_t natural_page_size;       // calculated

        size_t initial_page_elements;
        size_t requested_element_size;
        size_t requested_max_page_size;

        struct {
            bool enabled;
            const char *filename;
            char **cache_dir;
        } mmap;
    } config;

    struct {
        SPINLOCK spinlock;
        size_t file_number;             // for mmap
        struct aral_page *pages;        // linked list of pages

        size_t user_malloc_operations;
        size_t user_free_operations;
        size_t defragment_operations;
        size_t defragment_linked_list_traversals;
    } aral_lock;

    struct {
        SPINLOCK spinlock;
        size_t allocating_elements;     // currently allocating elements
        size_t allocation_size;         // current / next allocation size
    } adders;

    struct {
        size_t allocators;              // the number of threads currently trying to allocate memory
    } atomic;

    struct aral_statistics *stats;
};

size_t aral_structures_from_stats(struct aral_statistics *stats) {
    return __atomic_load_n(&stats->structures.allocated_bytes, __ATOMIC_RELAXED);
}

size_t aral_overhead_from_stats(struct aral_statistics *stats) {
    return __atomic_load_n(&stats->malloc.allocated_bytes, __ATOMIC_RELAXED) -
           __atomic_load_n(&stats->malloc.used_bytes, __ATOMIC_RELAXED);
}

size_t aral_overhead(ARAL *ar) {
    return aral_overhead_from_stats(ar->stats);
}

size_t aral_structures(ARAL *ar) {
    return aral_structures_from_stats(ar->stats);
}

struct aral_statistics *aral_statistics(ARAL *ar) {
    return ar->stats;
}

#define ARAL_NATURAL_ALIGNMENT (sizeof(uintptr_t) * 2)

static inline size_t natural_alignment(size_t size, size_t alignment) {
    if(unlikely(size % alignment))
        size = size + alignment - (size % alignment);

    return size;
}

static size_t aral_align_alloc_size(ARAL *ar, uint64_t size) {
    if(size % ar->config.natural_page_size)
        size += ar->config.natural_page_size - (size % ar->config.natural_page_size);

    if(size % ar->config.element_size)
        size -= size % ar->config.element_size;

    return size;
}
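// Illustrative example (assumed numbers, not taken from a live system):
// with a natural_page_size of 4096 and an element_size of 48, a requested
// size of 5000 is first rounded up to the page multiple 8192, then trimmed
// down to the element multiple 8160, so the page holds exactly 170
// elements with no trailing partial slot.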
static inline void aral_lock(ARAL *ar) {
    if(likely(!(ar->config.options & ARAL_LOCKLESS)))
        spinlock_lock(&ar->aral_lock.spinlock);
}

static inline void aral_unlock(ARAL *ar) {
    if(likely(!(ar->config.options & ARAL_LOCKLESS)))
        spinlock_unlock(&ar->aral_lock.spinlock);
}

static inline void aral_page_free_lock(ARAL *ar, ARAL_PAGE *page) {
    if(likely(!(ar->config.options & ARAL_LOCKLESS)))
        spinlock_lock(&page->free.spinlock);
}

static inline void aral_page_free_unlock(ARAL *ar, ARAL_PAGE *page) {
    if(likely(!(ar->config.options & ARAL_LOCKLESS)))
        spinlock_unlock(&page->free.spinlock);
}

static inline bool aral_adders_trylock(ARAL *ar) {
    if(likely(!(ar->config.options & ARAL_LOCKLESS)))
        return spinlock_trylock(&ar->adders.spinlock);

    return true;
}

static inline void aral_adders_lock(ARAL *ar) {
    if(likely(!(ar->config.options & ARAL_LOCKLESS)))
        spinlock_lock(&ar->adders.spinlock);
}

static inline void aral_adders_unlock(ARAL *ar) {
    if(likely(!(ar->config.options & ARAL_LOCKLESS)))
        spinlock_unlock(&ar->adders.spinlock);
}

static void aral_delete_leftover_files(const char *name, const char *path, const char *required_prefix) {
    DIR *dir = opendir(path);
    if(!dir) return;

    char full_path[FILENAME_MAX + 1];
    size_t len = strlen(required_prefix);

    struct dirent *de = NULL;
    while((de = readdir(dir))) {
        if(de->d_type == DT_DIR)
            continue;

        if(strncmp(de->d_name, required_prefix, len) != 0)
            continue;

        snprintfz(full_path, FILENAME_MAX, "%s/%s", path, de->d_name);
        netdata_log_info("ARAL: '%s' removing left-over file '%s'", name, full_path);

        if(unlikely(unlink(full_path) == -1))
            netdata_log_error("ARAL: '%s' cannot delete file '%s'", name, full_path);
    }

    closedir(dir);
}
// ----------------------------------------------------------------------------
// check a free slot

#ifdef NETDATA_INTERNAL_CHECKS
static inline void aral_free_validate_internal_check(ARAL *ar, ARAL_FREE *fr) {
    if(unlikely(fr->size < ar->config.element_size))
        fatal("ARAL: '%s' free item of size %zu, less than the expected element size %zu",
              ar->config.name, fr->size, ar->config.element_size);

    if(unlikely(fr->size % ar->config.element_size))
        fatal("ARAL: '%s' free item of size %zu is not a multiple of the element size %zu",
              ar->config.name, fr->size, ar->config.element_size);
}
#else
#define aral_free_validate_internal_check(ar, fr) debug_dummy()
#endif

// ----------------------------------------------------------------------------
// find the page a pointer belongs to

#ifdef NETDATA_INTERNAL_CHECKS
static inline ARAL_PAGE *find_page_with_allocation_internal_check(ARAL *ar, void *ptr) {
    aral_lock(ar);

    uintptr_t seeking = (uintptr_t)ptr;
    ARAL_PAGE *page;

    for(page = ar->aral_lock.pages; page; page = page->aral_lock.next) {
        if(unlikely(seeking >= (uintptr_t)page->data && seeking < (uintptr_t)page->data + page->size))
            break;
    }

    aral_unlock(ar);

    return page;
}
#endif

// ----------------------------------------------------------------------------
// find a page with a free slot (there shouldn't be any)

#ifdef NETDATA_ARAL_INTERNAL_CHECKS
static inline ARAL_PAGE *find_page_with_free_slots_internal_check___with_aral_lock(ARAL *ar) {
    ARAL_PAGE *page;

    // the list links live inside aral_lock, so traverse via aral_lock.next
    for(page = ar->aral_lock.pages; page; page = page->aral_lock.next) {
        if(page->aral_lock.free_elements)
            break;

        internal_fatal(page->size - page->aral_lock.used_elements * ar->config.element_size >= ar->config.element_size,
                       "ARAL: '%s' a page is marked full, but it is not!", ar->config.name);

        internal_fatal(page->size < page->aral_lock.used_elements * ar->config.element_size,
                       "ARAL: '%s' a page has been overflown!", ar->config.name);
    }

    return page;
}
#endif

size_t aral_next_allocation_size___adders_lock_needed(ARAL *ar) {
    size_t size = ar->adders.allocation_size;

    if(size > ar->config.max_allocation_size)
        size = ar->config.max_allocation_size;
    else
        ar->adders.allocation_size = aral_align_alloc_size(ar, (uint64_t)ar->adders.allocation_size * 2);

    return size;
}
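// Note: allocation sizes grow geometrically. Every time a page is created,
// the next allocation size doubles (re-aligned via aral_align_alloc_size())
// until it reaches config.max_allocation_size, so a busy ARAL quickly
// converges to its maximum page size.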
static ARAL_PAGE *aral_create_page___no_lock_needed(ARAL *ar, size_t size TRACE_ALLOCATIONS_FUNCTION_DEFINITION_PARAMS) {
    ARAL_PAGE *page = callocz(1, sizeof(ARAL_PAGE));
    spinlock_init(&page->free.spinlock);
    page->size = size;
    page->max_elements = page->size / ar->config.element_size;
    page->aral_lock.free_elements = page->max_elements;
    page->free_elements_to_move_first = page->max_elements / 4;
    if(unlikely(page->free_elements_to_move_first < 1))
        page->free_elements_to_move_first = 1;

    __atomic_add_fetch(&ar->stats->structures.allocations, 1, __ATOMIC_RELAXED);
    __atomic_add_fetch(&ar->stats->structures.allocated_bytes, sizeof(ARAL_PAGE), __ATOMIC_RELAXED);

    if(unlikely(ar->config.mmap.enabled)) {
        ar->aral_lock.file_number++;
        char filename[FILENAME_MAX + 1];
        snprintfz(filename, FILENAME_MAX, "%s/array_alloc.mmap/%s.%zu", *ar->config.mmap.cache_dir, ar->config.mmap.filename, ar->aral_lock.file_number);
        page->filename = strdupz(filename);
        page->data = netdata_mmap(page->filename, page->size, MAP_SHARED, 0, false, NULL);
        if (unlikely(!page->data))
            fatal("ARAL: '%s' cannot allocate aral buffer of size %zu on filename '%s'",
                  ar->config.name, page->size, page->filename);

        __atomic_add_fetch(&ar->stats->mmap.allocations, 1, __ATOMIC_RELAXED);
        __atomic_add_fetch(&ar->stats->mmap.allocated_bytes, page->size, __ATOMIC_RELAXED);
    }
    else {
#ifdef NETDATA_TRACE_ALLOCATIONS
        page->data = mallocz_int(page->size TRACE_ALLOCATIONS_FUNCTION_CALL_PARAMS);
#else
        page->data = mallocz(page->size);
#endif
        __atomic_add_fetch(&ar->stats->malloc.allocations, 1, __ATOMIC_RELAXED);
        __atomic_add_fetch(&ar->stats->malloc.allocated_bytes, page->size, __ATOMIC_RELAXED);
    }

    // link the free space to its page
    ARAL_FREE *fr = (ARAL_FREE *)page->data;
    fr->size = page->size;
    fr->next = NULL;
    page->free.list = fr;

    aral_free_validate_internal_check(ar, fr);

    return page;
}
void aral_del_page___no_lock_needed(ARAL *ar, ARAL_PAGE *page TRACE_ALLOCATIONS_FUNCTION_DEFINITION_PARAMS) {
    // free it
    if (ar->config.mmap.enabled) {
        netdata_munmap(page->data, page->size);

        // unlink() returns -1 on failure, so test for that (not 1)
        if (unlikely(unlink(page->filename) == -1))
            netdata_log_error("Cannot delete file '%s'", page->filename);

        freez((void *)page->filename);

        __atomic_sub_fetch(&ar->stats->mmap.allocations, 1, __ATOMIC_RELAXED);
        __atomic_sub_fetch(&ar->stats->mmap.allocated_bytes, page->size, __ATOMIC_RELAXED);
    }
    else {
#ifdef NETDATA_TRACE_ALLOCATIONS
        freez_int(page->data TRACE_ALLOCATIONS_FUNCTION_CALL_PARAMS);
#else
        freez(page->data);
#endif
        __atomic_sub_fetch(&ar->stats->malloc.allocations, 1, __ATOMIC_RELAXED);
        __atomic_sub_fetch(&ar->stats->malloc.allocated_bytes, page->size, __ATOMIC_RELAXED);
    }

    freez(page);

    __atomic_sub_fetch(&ar->stats->structures.allocations, 1, __ATOMIC_RELAXED);
    __atomic_sub_fetch(&ar->stats->structures.allocated_bytes, sizeof(ARAL_PAGE), __ATOMIC_RELAXED);
}
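// The pages list is kept roughly sorted so that pages with few free
// elements come first: allocations drain almost-full pages before touching
// emptier ones, letting fully empty pages be released back to the system
// sooner. The insertion below is a heuristic that only considers the first
// few positions, keeping the common path O(1).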
static inline void aral_insert_not_linked_page_with_free_items_to_proper_position___aral_lock_needed(ARAL *ar, ARAL_PAGE *page) {
    ARAL_PAGE *first = ar->aral_lock.pages;

    if (page->aral_lock.free_elements <= page->free_elements_to_move_first ||
        !first ||
        !first->aral_lock.free_elements ||
        page->aral_lock.free_elements <= first->aral_lock.free_elements + ARAL_FREE_PAGES_DELTA_TO_REARRANGE_LIST) {
        // first position
        DOUBLE_LINKED_LIST_PREPEND_ITEM_UNSAFE(ar->aral_lock.pages, page, aral_lock.prev, aral_lock.next);
    }
    else {
        ARAL_PAGE *second = first->aral_lock.next;

        if (!second ||
            !second->aral_lock.free_elements ||
            page->aral_lock.free_elements <= second->aral_lock.free_elements)
            // second position
            DOUBLE_LINKED_LIST_INSERT_ITEM_AFTER_UNSAFE(ar->aral_lock.pages, first, page, aral_lock.prev, aral_lock.next);
        else
            // third position
            DOUBLE_LINKED_LIST_INSERT_ITEM_AFTER_UNSAFE(ar->aral_lock.pages, second, page, aral_lock.prev, aral_lock.next);
    }
}
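// Concurrency notes for the acquire path below: atomic.allocators counts
// the threads currently trying to allocate; the adders spinlock gates page
// creation, and a new page is created only while adders.allocating_elements
// (the elements already being prepared by other threads) is below that
// count. This way a burst of N threads does not create N new pages.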
static inline ARAL_PAGE *aral_acquire_a_free_slot(ARAL *ar TRACE_ALLOCATIONS_FUNCTION_DEFINITION_PARAMS) {
    __atomic_add_fetch(&ar->atomic.allocators, 1, __ATOMIC_RELAXED);
    aral_lock(ar);

    ARAL_PAGE *page = ar->aral_lock.pages;

    while(!page || !page->aral_lock.free_elements) {
#ifdef NETDATA_ARAL_INTERNAL_CHECKS
        internal_fatal(find_page_with_free_slots_internal_check___with_aral_lock(ar), "ARAL: '%s' found page with free slot!", ar->config.name);
#endif
        aral_unlock(ar);

        if(aral_adders_trylock(ar)) {
            if(ar->adders.allocating_elements < __atomic_load_n(&ar->atomic.allocators, __ATOMIC_RELAXED)) {

                size_t size = aral_next_allocation_size___adders_lock_needed(ar);
                ar->adders.allocating_elements += size / ar->config.element_size;
                aral_adders_unlock(ar);

                page = aral_create_page___no_lock_needed(ar, size TRACE_ALLOCATIONS_FUNCTION_CALL_PARAMS);
                aral_lock(ar);
                aral_insert_not_linked_page_with_free_items_to_proper_position___aral_lock_needed(ar, page);

                aral_adders_lock(ar);
                ar->adders.allocating_elements -= size / ar->config.element_size;
                aral_adders_unlock(ar);

                // we have a page that is all empty
                // and only aral_lock() is held, so
                // break the loop
                break;
            }

            aral_adders_unlock(ar);
        }

        aral_lock(ar);
        page = ar->aral_lock.pages;
    }

    __atomic_sub_fetch(&ar->atomic.allocators, 1, __ATOMIC_RELAXED);

    // we have a page
    // and aral locked

    {
        ARAL_PAGE *first = ar->aral_lock.pages;
        ARAL_PAGE *second = first->aral_lock.next;

        if (!second ||
            !second->aral_lock.free_elements ||
            first->aral_lock.free_elements <= second->aral_lock.free_elements + ARAL_FREE_PAGES_DELTA_TO_REARRANGE_LIST)
            page = first;
        else {
            DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(ar->aral_lock.pages, second, aral_lock.prev, aral_lock.next);
            DOUBLE_LINKED_LIST_PREPEND_ITEM_UNSAFE(ar->aral_lock.pages, second, aral_lock.prev, aral_lock.next);
            page = second;
        }
    }

    internal_fatal(!page || !page->aral_lock.free_elements,
                   "ARAL: '%s' selected page does not have a free slot in it",
                   ar->config.name);

    internal_fatal(page->max_elements != page->aral_lock.used_elements + page->aral_lock.free_elements,
                   "ARAL: '%s' page element counters do not match, "
                   "page says it can handle %zu elements, "
                   "but there are %zu used and %zu free items, "
                   "total %zu items",
                   ar->config.name,
                   (size_t)page->max_elements,
                   (size_t)page->aral_lock.used_elements, (size_t)page->aral_lock.free_elements,
                   (size_t)page->aral_lock.used_elements + (size_t)page->aral_lock.free_elements
    );

    ar->aral_lock.user_malloc_operations++;

    // acquire a slot for the caller
    page->aral_lock.used_elements++;
    if(--page->aral_lock.free_elements == 0) {
        // we are done with this page
        // move the full page last
        // so that pages with free items remain first in the list
        DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(ar->aral_lock.pages, page, aral_lock.prev, aral_lock.next);
        DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(ar->aral_lock.pages, page, aral_lock.prev, aral_lock.next);
    }

    aral_unlock(ar);
    return page;
}
void *aral_mallocz_internal(ARAL *ar TRACE_ALLOCATIONS_FUNCTION_DEFINITION_PARAMS) {
#ifdef FSANITIZE_ADDRESS
    return mallocz(ar->config.requested_element_size);
#endif

    ARAL_PAGE *page = aral_acquire_a_free_slot(ar TRACE_ALLOCATIONS_FUNCTION_CALL_PARAMS);

    aral_page_free_lock(ar, page);

    internal_fatal(!page->free.list,
                   "ARAL: '%s' free item to use, cannot be NULL.", ar->config.name);

    internal_fatal(page->free.list->size < ar->config.element_size,
                   "ARAL: '%s' free item size %zu, cannot be smaller than %zu",
                   ar->config.name, page->free.list->size, ar->config.element_size);

    ARAL_FREE *found_fr = page->free.list;

    // check if the remaining size (after we use this slot) is not enough for another element
    if(unlikely(found_fr->size - ar->config.element_size < ar->config.element_size)) {
        // we can use the entire free space entry
        page->free.list = found_fr->next;
    }
    else {
        // we can split the free space entry
        uint8_t *data = (uint8_t *)found_fr;
        ARAL_FREE *fr = (ARAL_FREE *)&data[ar->config.element_size];
        fr->size = found_fr->size - ar->config.element_size;

        // link the free slot first in the page
        fr->next = found_fr->next;
        page->free.list = fr;

        aral_free_validate_internal_check(ar, fr);
    }

    aral_page_free_unlock(ar, page);

    // put the page pointer after the element
    uint8_t *data = (uint8_t *)found_fr;
    ARAL_PAGE **page_ptr = (ARAL_PAGE **)&data[ar->config.page_ptr_offset];
    *page_ptr = page;

    if(unlikely(ar->config.mmap.enabled))
        __atomic_add_fetch(&ar->stats->mmap.used_bytes, ar->config.element_size, __ATOMIC_RELAXED);
    else
        __atomic_add_fetch(&ar->stats->malloc.used_bytes, ar->config.element_size, __ATOMIC_RELAXED);

    return (void *)found_fr;
}
static inline ARAL_PAGE *aral_ptr_to_page___must_NOT_have_aral_lock(ARAL *ar, void *ptr) {
    // given a data pointer we returned before,
    // find the ARAL_PAGE it belongs to

    uint8_t *data = (uint8_t *)ptr;
    ARAL_PAGE **page_ptr = (ARAL_PAGE **)&data[ar->config.page_ptr_offset];
    ARAL_PAGE *page = *page_ptr;

#ifdef NETDATA_INTERNAL_CHECKS
    // make it NULL so that we will fail on double free
    // do not enable this on production, because the MMAP file
    // will need to be saved again!
    *page_ptr = NULL;
#endif

#ifdef NETDATA_ARAL_INTERNAL_CHECKS
    {
        // find the page this pointer belongs to
        ARAL_PAGE *page2 = find_page_with_allocation_internal_check(ar, ptr);

        internal_fatal(page != page2,
                       "ARAL: '%s' page pointers do not match!",
                       ar->config.name);

        internal_fatal(!page2,
                       "ARAL: '%s' free of pointer %p is not in ARAL address space.",
                       ar->config.name, ptr);
    }
#endif

    internal_fatal(!page,
                   "ARAL: '%s' possible corruption or double free of pointer %p",
                   ar->config.name, ptr);

    return page;
}
static void aral_defrag_sorted_page_position___aral_lock_needed(ARAL *ar, ARAL_PAGE *page) {
    ARAL_PAGE *tmp;

    int action = 0; (void)action;
    size_t move_later = 0, move_earlier = 0;

    for(tmp = page->aral_lock.next;
        tmp && tmp->aral_lock.free_elements && tmp->aral_lock.free_elements < page->aral_lock.free_elements;
        tmp = tmp->aral_lock.next)
        move_later++;

    if(!tmp && page->aral_lock.next) {
        DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(ar->aral_lock.pages, page, aral_lock.prev, aral_lock.next);
        DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(ar->aral_lock.pages, page, aral_lock.prev, aral_lock.next);
        action = 1;
    }
    else if(tmp != page->aral_lock.next) {
        DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(ar->aral_lock.pages, page, aral_lock.prev, aral_lock.next);
        DOUBLE_LINKED_LIST_INSERT_ITEM_BEFORE_UNSAFE(ar->aral_lock.pages, tmp, page, aral_lock.prev, aral_lock.next);
        action = 2;
    }
    else {
        for(tmp = (page == ar->aral_lock.pages) ? NULL : page->aral_lock.prev;
            tmp && (!tmp->aral_lock.free_elements || tmp->aral_lock.free_elements > page->aral_lock.free_elements);
            tmp = (tmp == ar->aral_lock.pages) ? NULL : tmp->aral_lock.prev)
            move_earlier++;

        if(!tmp) {
            DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(ar->aral_lock.pages, page, aral_lock.prev, aral_lock.next);
            DOUBLE_LINKED_LIST_PREPEND_ITEM_UNSAFE(ar->aral_lock.pages, page, aral_lock.prev, aral_lock.next);
            action = 3;
        }
        else if(tmp != page->aral_lock.prev) {
            DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(ar->aral_lock.pages, page, aral_lock.prev, aral_lock.next);
            DOUBLE_LINKED_LIST_INSERT_ITEM_AFTER_UNSAFE(ar->aral_lock.pages, tmp, page, aral_lock.prev, aral_lock.next);
            action = 4;
        }
    }

    ar->aral_lock.defragment_operations++;
    ar->aral_lock.defragment_linked_list_traversals += move_earlier + move_later;

    internal_fatal(page->aral_lock.next && page->aral_lock.next->aral_lock.free_elements && page->aral_lock.next->aral_lock.free_elements < page->aral_lock.free_elements,
                   "ARAL: '%s' item should be later in the list", ar->config.name);

    internal_fatal(page != ar->aral_lock.pages && (!page->aral_lock.prev->aral_lock.free_elements || page->aral_lock.prev->aral_lock.free_elements > page->aral_lock.free_elements),
                   "ARAL: '%s' item should be earlier in the list", ar->config.name);
}
static inline void aral_move_page_with_free_list___aral_lock_needed(ARAL *ar, ARAL_PAGE *page) {
    if(unlikely(page == ar->aral_lock.pages))
        // we are the first already
        return;

    if(likely(!(ar->config.options & ARAL_DEFRAGMENT))) {
        DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(ar->aral_lock.pages, page, aral_lock.prev, aral_lock.next);
        aral_insert_not_linked_page_with_free_items_to_proper_position___aral_lock_needed(ar, page);
    }
    else
        aral_defrag_sorted_page_position___aral_lock_needed(ar, page);
}
void aral_freez_internal(ARAL *ar, void *ptr TRACE_ALLOCATIONS_FUNCTION_DEFINITION_PARAMS) {
#ifdef FSANITIZE_ADDRESS
    freez(ptr);
    return;
#endif

    if(unlikely(!ptr)) return;

    // get the page pointer
    ARAL_PAGE *page = aral_ptr_to_page___must_NOT_have_aral_lock(ar, ptr);

    if(unlikely(ar->config.mmap.enabled))
        __atomic_sub_fetch(&ar->stats->mmap.used_bytes, ar->config.element_size, __ATOMIC_RELAXED);
    else
        __atomic_sub_fetch(&ar->stats->malloc.used_bytes, ar->config.element_size, __ATOMIC_RELAXED);

    // make this element available
    ARAL_FREE *fr = (ARAL_FREE *)ptr;
    fr->size = ar->config.element_size;

    aral_page_free_lock(ar, page);
    fr->next = page->free.list;
    page->free.list = fr;
    aral_page_free_unlock(ar, page);

    aral_lock(ar);

    internal_fatal(!page->aral_lock.used_elements,
                   "ARAL: '%s' pointer %p is inside a page without any active allocations.",
                   ar->config.name, ptr);

    internal_fatal(page->max_elements != page->aral_lock.used_elements + page->aral_lock.free_elements,
                   "ARAL: '%s' page element counters do not match, "
                   "page says it can handle %zu elements, "
                   "but there are %zu used and %zu free items, "
                   "total %zu items",
                   ar->config.name,
                   (size_t)page->max_elements,
                   (size_t)page->aral_lock.used_elements, (size_t)page->aral_lock.free_elements,
                   (size_t)page->aral_lock.used_elements + (size_t)page->aral_lock.free_elements
    );

    page->aral_lock.used_elements--;
    page->aral_lock.free_elements++;

    ar->aral_lock.user_free_operations++;

    // if the page is empty, release it
    if(unlikely(!page->aral_lock.used_elements)) {
        bool is_this_page_the_last_one = ar->aral_lock.pages == page && !page->aral_lock.next;

        if(!is_this_page_the_last_one)
            DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(ar->aral_lock.pages, page, aral_lock.prev, aral_lock.next);

        aral_unlock(ar);

        if(!is_this_page_the_last_one)
            aral_del_page___no_lock_needed(ar, page TRACE_ALLOCATIONS_FUNCTION_CALL_PARAMS);
    }
    else {
        aral_move_page_with_free_list___aral_lock_needed(ar, page);
        aral_unlock(ar);
    }
}
void aral_destroy_internal(ARAL *ar TRACE_ALLOCATIONS_FUNCTION_DEFINITION_PARAMS) {
    aral_lock(ar);

    ARAL_PAGE *page;
    while((page = ar->aral_lock.pages)) {
        DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(ar->aral_lock.pages, page, aral_lock.prev, aral_lock.next);
        aral_del_page___no_lock_needed(ar, page TRACE_ALLOCATIONS_FUNCTION_CALL_PARAMS);
    }

    aral_unlock(ar);

    if(ar->config.options & ARAL_ALLOCATED_STATS)
        freez(ar->stats);

    freez(ar);
}

size_t aral_element_size(ARAL *ar) {
    return ar->config.requested_element_size;
}
ARAL *aral_create(const char *name, size_t element_size, size_t initial_page_elements, size_t max_page_size,
                  struct aral_statistics *stats, const char *filename, char **cache_dir, bool mmap, bool lockless) {
    ARAL *ar = callocz(1, sizeof(ARAL));
    ar->config.options = (lockless) ? ARAL_LOCKLESS : 0;
    ar->config.requested_element_size = element_size;
    ar->config.initial_page_elements = initial_page_elements;
    ar->config.requested_max_page_size = max_page_size;
    ar->config.mmap.filename = filename;
    ar->config.mmap.cache_dir = cache_dir;
    ar->config.mmap.enabled = mmap;
    strncpyz(ar->config.name, name, ARAL_MAX_NAME);
    spinlock_init(&ar->aral_lock.spinlock);

    if(stats) {
        ar->stats = stats;
        ar->config.options &= ~ARAL_ALLOCATED_STATS;
    }
    else {
        ar->stats = callocz(1, sizeof(struct aral_statistics));
        ar->config.options |= ARAL_ALLOCATED_STATS;
    }

    long int page_size = sysconf(_SC_PAGE_SIZE);
    if (unlikely(page_size == -1))
        ar->config.natural_page_size = 4096;
    else
        ar->config.natural_page_size = page_size;

    // we need to add a page pointer after the element
    // so, first align the element size to the pointer size
    ar->config.element_size = natural_alignment(ar->config.requested_element_size, sizeof(uintptr_t));

    // then add the size of a pointer to it
    ar->config.element_size += sizeof(uintptr_t);

    // make sure it is at least what we need for an ARAL_FREE slot
    if (ar->config.element_size < sizeof(ARAL_FREE))
        ar->config.element_size = sizeof(ARAL_FREE);

    // and finally align it to the natural alignment
    ar->config.element_size = natural_alignment(ar->config.element_size, ARAL_NATURAL_ALIGNMENT);

    ar->config.max_page_elements = ar->config.requested_max_page_size / ar->config.element_size;

    // we write the page pointer just after each element
    ar->config.page_ptr_offset = ar->config.element_size - sizeof(uintptr_t);

    if(ar->config.requested_element_size + sizeof(uintptr_t) > ar->config.element_size)
        fatal("ARAL: '%s' failed to calculate properly page_ptr_offset: "
              "element size %zu, sizeof(uintptr_t) %zu, natural alignment %zu, "
              "final element size %zu, page_ptr_offset %zu",
              ar->config.name, ar->config.requested_element_size, sizeof(uintptr_t), ARAL_NATURAL_ALIGNMENT,
              ar->config.element_size, ar->config.page_ptr_offset);

    //netdata_log_info("ARAL: element size %zu, sizeof(uintptr_t) %zu, natural alignment %zu, final element size %zu, page_ptr_offset %zu",
    //                 ar->element_size, sizeof(uintptr_t), ARAL_NATURAL_ALIGNMENT, ar->internal.element_size, ar->internal.page_ptr_offset);

    if (ar->config.initial_page_elements < 2)
        ar->config.initial_page_elements = 2;

    if(ar->config.mmap.enabled && (!ar->config.mmap.cache_dir || !*ar->config.mmap.cache_dir)) {
        netdata_log_error("ARAL: '%s' mmap cache directory is not configured properly, disabling mmap.", ar->config.name);
        ar->config.mmap.enabled = false;
        internal_fatal(true, "ARAL: '%s' mmap cache directory is not configured properly", ar->config.name);
    }

    uint64_t max_alloc_size;
    if(!ar->config.max_page_elements)
        max_alloc_size = ar->config.mmap.enabled ? ARAL_MAX_PAGE_SIZE_MMAP : ARAL_MAX_PAGE_SIZE_MALLOC;
    else
        max_alloc_size = ar->config.max_page_elements * ar->config.element_size;

    ar->config.max_allocation_size = aral_align_alloc_size(ar, max_alloc_size);
    ar->adders.allocation_size = aral_align_alloc_size(ar, (uint64_t)ar->config.element_size * ar->config.initial_page_elements);
    ar->aral_lock.pages = NULL;
    ar->aral_lock.file_number = 0;

    if(ar->config.mmap.enabled) {
        char directory_name[FILENAME_MAX + 1];
        snprintfz(directory_name, FILENAME_MAX, "%s/array_alloc.mmap", *ar->config.mmap.cache_dir);
        int r = mkdir(directory_name, 0775);
        if (r != 0 && errno != EEXIST)
            fatal("Cannot create directory '%s'", directory_name);

        char file[FILENAME_MAX + 1];
        snprintfz(file, FILENAME_MAX, "%s.", ar->config.mmap.filename);
        aral_delete_leftover_files(ar->config.name, directory_name, file);
    }

    internal_error(true,
                   "ARAL: '%s' "
                   "element size %zu (requested %zu bytes), "
                   "min elements per page %zu (requested %zu), "
                   "max elements per page %zu, "
                   "max page size %zu bytes (requested %zu) "
                   , ar->config.name
                   , ar->config.element_size, ar->config.requested_element_size
                   , ar->adders.allocation_size / ar->config.element_size, ar->config.initial_page_elements
                   , ar->config.max_allocation_size / ar->config.element_size
                   , ar->config.max_allocation_size, ar->config.requested_max_page_size
    );

    __atomic_add_fetch(&ar->stats->structures.allocations, 1, __ATOMIC_RELAXED);
    __atomic_add_fetch(&ar->stats->structures.allocated_bytes, sizeof(ARAL), __ATOMIC_RELAXED);

    return ar;
}
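/*
 * Minimal usage sketch (illustrative only; "my_struct" and the literal
 * sizes are hypothetical, not part of this file):
 *
 *    struct my_struct { int x; char buf[28]; };
 *
 *    ARAL *ar = aral_create("my-structs", sizeof(struct my_struct),
 *                           0,          // initial page elements (a minimum is enforced internally)
 *                           65536,      // max page size in bytes
 *                           NULL,       // stats (NULL = allocate private stats)
 *                           NULL, NULL, // no mmap filename / cache dir
 *                           false,      // mmap disabled
 *                           false);     // not lockless
 *
 *    struct my_struct *s = aral_mallocz(ar);
 *    aral_freez(ar, s);
 *    aral_destroy(ar);
 */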
// ----------------------------------------------------------------------------
// global aral caching

#define ARAL_BY_SIZE_MAX_SIZE 1024

struct aral_by_size {
    ARAL *ar;
    int32_t refcount;
};

struct {
    struct aral_statistics shared_statistics;
    SPINLOCK spinlock;
    struct aral_by_size array[ARAL_BY_SIZE_MAX_SIZE + 1];
} aral_by_size_globals = {};

struct aral_statistics *aral_by_size_statistics(void) {
    return &aral_by_size_globals.shared_statistics;
}

size_t aral_by_size_structures(void) {
    return aral_structures_from_stats(&aral_by_size_globals.shared_statistics);
}

size_t aral_by_size_overhead(void) {
    return aral_overhead_from_stats(&aral_by_size_globals.shared_statistics);
}

ARAL *aral_by_size_acquire(size_t size) {
    spinlock_lock(&aral_by_size_globals.spinlock);

    ARAL *ar = NULL;

    if(size <= ARAL_BY_SIZE_MAX_SIZE && aral_by_size_globals.array[size].ar) {
        ar = aral_by_size_globals.array[size].ar;
        aral_by_size_globals.array[size].refcount++;

        internal_fatal(aral_element_size(ar) != size, "ARAL BY SIZE: aral has size %zu but we want %zu",
                       aral_element_size(ar), size);
    }

    if(!ar) {
        char buf[30 + 1];
        snprintf(buf, 30, "size-%zu", size);
        ar = aral_create(buf,
                         size,
                         0,
                         65536 * ((size / 150) + 1),
                         &aral_by_size_globals.shared_statistics,
                         NULL, NULL, false, false);

        if(size <= ARAL_BY_SIZE_MAX_SIZE) {
            aral_by_size_globals.array[size].ar = ar;
            aral_by_size_globals.array[size].refcount = 1;
        }
    }

    spinlock_unlock(&aral_by_size_globals.spinlock);

    return ar;
}

void aral_by_size_release(ARAL *ar) {
    size_t size = aral_element_size(ar);

    if(size <= ARAL_BY_SIZE_MAX_SIZE) {
        spinlock_lock(&aral_by_size_globals.spinlock);

        internal_fatal(aral_by_size_globals.array[size].ar != ar,
                       "ARAL BY SIZE: aral pointers do not match");

        if(aral_by_size_globals.array[size].refcount <= 0)
            fatal("ARAL BY SIZE: double release detected");

        aral_by_size_globals.array[size].refcount--;
        //if(!aral_by_size_globals.array[size].refcount) {
        //    aral_destroy(aral_by_size_globals.array[size].ar);
        //    aral_by_size_globals.array[size].ar = NULL;
        //}

        spinlock_unlock(&aral_by_size_globals.spinlock);
    }
    else
        aral_destroy(ar);
}
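/*
 * Usage sketch for the by-size cache (illustrative; the size 256 is an
 * arbitrary example): callers that need a pool for a common element size
 * share one ARAL instead of each creating their own.
 *
 *    ARAL *ar = aral_by_size_acquire(256);
 *    void *p = aral_mallocz(ar);
 *    aral_freez(ar, p);
 *    aral_by_size_release(ar);
 */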
// ----------------------------------------------------------------------------
// unittest

struct aral_unittest_config {
    bool single_threaded;
    bool stop;
    ARAL *ar;
    size_t elements;
    size_t threads;
    int errors;
};

static void *aral_test_thread(void *ptr) {
    struct aral_unittest_config *auc = ptr;
    ARAL *ar = auc->ar;
    size_t elements = auc->elements;

    void **pointers = callocz(elements, sizeof(void *));

    do {
        for (size_t i = 0; i < elements; i++) {
            pointers[i] = aral_mallocz(ar);
        }

        for (size_t div = 5; div >= 2; div--) {
            for (size_t i = 0; i < elements / div; i++) {
                aral_freez(ar, pointers[i]);
                pointers[i] = NULL;
            }

            for (size_t i = 0; i < elements / div; i++) {
                pointers[i] = aral_mallocz(ar);
            }
        }

        for (size_t step = 50; step >= 10; step -= 10) {
            for (size_t i = 0; i < elements; i += step) {
                aral_freez(ar, pointers[i]);
                pointers[i] = NULL;
            }

            for (size_t i = 0; i < elements; i += step) {
                pointers[i] = aral_mallocz(ar);
            }
        }

        for (size_t i = 0; i < elements; i++) {
            aral_freez(ar, pointers[i]);
            pointers[i] = NULL;
        }

        if (auc->single_threaded && ar->aral_lock.pages && ar->aral_lock.pages->aral_lock.used_elements) {
            fprintf(stderr, "\n\nARAL leftovers detected (1)\n\n");
            __atomic_add_fetch(&auc->errors, 1, __ATOMIC_RELAXED);
        }

        if(!auc->single_threaded && __atomic_load_n(&auc->stop, __ATOMIC_RELAXED))
            break;

        for (size_t i = 0; i < elements; i++) {
            pointers[i] = aral_mallocz(ar);
        }

        size_t increment = elements / ar->config.max_page_elements;
        for (size_t all = increment; all <= elements / 2; all += increment) {

            size_t to_free = (all % ar->config.max_page_elements) + 1;
            size_t step = elements / to_free;
            if(!step) step = 1;

            // fprintf(stderr, "all %zu, to free %zu, step %zu\n", all, to_free, step);

            size_t free_list[to_free];
            for (size_t i = 0; i < to_free; i++) {
                size_t pos = step * i;
                aral_freez(ar, pointers[pos]);
                pointers[pos] = NULL;
                free_list[i] = pos;
            }

            for (size_t i = 0; i < to_free; i++) {
                size_t pos = free_list[i];
                pointers[pos] = aral_mallocz(ar);
            }
        }

        for (size_t i = 0; i < elements; i++) {
            aral_freez(ar, pointers[i]);
            pointers[i] = NULL;
        }

        if (auc->single_threaded && ar->aral_lock.pages && ar->aral_lock.pages->aral_lock.used_elements) {
            fprintf(stderr, "\n\nARAL leftovers detected (2)\n\n");
            __atomic_add_fetch(&auc->errors, 1, __ATOMIC_RELAXED);
        }

    } while(!auc->single_threaded && !__atomic_load_n(&auc->stop, __ATOMIC_RELAXED));

    freez(pointers);

    return ptr;
}
int aral_stress_test(size_t threads, size_t elements, size_t seconds) {
    fprintf(stderr, "Running stress test of %zu threads, with %zu elements each, for %zu seconds...\n",
            threads, elements, seconds);

    struct aral_unittest_config auc = {
        .single_threaded = false,
        .threads = threads,
        .ar = aral_create("aral-stress-test", 20, 0, 8192, NULL, "aral-stress-test", NULL, false, false),
        .elements = elements,
        .errors = 0,
    };

    usec_t started_ut = now_monotonic_usec();
    netdata_thread_t thread_ptrs[threads];

    for(size_t i = 0; i < threads; i++) {
        char tag[NETDATA_THREAD_NAME_MAX + 1];
        snprintfz(tag, NETDATA_THREAD_NAME_MAX, "TH[%zu]", i);
        netdata_thread_create(&thread_ptrs[i], tag,
                              NETDATA_THREAD_OPTION_JOINABLE | NETDATA_THREAD_OPTION_DONT_LOG,
                              aral_test_thread, &auc);
    }

    size_t malloc_done = 0;
    size_t free_done = 0;
    size_t countdown = seconds;

    while(countdown-- > 0) {
        sleep_usec(1 * USEC_PER_SEC);
        aral_lock(auc.ar);
        size_t m = auc.ar->aral_lock.user_malloc_operations;
        size_t f = auc.ar->aral_lock.user_free_operations;
        aral_unlock(auc.ar);
        fprintf(stderr, "ARAL executes %0.2f M malloc and %0.2f M free operations/s\n",
                (double)(m - malloc_done) / 1000000.0, (double)(f - free_done) / 1000000.0);
        malloc_done = m;
        free_done = f;
    }

    __atomic_store_n(&auc.stop, true, __ATOMIC_RELAXED);

    //fprintf(stderr, "Cancelling the threads...\n");
    //for(size_t i = 0; i < threads; i++) {
    //    netdata_thread_cancel(thread_ptrs[i]);
    //}

    fprintf(stderr, "Waiting the threads to finish...\n");
    for(size_t i = 0; i < threads; i++) {
        netdata_thread_join(thread_ptrs[i], NULL);
    }

    usec_t ended_ut = now_monotonic_usec();

    if (auc.ar->aral_lock.pages && auc.ar->aral_lock.pages->aral_lock.used_elements) {
        fprintf(stderr, "\n\nARAL leftovers detected (3)\n\n");
        __atomic_add_fetch(&auc.errors, 1, __ATOMIC_RELAXED);
    }

    netdata_log_info("ARAL: did %zu malloc, %zu free, "
                     "using %zu threads, in %llu usecs",
                     auc.ar->aral_lock.user_malloc_operations,
                     auc.ar->aral_lock.user_free_operations,
                     threads,
                     ended_ut - started_ut);

    aral_destroy(auc.ar);

    return auc.errors;
}
int aral_unittest(size_t elements) {
    char *cache_dir = "/tmp/";

    struct aral_unittest_config auc = {
        .single_threaded = true,
        .threads = 1,
        .ar = aral_create("aral-test", 20, 0, 8192, NULL, "aral-test", &cache_dir, false, false),
        .elements = elements,
        .errors = 0,
    };

    aral_test_thread(&auc);

    aral_destroy(auc.ar);

    int errors = aral_stress_test(2, elements, 5);

    return auc.errors + errors;
}