allocator_sba.c

/**
 * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
 * SPDX-License-Identifier: Apache-2.0.
 */
#include <aws/common/allocator.h>
#include <aws/common/array_list.h>
#include <aws/common/assert.h>
#include <aws/common/macros.h>
#include <aws/common/mutex.h>

#include <string.h> /* memcpy, memset */

/*
 * Small Block Allocator
 * This is a fairly standard approach: always allocate aligned pages of memory so that, for any address,
 * rounding down to the nearest page boundary finds the bookkeeping data. The goal is to reduce overhead
 * per alloc and greatly improve runtime speed by doing as little actual allocation work as possible,
 * preferring instead to re-use (hopefully still cached) chunks in LIFO order, or to chunk up a page if
 * there are no free chunks. When all chunks in a page are freed, the page is returned to the OS.
 *
 * The allocator itself is simply an array of bins, each representing a power-of-2 size from 32 to N
 * (512 tends to be a good upper bound). Thread safety is provided by a mutex per bin, and locks are
 * only necessary around the lowest-level alloc and free operations.
 *
 * Note: this allocator gets its internal memory for data structures from the parent allocator, but does not
 * use the parent to allocate pages. Pages are allocated directly from the OS-specific aligned malloc
 * implementation, which allows the OS to do address re-mapping for us instead of over-allocating to
 * fulfill alignment.
 */

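/*
 * Illustrative usage sketch (not part of this file; the public entry points defined below are declared
 * in aws/common/allocator.h, and the SBA is driven through the normal aws_allocator interface):
 *
 *     struct aws_allocator *sba = aws_small_block_allocator_new(aws_default_allocator(), true);
 *     void *mem = aws_mem_acquire(sba, 64);   <-- small request, served from the 64-byte bin
 *     aws_mem_release(sba, mem);              <-- chunk returns to the bin's free list
 *     aws_small_block_allocator_destroy(sba);
 */
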
#ifdef _WIN32
#    include <malloc.h>
#elif __linux__ || __APPLE__
#    include <stdlib.h>
#endif

#if !defined(AWS_SBA_PAGE_SIZE)
#    if defined(PAGE_SIZE)
#        define AWS_SBA_PAGE_SIZE ((uintptr_t)(PAGE_SIZE))
#    else
#        define AWS_SBA_PAGE_SIZE ((uintptr_t)(4096))
#    endif
#endif

#define AWS_SBA_PAGE_MASK ((uintptr_t) ~(AWS_SBA_PAGE_SIZE - 1))
#define AWS_SBA_TAG_VALUE 0x736f6d6570736575ULL

/* list of sizes of bins, must be powers of 2, and less than AWS_SBA_PAGE_SIZE * 0.5 */
#define AWS_SBA_BIN_COUNT 5
static const size_t s_bin_sizes[AWS_SBA_BIN_COUNT] = {32, 64, 128, 256, 512};
static const size_t s_max_bin_size = 512;

struct sba_bin {
    size_t size;                        /* size of allocs in this bin */
    struct aws_mutex mutex;             /* lock protecting this bin */
    uint8_t *page_cursor;               /* pointer to working page, currently being chunked from */
    struct aws_array_list active_pages; /* all pages in use by this bin, could be optimized at scale by being a set */
    struct aws_array_list free_chunks;  /* free chunks available in this bin */
};

/* Header stored at the base of each page.
 * As long as this is under 32 bytes, all is well.
 * Above that, there's potentially more waste per page */
struct page_header {
    uint64_t tag;         /* marker to identify/validate pages */
    struct sba_bin *bin;  /* bin this page belongs to */
    uint32_t alloc_count; /* number of outstanding allocs from this page */
    uint64_t tag2;
};

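/* Note (illustrative, for a typical 64-bit build): the fields above lay out as 8 + 8 + 4 bytes, plus
 * 4 bytes of padding before tag2, plus 8 bytes, so sizeof(struct page_header) is 32 and the first
 * chunk of every page begins 32 bytes past the page base. */
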
/* This is the impl for the aws_allocator */
struct small_block_allocator {
    struct aws_allocator *allocator; /* parent allocator, for large allocs */
    struct sba_bin bins[AWS_SBA_BIN_COUNT];
    int (*lock)(struct aws_mutex *);
    int (*unlock)(struct aws_mutex *);
};

static int s_null_lock(struct aws_mutex *mutex) {
    (void)mutex;
    /* NO OP */
    return 0;
}

static int s_null_unlock(struct aws_mutex *mutex) {
    (void)mutex;
    /* NO OP */
    return 0;
}

static int s_mutex_lock(struct aws_mutex *mutex) {
    return aws_mutex_lock(mutex);
}

static int s_mutex_unlock(struct aws_mutex *mutex) {
    return aws_mutex_unlock(mutex);
}

static void *s_page_base(const void *addr) {
    /* mask off the address to round it to page alignment */
    uint8_t *page_base = (uint8_t *)(((uintptr_t)addr) & AWS_SBA_PAGE_MASK);
    return page_base;
}

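/* Worked example (illustrative, assuming the default 4096-byte page): an address of 0x10007f30
 * masked with ~0xfff yields the page base 0x10007000, which is where the page_header lives. */
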
static void *s_page_bind(void *addr, struct sba_bin *bin) {
    /* insert the header at the base of the page and advance past it */
    struct page_header *page = (struct page_header *)addr;
    page->tag = page->tag2 = AWS_SBA_TAG_VALUE;
    page->bin = bin;
    page->alloc_count = 0;
    return (uint8_t *)addr + sizeof(struct page_header);
}

/* Wraps OS-specific aligned malloc implementation */
static void *s_aligned_alloc(size_t size, size_t align) {
#ifdef _WIN32
    return _aligned_malloc(size, align);
#else
    void *mem = NULL;
    int return_code = posix_memalign(&mem, align, size);
    if (return_code) {
        aws_raise_error(AWS_ERROR_OOM);
        return NULL;
    }
    return mem;
#endif
}

/* wraps OS-specific aligned free implementation */
static void s_aligned_free(void *addr) {
#ifdef _WIN32
    _aligned_free(addr);
#else
    free(addr);
#endif
}

/* aws_allocator vtable template */
static void *s_sba_mem_acquire(struct aws_allocator *allocator, size_t size);
static void s_sba_mem_release(struct aws_allocator *allocator, void *ptr);
static void *s_sba_mem_realloc(struct aws_allocator *allocator, void *old_ptr, size_t old_size, size_t new_size);
static void *s_sba_mem_calloc(struct aws_allocator *allocator, size_t num, size_t size);

static struct aws_allocator s_sba_allocator = {
    .mem_acquire = s_sba_mem_acquire,
    .mem_release = s_sba_mem_release,
    .mem_realloc = s_sba_mem_realloc,
    .mem_calloc = s_sba_mem_calloc,
};

static int s_sba_init(struct small_block_allocator *sba, struct aws_allocator *allocator, bool multi_threaded) {
    sba->allocator = allocator;
    AWS_ZERO_ARRAY(sba->bins);
    sba->lock = multi_threaded ? s_mutex_lock : s_null_lock;
    sba->unlock = multi_threaded ? s_mutex_unlock : s_null_unlock;

    for (unsigned idx = 0; idx < AWS_SBA_BIN_COUNT; ++idx) {
        struct sba_bin *bin = &sba->bins[idx];
        bin->size = s_bin_sizes[idx];
        if (multi_threaded && aws_mutex_init(&bin->mutex)) {
            goto cleanup;
        }
        if (aws_array_list_init_dynamic(&bin->active_pages, sba->allocator, 16, sizeof(void *))) {
            goto cleanup;
        }
        /* start with enough chunks for 1 page */
        if (aws_array_list_init_dynamic(
                &bin->free_chunks, sba->allocator, aws_max_size(AWS_SBA_PAGE_SIZE / bin->size, 16), sizeof(void *))) {
            goto cleanup;
        }
    }

    return AWS_OP_SUCCESS;

cleanup:
    for (unsigned idx = 0; idx < AWS_SBA_BIN_COUNT; ++idx) {
        struct sba_bin *bin = &sba->bins[idx];
        aws_mutex_clean_up(&bin->mutex);
        aws_array_list_clean_up(&bin->active_pages);
        aws_array_list_clean_up(&bin->free_chunks);
    }
    return AWS_OP_ERR;
}

static void s_sba_clean_up(struct small_block_allocator *sba) {
    /* free all known pages, then free the working page */
    for (unsigned idx = 0; idx < AWS_SBA_BIN_COUNT; ++idx) {
        struct sba_bin *bin = &sba->bins[idx];
        for (size_t page_idx = 0; page_idx < bin->active_pages.length; ++page_idx) {
            void *page_addr = NULL;
            aws_array_list_get_at(&bin->active_pages, &page_addr, page_idx);
            struct page_header *page = page_addr;
            AWS_ASSERT(page->alloc_count == 0 && "Memory still allocated in aws_sba_allocator (bin)");
            s_aligned_free(page);
        }
        if (bin->page_cursor) {
            void *page_addr = s_page_base(bin->page_cursor);
            struct page_header *page = page_addr;
            AWS_ASSERT(page->alloc_count == 0 && "Memory still allocated in aws_sba_allocator (page)");
            s_aligned_free(page);
        }

        aws_array_list_clean_up(&bin->active_pages);
        aws_array_list_clean_up(&bin->free_chunks);
        aws_mutex_clean_up(&bin->mutex);
    }
}

struct aws_allocator *aws_small_block_allocator_new(struct aws_allocator *allocator, bool multi_threaded) {
    struct small_block_allocator *sba = NULL;
    struct aws_allocator *sba_allocator = NULL;
    aws_mem_acquire_many(
        allocator, 2, &sba, sizeof(struct small_block_allocator), &sba_allocator, sizeof(struct aws_allocator));

    if (!sba || !sba_allocator) {
        return NULL;
    }

    AWS_ZERO_STRUCT(*sba);
    AWS_ZERO_STRUCT(*sba_allocator);

    /* copy the template vtable */
    *sba_allocator = s_sba_allocator;
    sba_allocator->impl = sba;

    if (s_sba_init(sba, allocator, multi_threaded)) {
        s_sba_clean_up(sba);
        aws_mem_release(allocator, sba);
        return NULL;
    }

    return sba_allocator;
}

void aws_small_block_allocator_destroy(struct aws_allocator *sba_allocator) {
    if (!sba_allocator) {
        return;
    }
    struct small_block_allocator *sba = sba_allocator->impl;
    if (!sba) {
        return;
    }

    struct aws_allocator *allocator = sba->allocator;
    s_sba_clean_up(sba);
    aws_mem_release(allocator, sba);
}

size_t aws_small_block_allocator_bytes_active(struct aws_allocator *sba_allocator) {
    AWS_FATAL_ASSERT(sba_allocator && "aws_small_block_allocator_bytes_active requires a non-null allocator");
    struct small_block_allocator *sba = sba_allocator->impl;
    AWS_FATAL_ASSERT(sba && "aws_small_block_allocator_bytes_active: supplied allocator has invalid SBA impl");

    size_t used = 0;
    for (unsigned idx = 0; idx < AWS_SBA_BIN_COUNT; ++idx) {
        struct sba_bin *bin = &sba->bins[idx];
        sba->lock(&bin->mutex);
        for (size_t page_idx = 0; page_idx < bin->active_pages.length; ++page_idx) {
            void *page_addr = NULL;
            aws_array_list_get_at(&bin->active_pages, &page_addr, page_idx);
            struct page_header *page = page_addr;
            used += page->alloc_count * bin->size;
        }
        if (bin->page_cursor) {
            void *page_addr = s_page_base(bin->page_cursor);
            struct page_header *page = page_addr;
            used += page->alloc_count * bin->size;
        }
        sba->unlock(&bin->mutex);
    }

    return used;
}

size_t aws_small_block_allocator_bytes_reserved(struct aws_allocator *sba_allocator) {
    AWS_FATAL_ASSERT(sba_allocator && "aws_small_block_allocator_bytes_reserved requires a non-null allocator");
    struct small_block_allocator *sba = sba_allocator->impl;
    AWS_FATAL_ASSERT(sba && "aws_small_block_allocator_bytes_reserved: supplied allocator has invalid SBA impl");

    size_t used = 0;
    for (unsigned idx = 0; idx < AWS_SBA_BIN_COUNT; ++idx) {
        struct sba_bin *bin = &sba->bins[idx];
        sba->lock(&bin->mutex);
        used += (bin->active_pages.length + (bin->page_cursor != NULL)) * AWS_SBA_PAGE_SIZE;
        sba->unlock(&bin->mutex);
    }

    return used;
}

size_t aws_small_block_allocator_page_size(struct aws_allocator *sba_allocator) {
    (void)sba_allocator;
    return AWS_SBA_PAGE_SIZE;
}

size_t aws_small_block_allocator_page_size_available(struct aws_allocator *sba_allocator) {
    (void)sba_allocator;
    return AWS_SBA_PAGE_SIZE - sizeof(struct page_header);
}

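/* Worked example (illustrative, assuming the default 4096-byte page and a 32-byte page_header on a
 * 64-bit build): 4096 - 32 = 4064 bytes are available for chunks, so a page in the 512-byte bin
 * yields 7 chunks (with 480 bytes unused), while a page in the 32-byte bin yields 127 chunks. */
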
/* NOTE: Expects the mutex to be held by the caller */
static void *s_sba_alloc_from_bin(struct sba_bin *bin) {
    /* check the free list, hand chunks out in LIFO order */
    if (bin->free_chunks.length > 0) {
        void *chunk = NULL;
        if (aws_array_list_back(&bin->free_chunks, &chunk)) {
            return NULL;
        }
        if (aws_array_list_pop_back(&bin->free_chunks)) {
            return NULL;
        }

        AWS_ASSERT(chunk);
        struct page_header *page = s_page_base(chunk);
        page->alloc_count++;
        return chunk;
    }

    /* If there is a working page to chunk from, use it */
    if (bin->page_cursor) {
        struct page_header *page = s_page_base(bin->page_cursor);
        AWS_ASSERT(page);
        size_t space_left = AWS_SBA_PAGE_SIZE - (bin->page_cursor - (uint8_t *)page);
        if (space_left >= bin->size) {
            void *chunk = bin->page_cursor;
            page->alloc_count++;
            bin->page_cursor += bin->size;
            space_left -= bin->size;
            if (space_left < bin->size) {
                /* the page is exhausted; move it to the active list and retire the cursor */
                aws_array_list_push_back(&bin->active_pages, &page);
                bin->page_cursor = NULL;
            }
            return chunk;
        }
    }

    /* Nothing free to use, allocate a page and restart */
    uint8_t *new_page = s_aligned_alloc(AWS_SBA_PAGE_SIZE, AWS_SBA_PAGE_SIZE);
    if (!new_page) {
        /* propagate OOM from the aligned allocator */
        return NULL;
    }
    new_page = s_page_bind(new_page, bin);
    bin->page_cursor = new_page;

    return s_sba_alloc_from_bin(bin);
}

/* NOTE: Expects the mutex to be held by the caller */
static void s_sba_free_to_bin(struct sba_bin *bin, void *addr) {
    AWS_PRECONDITION(addr);
    struct page_header *page = s_page_base(addr);
    AWS_ASSERT(page->bin == bin);
    page->alloc_count--;
    if (page->alloc_count == 0 && page != s_page_base(bin->page_cursor)) { /* empty page, free it */
        uint8_t *page_start = (uint8_t *)page + sizeof(struct page_header);
        uint8_t *page_end = page_start + AWS_SBA_PAGE_SIZE;

        /* Remove all chunks in the page from the free list */
        intptr_t chunk_idx = bin->free_chunks.length;
        for (; chunk_idx >= 0; --chunk_idx) {
            uint8_t *chunk = NULL;
            aws_array_list_get_at(&bin->free_chunks, &chunk, chunk_idx);
            if (chunk >= page_start && chunk < page_end) {
                aws_array_list_swap(&bin->free_chunks, chunk_idx, bin->free_chunks.length - 1);
                aws_array_list_pop_back(&bin->free_chunks);
            }
        }

        /* Find page in pages list and remove it */
        for (size_t page_idx = 0; page_idx < bin->active_pages.length; ++page_idx) {
            void *page_addr = NULL;
            aws_array_list_get_at(&bin->active_pages, &page_addr, page_idx);
            if (page_addr == page) {
                aws_array_list_swap(&bin->active_pages, page_idx, bin->active_pages.length - 1);
                aws_array_list_pop_back(&bin->active_pages);
                break;
            }
        }

        /* ensure that the page tag is erased, in case nearby memory is re-used */
        page->tag = page->tag2 = 0;
        s_aligned_free(page);
        return;
    }

    aws_array_list_push_back(&bin->free_chunks, &addr);
}

/* No lock required for this function, it's all read-only access to constant data */
static struct sba_bin *s_sba_find_bin(struct small_block_allocator *sba, size_t size) {
    AWS_PRECONDITION(size <= s_max_bin_size);

    /* map bits 5(32) to 9(512) to indices 0-4 */
    size_t next_pow2 = 0;
    aws_round_up_to_power_of_two(size, &next_pow2);
    size_t lz = aws_clz_i32((int32_t)next_pow2);
    size_t idx = aws_sub_size_saturating(31 - lz, 5);
    AWS_ASSERT(idx <= 4);
    struct sba_bin *bin = &sba->bins[idx];
    AWS_ASSERT(bin->size >= size);
    return bin;
}

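/* Worked example (illustrative): a 100-byte request rounds up to 128 = 2^7; the 32-bit count of
 * leading zeros of 128 is 24, so 31 - 24 = 7, and 7 - 5 = 2 selects bins[2], whose size is 128. */
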
static void *s_sba_alloc(struct small_block_allocator *sba, size_t size) {
    if (size <= s_max_bin_size) {
        struct sba_bin *bin = s_sba_find_bin(sba, size);
        AWS_FATAL_ASSERT(bin);
        /* BEGIN CRITICAL SECTION */
        sba->lock(&bin->mutex);
        void *mem = s_sba_alloc_from_bin(bin);
        sba->unlock(&bin->mutex);
        /* END CRITICAL SECTION */
        return mem;
    }

    return aws_mem_acquire(sba->allocator, size);
}

AWS_SUPPRESS_ASAN AWS_SUPPRESS_TSAN static void s_sba_free(struct small_block_allocator *sba, void *addr) {
    if (!addr) {
        return;
    }

    struct page_header *page = (struct page_header *)s_page_base(addr);
    /* Check whether this page is tagged by the SBA.
     * This check reads memory we (possibly) didn't allocate, but it will always be heap memory,
     * so it should not cause any issues. TSan will report it as a data race, but that is a
     * false positive.
     */
    if (page->tag == AWS_SBA_TAG_VALUE && page->tag2 == AWS_SBA_TAG_VALUE) {
        struct sba_bin *bin = page->bin;
        /* BEGIN CRITICAL SECTION */
        sba->lock(&bin->mutex);
        s_sba_free_to_bin(bin, addr);
        sba->unlock(&bin->mutex);
        /* END CRITICAL SECTION */
        return;
    }

    /* large alloc, give back to underlying allocator */
    aws_mem_release(sba->allocator, addr);
}

static void *s_sba_mem_acquire(struct aws_allocator *allocator, size_t size) {
    struct small_block_allocator *sba = allocator->impl;
    return s_sba_alloc(sba, size);
}

static void s_sba_mem_release(struct aws_allocator *allocator, void *ptr) {
    struct small_block_allocator *sba = allocator->impl;
    s_sba_free(sba, ptr);
}

static void *s_sba_mem_realloc(struct aws_allocator *allocator, void *old_ptr, size_t old_size, size_t new_size) {
    struct small_block_allocator *sba = allocator->impl;
    /* If both allocations come from the parent, let the parent do it */
    if (old_size > s_max_bin_size && new_size > s_max_bin_size) {
        void *ptr = old_ptr;
        if (aws_mem_realloc(sba->allocator, &ptr, old_size, new_size)) {
            return NULL;
        }
        return ptr;
    }

    if (new_size == 0) {
        s_sba_free(sba, old_ptr);
        return NULL;
    }

    /* shrinking: the existing allocation is already large enough */
    if (old_size > new_size) {
        return old_ptr;
    }

    void *new_mem = s_sba_alloc(sba, new_size);
    if (!new_mem) {
        return NULL;
    }
    if (old_ptr && old_size) {
        memcpy(new_mem, old_ptr, old_size);
        s_sba_free(sba, old_ptr);
    }

    return new_mem;
}

static void *s_sba_mem_calloc(struct aws_allocator *allocator, size_t num, size_t size) {
    struct small_block_allocator *sba = allocator->impl;
    void *mem = s_sba_alloc(sba, size * num);
    if (!mem) {
        return NULL;
    }
    memset(mem, 0, size * num);
    return mem;
}