--- contrib/libs/tcmalloc/tcmalloc/central_freelist.h (index)
+++ contrib/libs/tcmalloc/tcmalloc/central_freelist.h (working tree)
@@ -70,6 +70,14 @@ class CentralFreeList {
 
   SpanStats GetSpanStats() const;
 
+  void AcquireInternalLocks() {
+    lock_.Lock();
+  }
+
+  void ReleaseInternalLocks() {
+    lock_.Unlock();
+  }
+
  private:
   // Release an object to spans.
   // Returns object's span if it become completely free.
--- contrib/libs/tcmalloc/tcmalloc/cpu_cache.cc (index)
+++ contrib/libs/tcmalloc/tcmalloc/cpu_cache.cc (working tree)
@@ -1031,6 +1031,20 @@ void CPUCache::PrintInPbtxt(PbtxtRegion *region) const {
   }
 }
 
+void CPUCache::AcquireInternalLocks() {
+  for (int cpu = 0, num_cpus = absl::base_internal::NumCPUs(); cpu < num_cpus;
+       ++cpu) {
+    resize_[cpu].lock.Lock();
+  }
+}
+
+void CPUCache::ReleaseInternalLocks() {
+  for (int cpu = 0, num_cpus = absl::base_internal::NumCPUs(); cpu < num_cpus;
+       ++cpu) {
+    resize_[cpu].lock.Unlock();
+  }
+}
+
 void CPUCache::PerClassResizeInfo::Init() {
   state_.store(0, std::memory_order_relaxed);
 }
--- contrib/libs/tcmalloc/tcmalloc/cpu_cache.h (index)
+++ contrib/libs/tcmalloc/tcmalloc/cpu_cache.h (working tree)
@@ -164,6 +164,9 @@ class CPUCache {
   void Print(Printer* out) const;
   void PrintInPbtxt(PbtxtRegion* region) const;
 
+  void AcquireInternalLocks();
+  void ReleaseInternalLocks();
+
  private:
   // Per-size-class freelist resizing info.
   class PerClassResizeInfo {
--- contrib/libs/tcmalloc/tcmalloc/internal_malloc_extension.h (index)
+++ contrib/libs/tcmalloc/tcmalloc/internal_malloc_extension.h (working tree)
@@ -116,6 +116,10 @@ ABSL_ATTRIBUTE_WEAK int64_t
 MallocExtension_Internal_GetMaxTotalThreadCacheBytes();
 ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_SetMaxTotalThreadCacheBytes(
     int64_t value);
+
+ABSL_ATTRIBUTE_WEAK void
+MallocExtension_EnableForkSupport();
+
 }
 
 #endif
--- contrib/libs/tcmalloc/tcmalloc/malloc_extension.cc (index)
+++ contrib/libs/tcmalloc/tcmalloc/malloc_extension.cc (working tree)
@@ -460,6 +460,14 @@ void MallocExtension::SetBackgroundReleaseRate(BytesPerSecond rate) {
 #endif
 }
 
+void MallocExtension::EnableForkSupport() {
+#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS
+  if (&MallocExtension_EnableForkSupport != nullptr) {
+    MallocExtension_EnableForkSupport();
+  }
+#endif
+}
+
 }  // namespace tcmalloc
 
 // Default implementation just returns size. The expectation is that
--- contrib/libs/tcmalloc/tcmalloc/malloc_extension.h (index)
+++ contrib/libs/tcmalloc/tcmalloc/malloc_extension.h (working tree)
@@ -468,6 +468,10 @@ class MallocExtension final {
   // Specifies the release rate from the page heap. ProcessBackgroundActions
   // must be called for this to be operative.
   static void SetBackgroundReleaseRate(BytesPerSecond rate);
+
+  // Enables fork support. The allocator will continue to function
+  // correctly in the child after fork().
+  static void EnableForkSupport();
 };
 
 }  // namespace tcmalloc
--- contrib/libs/tcmalloc/tcmalloc/static_vars.cc (index)
+++ contrib/libs/tcmalloc/tcmalloc/static_vars.cc (working tree)
@@ -59,6 +59,7 @@
 ABSL_CONST_INIT PageHeapAllocator<StackTraceTable::Bucket> Static::bucket_allocator_;
 ABSL_CONST_INIT std::atomic<bool> Static::inited_{false};
 ABSL_CONST_INIT bool Static::cpu_cache_active_ = false;
+ABSL_CONST_INIT bool Static::fork_support_enabled_ = false;
 ABSL_CONST_INIT Static::PageAllocatorStorage Static::page_allocator_;
 ABSL_CONST_INIT PageMap Static::pagemap_;
 ABSL_CONST_INIT absl::base_internal::SpinLock guarded_page_lock(
@@ -116,6 +117,13 @@ ABSL_ATTRIBUTE_COLD ABSL_ATTRIBUTE_NOINLINE void Static::SlowInitIfNecessary() {
   pagemap_.MapRootWithSmallPages();
   guardedpage_allocator_.Init(/*max_alloced_pages=*/64, /*total_pages=*/128);
   inited_.store(true, std::memory_order_release);
+
+  pageheap_lock.Unlock();
+  pthread_atfork(
+      TCMallocPreFork,
+      TCMallocPostFork,
+      TCMallocPostFork);
+  pageheap_lock.Lock();
 }
 }
--- contrib/libs/tcmalloc/tcmalloc/static_vars.h (index)
+++ contrib/libs/tcmalloc/tcmalloc/static_vars.h (working tree)
@@ -50,6 +50,9 @@ class CPUCache;
 class PageMap;
 class ThreadCache;
 
+void TCMallocPreFork();
+void TCMallocPostFork();
+
 class Static {
  public:
   // True if InitIfNecessary() has run to completion.
@@ -124,6 +127,9 @@
   static void ActivateCPUCache() { cpu_cache_active_ = true; }
   static void DeactivateCPUCache() { cpu_cache_active_ = false; }
 
+  static bool ForkSupportEnabled() { return fork_support_enabled_; }
+  static void EnableForkSupport() { fork_support_enabled_ = true; }
+
   static bool ABSL_ATTRIBUTE_ALWAYS_INLINE IsOnFastPath() {
     return
 #ifndef TCMALLOC_DEPRECATED_PERTHREAD
@@ -169,6 +175,7 @@
   static PageHeapAllocator<StackTraceTable::Bucket> bucket_allocator_;
   ABSL_CONST_INIT static std::atomic<bool> inited_;
   static bool cpu_cache_active_;
+  static bool fork_support_enabled_;
   ABSL_CONST_INIT static PeakHeapTracker peak_heap_tracker_;
   ABSL_CONST_INIT static NumaTopology<kNumaPartitions, kNumBaseClasses> numa_topology_;
--- contrib/libs/tcmalloc/tcmalloc/system-alloc.cc (index)
+++ contrib/libs/tcmalloc/tcmalloc/system-alloc.cc (working tree)
@@ -354,6 +354,14 @@ ABSL_CONST_INIT std::atomic<int> system_release_errors = ATOMIC_VAR_INIT(0);
 
 }  // namespace
 
+void AcquireSystemAllocLock() {
+  spinlock.Lock();
+}
+
+void ReleaseSystemAllocLock() {
+  spinlock.Unlock();
+}
+
 void* SystemAlloc(size_t bytes, size_t* actual_bytes, size_t alignment,
                   const MemoryTag tag) {
   // If default alignment is set request the minimum alignment provided by
--- contrib/libs/tcmalloc/tcmalloc/system-alloc.h (index)
+++ contrib/libs/tcmalloc/tcmalloc/system-alloc.h (working tree)
@@ -50,6 +50,9 @@ void *SystemAlloc(size_t bytes, size_t *actual_bytes, size_t alignment,
 // call to SystemRelease.
 int SystemReleaseErrors();
 
+void AcquireSystemAllocLock();
+void ReleaseSystemAllocLock();
+
 // This call is a hint to the operating system that the pages
 // contained in the specified range of memory will not be used for a
 // while, and can be released for use by other processes or the OS.
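
Note: MallocExtension::EnableForkSupport() only flips Static::fork_support_enabled_; the pthread_atfork() handlers registered in SlowInitIfNecessary() above remain no-ops until it is called. A minimal usage sketch (illustrative only, not part of the patch; assumes tcmalloc is the linked allocator):

    #include <stdlib.h>
    #include <unistd.h>
    #include "tcmalloc/malloc_extension.h"

    int main() {
      // Opt in once, before the first fork(). The atfork handlers are
      // registered at allocator initialization but do nothing until this call.
      tcmalloc::MallocExtension::EnableForkSupport();

      if (fork() == 0) {
        // Child: TCMallocPreFork/TCMallocPostFork held every allocator lock
        // across the fork, so the heap is consistent and usable here.
        void* p = malloc(64);
        free(p);
        _exit(0);
      }
      return 0;
    }
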
--- contrib/libs/tcmalloc/tcmalloc/tcmalloc.cc (index)
+++ contrib/libs/tcmalloc/tcmalloc/tcmalloc.cc (working tree)
@@ -1117,6 +1117,40 @@ extern "C" void MallocExtension_Internal_ReleaseMemoryToSystem(
   }
 }
 
+extern "C" void MallocExtension_EnableForkSupport() {
+  Static::EnableForkSupport();
+}
+
+void TCMallocPreFork() {
+  if (!Static::ForkSupportEnabled()) {
+    return;
+  }
+
+  if (Static::CPUCacheActive()) {
+    Static::cpu_cache().AcquireInternalLocks();
+  }
+  Static::transfer_cache().AcquireInternalLocks();
+  guarded_page_lock.Lock();
+  release_lock.Lock();
+  pageheap_lock.Lock();
+  AcquireSystemAllocLock();
+}
+
+void TCMallocPostFork() {
+  if (!Static::ForkSupportEnabled()) {
+    return;
+  }
+
+  ReleaseSystemAllocLock();
+  pageheap_lock.Unlock();
+  guarded_page_lock.Unlock();
+  release_lock.Unlock();
+  Static::transfer_cache().ReleaseInternalLocks();
+  if (Static::CPUCacheActive()) {
+    Static::cpu_cache().ReleaseInternalLocks();
+  }
+}
+
 // nallocx slow path.
 // Moved to a separate function because size_class_with_alignment is not inlined
 // which would cause nallocx to become non-leaf function with stack frame and
--- contrib/libs/tcmalloc/tcmalloc/tcmalloc.h (index)
+++ contrib/libs/tcmalloc/tcmalloc/tcmalloc.h (working tree)
@@ -120,4 +120,7 @@ void TCMallocInternalDeleteArrayNothrow(void* p, const std::nothrow_t&) __THROW
 }
 #endif
 
+void TCMallocInternalAcquireLocks();
+void TCMallocInternalReleaseLocks();
+
 #endif  // TCMALLOC_TCMALLOC_H_
--- contrib/libs/tcmalloc/tcmalloc/transfer_cache.h (index)
+++ contrib/libs/tcmalloc/tcmalloc/transfer_cache.h (working tree)
@@ -176,6 +176,26 @@ class TransferCacheManager : public StaticForwarder {
     }
   }
 
+  void AcquireInternalLocks() {
+    for (int i = 0; i < kNumClasses; ++i) {
+      if (implementation_ == TransferCacheImplementation::Ring) {
+        cache_[i].rbtc.AcquireInternalLocks();
+      } else {
+        cache_[i].tc.AcquireInternalLocks();
+      }
+    }
+  }
+
+  void ReleaseInternalLocks() {
+    for (int i = 0; i < kNumClasses; ++i) {
+      if (implementation_ == TransferCacheImplementation::Ring) {
+        cache_[i].rbtc.ReleaseInternalLocks();
+      } else {
+        cache_[i].tc.ReleaseInternalLocks();
+      }
+    }
+  }
+
   void InsertRange(int size_class, absl::Span<void *> batch) {
     if (implementation_ == TransferCacheImplementation::Ring) {
       cache_[size_class].rbtc.InsertRange(size_class, batch);
@@ -295,6 +315,9 @@ class TransferCacheManager {
     return TransferCacheImplementation::None;
   }
 
+  void AcquireInternalLocks() {}
+  void ReleaseInternalLocks() {}
+
  private:
   CentralFreeList freelist_[kNumClasses];
 } ABSL_CACHELINE_ALIGNED;
--- contrib/libs/tcmalloc/tcmalloc/transfer_cache_internals.h (index)
+++ contrib/libs/tcmalloc/tcmalloc/transfer_cache_internals.h (working tree)
@@ -366,6 +366,18 @@ class TransferCache {
     return freelist_do_not_access_directly_;
   }
 
+  void AcquireInternalLocks()
+  {
+    freelist().AcquireInternalLocks();
+    lock_.Lock();
+  }
+
+  void ReleaseInternalLocks()
+  {
+    lock_.Unlock();
+    freelist().ReleaseInternalLocks();
+  }
+
  private:
   // Returns first object of the i-th slot.
   void **GetSlot(size_t i) ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) {
@@ -468,6 +480,18 @@ class RingBufferTransferCache {
 
   // These methods all do internal locking.
 
+  void AcquireInternalLocks()
+  {
+    freelist().AcquireInternalLocks();
+    lock_.Lock();
+  }
+
+  void ReleaseInternalLocks()
+  {
+    lock_.Unlock();
+    freelist().ReleaseInternalLocks();
+  }
+
   // Insert the specified batch into the transfer cache. N is the number of
   // elements in the range. RemoveRange() is the opposite operation.
   void InsertRange(int size_class, absl::Span<void *> batch)
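
For reference, the failure mode these handlers prevent (a hypothetical repro, not part of the patch): if another thread owns an allocator lock at the instant of fork(), the child inherits the lock in its locked state and its next allocation deadlocks. With fork support enabled, TCMallocPreFork() acquires all internal locks before the fork and TCMallocPostFork() releases them in both parent and child:

    #include <pthread.h>
    #include <stdlib.h>
    #include <unistd.h>
    #include "tcmalloc/malloc_extension.h"

    // Worker thread that continuously takes and drops allocator locks.
    static void* Churn(void*) {
      for (;;) free(malloc(128));
      return nullptr;
    }

    int main() {
      tcmalloc::MallocExtension::EnableForkSupport();
      pthread_t t;
      pthread_create(&t, nullptr, Churn, nullptr);
      if (fork() == 0) {
        // Without the atfork handlers this malloc() could block forever on a
        // lock left held by Churn, a thread that does not exist in the child.
        void* p = malloc(32);
        free(p);
        _exit(0);
      }
      return 0;
    }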