// lightrwlock.h
  1. #pragma once
  2. #include <util/system/rwlock.h>
  3. #include <util/system/sanitizers.h>
  4. // TLightRWLock and TSAN are not friends...
  5. #if defined(_linux_) && !defined(_tsan_enabled_)
  6. /* TLightRWLock is optimized for read lock and very fast lock/unlock switching.
  7. Read lock increments counter.
  8. Write lock sets highest bit of counter (makes counter negative).
Whenever a thread tries to acquire a read lock, it increments
the counter. If the thread sees a negative counter value right
after the increment, a write lock was acquired in another thread.
In that case the thread decrements the counter back, wakes one thread on
UnshareFutex, waits on the TrappedFutex and then tries to acquire the read
lock from the beginning.
If the thread sees a positive counter value after the increment,
the read lock was successfully acquired and
the thread can proceed.
Whenever a thread tries to acquire a write lock, it sets the highest bit
of the counter. If the thread determines that the bit was already set,
a write lock was acquired in another thread. In that case the thread waits on
the TrappedFutex and then tries again from the beginning.
If the highest bit was successfully set, the thread checks whether any read
lock exists at the moment. If so, the thread waits on UnshareFutex. If there
are no more read locks, the write lock was successfully acquired and the
thread can proceed.
  26. */
  27. #include <linux/futex.h>
  28. #include <unistd.h>
  29. #include <sys/syscall.h>
  30. #include <errno.h>
  31. namespace NS_LightRWLock {
// Atomically adds `value` to `item` and returns the PREVIOUS value,
// with full sequential consistency.
static int Y_FORCE_INLINE AtomicFetchAdd(volatile int& item, int value) {
    return __atomic_fetch_add(&item, value, __ATOMIC_SEQ_CST);
}
  35. #if defined(_x86_64_) || defined(_i386_)
// Atomically sets bit `bit` of `item` (x86 `lock bts`) and returns the
// bit's PREVIOUS value (0 or 1), captured from the carry flag via `setc`.
static char Y_FORCE_INLINE AtomicSetBit(volatile int& item, unsigned bit) {
    char ret;
    __asm__ __volatile__(
        "lock bts %2,%0\n"
        "setc %1\n"
        : "+m"(item), "=rm"(ret)
        : "r"(bit)
        : "cc");
    // msan doesn't treat ret as initialized
    NSan::Unpoison(&ret, sizeof(ret));
    return ret;
}
  48. static char Y_FORCE_INLINE AtomicClearBit(volatile int& item, unsigned bit) {
  49. char ret;
  50. __asm__ __volatile__(
  51. "lock btc %2,%0\n"
  52. "setc %1\n"
  53. : "+m"(item), "=rm"(ret)
  54. : "r"(bit)
  55. : "cc");
  56. // msan doesn't treat ret as initialized
  57. NSan::Unpoison(&ret, sizeof(ret));
  58. return ret;
  59. }
  60. #else
  61. static char Y_FORCE_INLINE AtomicSetBit(volatile int& item, unsigned bit) {
  62. int prev = __atomic_fetch_or(&item, 1 << bit, __ATOMIC_SEQ_CST);
  63. return (prev & (1 << bit)) != 0 ? 1 : 0;
  64. }
  65. static char Y_FORCE_INLINE
  66. AtomicClearBit(volatile int& item, unsigned bit) {
  67. int prev = __atomic_fetch_and(&item, ~(1 << bit), __ATOMIC_SEQ_CST);
  68. return (prev & (1 << bit)) != 0 ? 1 : 0;
  69. }
  70. #endif
  71. #if defined(_x86_64_) || defined(_i386_) || defined (__aarch64__) || defined (__arm__) || defined (__powerpc64__)
  72. static bool AtomicLockHighByte(volatile int& item) {
  73. union TA {
  74. int x;
  75. char y[4];
  76. };
  77. volatile TA* ptr = reinterpret_cast<volatile TA*>(&item);
  78. char zero = 0;
  79. return __atomic_compare_exchange_n(&(ptr->y[3]), &zero, (char)128, true,
  80. __ATOMIC_SEQ_CST, __ATOMIC_RELAXED);
  81. }
  82. #endif
// Atomically stores `value` into `var` with RELEASE ordering: writes made
// by this thread before the store become visible to a thread that
// acquire-loads the new value.
template <typename TInt>
static void Y_FORCE_INLINE AtomicStore(volatile TInt& var, TInt value) {
    __atomic_store_n(&var, value, __ATOMIC_RELEASE);
}
// Atomically stores `value` into `var` with full sequential consistency;
// used where the store must be globally ordered against a following
// futex wake.
template <typename TInt>
static void Y_FORCE_INLINE SequenceStore(volatile TInt& var, TInt value) {
    __atomic_store_n(&var, value, __ATOMIC_SEQ_CST);
}
// Atomically loads `var` with ACQUIRE ordering, pairing with the
// release/seq-cst stores above.
template <typename TInt>
static TInt Y_FORCE_INLINE AtomicLoad(const volatile TInt& var) {
    return __atomic_load_n(&var, __ATOMIC_ACQUIRE);
}
// Blocks the caller while `fvar` still equals `value`.
// FUTEX_WAIT atomically re-checks `fvar == value` inside the kernel, so:
//  - result == 0 (woken, possibly spuriously): loop and re-arm the wait;
//    if the waker already changed `fvar`, the next call fails with
//    EWOULDBLOCK and we return;
//  - EWOULDBLOCK: `fvar` no longer equals `value` — done;
//  - EINTR: interrupted by a signal — retry;
//  - anything else is a programming error — abort.
static void Y_FORCE_INLINE FutexWait(volatile int& fvar, int value) {
    for (;;) {
        int result =
            syscall(SYS_futex, &fvar, FUTEX_WAIT_PRIVATE, value, NULL, NULL, 0);
        if (Y_UNLIKELY(result == -1)) {
            if (errno == EWOULDBLOCK)
                return;
            if (errno == EINTR)
                continue;
            Y_ABORT("futex error");
        }
    }
}
// Wakes up to `amount` threads currently blocked in FutexWait on `fvar`.
// Aborts the process on any syscall failure (FUTEX_WAKE_PRIVATE cannot
// legitimately fail here).
static void Y_FORCE_INLINE FutexWake(volatile int& fvar, int amount) {
    const int result =
        syscall(SYS_futex, &fvar, FUTEX_WAKE_PRIVATE, amount, NULL, NULL, 0);
    if (Y_UNLIKELY(result == -1))
        Y_ABORT("futex error");
}
  114. }
// Futex-based reader/writer lock; see the file-top comment for the full
// protocol. Counter_ bit 31 is the writer flag, bits 0..30 count active
// readers. Aligned to 64 bytes to keep the hot counter on its own cache
// line and avoid false sharing with neighbouring objects.
class alignas(64) TLightRWLock {
public:
    // spinCount is only stored here; it is presumably consumed by the
    // out-of-line slow paths, which are not defined in this header —
    // the inline fast paths below never read it.
    TLightRWLock(ui32 spinCount = 10)
        : Counter_(0)
        , TrappedFutex_(0)
        , UnshareFutex_(0)
        , SpinCount_(spinCount)
    {
    }

    TLightRWLock(const TLightRWLock&) = delete;
    void operator=(const TLightRWLock&) = delete;

    // Write-lock fast path: claim bit 31 via a CAS on the counter's high
    // byte. On success the lock is ours as soon as no readers remain
    // (low 31 bits == 0); otherwise wait for the readers to drain.
    // If the high byte was already busy, take the full slow path.
    Y_FORCE_INLINE void AcquireWrite() {
        using namespace NS_LightRWLock;
        if (AtomicLockHighByte(Counter_)) {
            if ((AtomicLoad(Counter_) & 0x7FFFFFFF) == 0)
                return;
            return WaitForUntrappedShared();
        }
        WaitForExclusiveAndUntrappedShared();
    }

    // Read-lock fast path: bump the reader count; a non-negative previous
    // value means no writer holds or claims the lock. A negative value
    // means bit 31 is set — back off in the slow path.
    Y_FORCE_INLINE void AcquireRead() {
        using namespace NS_LightRWLock;
        if (Y_LIKELY(AtomicFetchAdd(Counter_, 1) >= 0))
            return;
        WaitForUntrappedAndAcquireRead();
    }

    // Drop the writer bit, then wake every thread parked on TrappedFutex_
    // (0x7fffffff = "wake all"). TrappedFutex_ is reset with a seq-cst
    // store before the wake so late waiters observe it before sleeping.
    Y_FORCE_INLINE void ReleaseWrite() {
        using namespace NS_LightRWLock;
        AtomicClearBit(Counter_, 31);
        if (AtomicLoad(TrappedFutex_)) {
            SequenceStore(TrappedFutex_, 0);
            FutexWake(TrappedFutex_, 0x7fffffff);
        }
    }

    // Decrement the reader count. A negative previous value means a
    // writer has set bit 31 and may be sleeping on UnshareFutex_ waiting
    // for the last reader; once the reader count (low 31 bits) reaches
    // zero, wake exactly one such writer.
    Y_FORCE_INLINE void ReleaseRead() {
        using namespace NS_LightRWLock;
        if (Y_LIKELY(AtomicFetchAdd(Counter_, -1) >= 0))
            return;
        if (!AtomicLoad(UnshareFutex_))
            return;
        if ((AtomicLoad(Counter_) & 0x7fffffff) == 0) {
            SequenceStore(UnshareFutex_, 0);
            FutexWake(UnshareFutex_, 1);
        }
    }

private:
    // Bit 31: writer flag; bits 0..30: number of active readers.
    volatile int Counter_;
    // Futex that readers and writers sleep on while a writer is trapped in.
    volatile int TrappedFutex_;
    // Futex a writer sleeps on while waiting for readers to drain.
    volatile int UnshareFutex_;
    const ui32 SpinCount_;

    // Slow paths, declared here but defined elsewhere (not in this header).
    void WaitForUntrappedShared();
    void WaitForExclusiveAndUntrappedShared();
    void WaitForUntrappedAndAcquireRead();
};
  169. #else
  170. class TLightRWLock: public TRWMutex {
  171. public:
  172. TLightRWLock() {
  173. }
  174. TLightRWLock(ui32) {
  175. }
  176. };
  177. #endif
// RAII guards over TLightRWLock, analogous to TReadGuard/TWriteGuard.
using TLightReadGuard = TReadGuardBase<TLightRWLock>;
using TLightWriteGuard = TWriteGuardBase<TLightRWLock>;