prefetch.h 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209
  1. // Copyright 2023 The Abseil Authors
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // https://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. //
  15. // -----------------------------------------------------------------------------
  16. // File: prefetch.h
  17. // -----------------------------------------------------------------------------
  18. //
  19. // This header file defines prefetch functions to prefetch memory contents
  20. // into the first level cache (L1) for the current CPU. The prefetch logic
  21. // offered in this header is limited to prefetching first level cachelines
  22. // only, and is aimed at relatively 'simple' prefetching logic.
  23. //
  24. #ifndef ABSL_BASE_PREFETCH_H_
  25. #define ABSL_BASE_PREFETCH_H_
  26. #include "absl/base/attributes.h"
  27. #include "absl/base/config.h"
  28. #if defined(ABSL_INTERNAL_HAVE_SSE)
  29. #include <xmmintrin.h>
  30. #endif
  31. #if defined(_MSC_VER)
  32. #include <intrin.h>
  33. #if defined(ABSL_INTERNAL_HAVE_SSE)
  34. #pragma intrinsic(_mm_prefetch)
  35. #endif
  36. #endif
  37. namespace absl {
  38. ABSL_NAMESPACE_BEGIN
  // Prefetches the memory at `addr` into the L1 cache ahead of a later read.
  //
  // `addr` is the address of the memory to prefetch. Data prefetch instructions
  // are generated only if the target and compiler support them. When the
  // prefetch is issued some time before the memory is read, the data may
  // already be in the cache by the time the read occurs.
  //
  // This function prefetches with the highest degree of temporal locality:
  // where possible, the data is brought into all levels of the cache.
  //
  // Incorrect or gratuitous use of this function can degrade performance.
  // Use this function only when representative benchmarks show an improvement.
  //
  // Example:
  //
  //   // Computes incremental checksum for `data`.
  //   int ComputeChecksum(int sum, absl::string_view data);
  //
  //   // Computes cumulative checksum for all values in `data`
  //   int ComputeChecksum(absl::Span<const std::string> data) {
  //     int sum = 0;
  //     auto it = data.begin();
  //     auto pit = data.begin();
  //     auto end = data.end();
  //     for (int dist = 8; dist > 0 && pit != end; --dist, ++pit) {
  //       absl::PrefetchToLocalCache(pit->data());
  //     }
  //     for (; pit != end; ++pit, ++it) {
  //       sum = ComputeChecksum(sum, *it);
  //       absl::PrefetchToLocalCache(pit->data());
  //     }
  //     for (; it != end; ++it) {
  //       sum = ComputeChecksum(sum, *it);
  //     }
  //     return sum;
  //   }
  //
  void PrefetchToLocalCache(const void* addr);
  // Moves data into the L1 cache before it is read, or "prefetches" it.
  //
  // This function is identical to `PrefetchToLocalCache()` except that it has
  // non-temporal locality: the fetched data should not be left in any of the
  // cache tiers. This is useful for cases where the data is used only once /
  // short term, for example, invoking a destructor on an object.
  //
  // Incorrect or gratuitous use of this function can degrade performance.
  // Use this function only when representative benchmarks show an improvement.
  //
  // Example:
  //
  //   template <typename Iterator>
  //   void DestroyPointers(Iterator begin, Iterator end) {
  //     int dist = 8;
  //     auto prefetch_it = begin;
  //     while (prefetch_it != end && --dist) {
  //       absl::PrefetchToLocalCacheNta(*prefetch_it++);
  //     }
  //     while (prefetch_it != end) {
  //       delete *begin++;
  //       absl::PrefetchToLocalCacheNta(*prefetch_it++);
  //     }
  //     while (begin != end) {
  //       delete *begin++;
  //     }
  //   }
  //
  void PrefetchToLocalCacheNta(const void* addr);
  // Prefetches the memory at `addr` into the L1 cache with the intent to
  // modify it.
  //
  // This function is similar to `PrefetchToLocalCache()` except that it
  // prefetches cachelines with an 'intent to modify'. This typically includes
  // invalidating cache entries for this address in all other cache tiers, and
  // an exclusive access intent.
  //
  // Incorrect or gratuitous use of this function can degrade performance.
  // Because this function can invalidate cached cachelines on other caches and
  // computer cores, incorrect usage can have an even greater negative impact
  // than incorrect regular prefetches.
  // Use this function only when representative benchmarks show an improvement.
  //
  // Example:
  //
  //   void* Arena::Allocate(size_t size) {
  //     void* ptr = AllocateBlock(size);
  //     absl::PrefetchToLocalCacheForWrite(ptr);
  //     return ptr;
  //   }
  //
  void PrefetchToLocalCacheForWrite(const void* addr);
  130. #if ABSL_HAVE_BUILTIN(__builtin_prefetch) || defined(__GNUC__)
  131. #define ABSL_HAVE_PREFETCH 1
  132. // See __builtin_prefetch:
  133. // https://gcc.gnu.org/onlinedocs/gcc/Other-Builtins.html.
  134. //
  135. ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCache(
  136. const void* addr) {
  137. __builtin_prefetch(addr, 0, 3);
  138. }
  139. ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCacheNta(
  140. const void* addr) {
  141. __builtin_prefetch(addr, 0, 0);
  142. }
  143. ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCacheForWrite(
  144. const void* addr) {
  145. // [x86] gcc/clang don't generate PREFETCHW for __builtin_prefetch(.., 1)
  146. // unless -march=broadwell or newer; this is not generally the default, so we
  147. // manually emit prefetchw. PREFETCHW is recognized as a no-op on older Intel
  148. // processors and has been present on AMD processors since the K6-2.
  149. #if defined(__x86_64__) && !defined(__PRFCHW__)
  150. asm("prefetchw %0" : : "m"(*reinterpret_cast<const char*>(addr)));
  151. #else
  152. __builtin_prefetch(addr, 1, 3);
  153. #endif
  154. }
  155. #elif defined(ABSL_INTERNAL_HAVE_SSE)
  156. #define ABSL_HAVE_PREFETCH 1
  157. ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCache(
  158. const void* addr) {
  159. _mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_T0);
  160. }
  161. ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCacheNta(
  162. const void* addr) {
  163. _mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_NTA);
  164. }
  165. ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCacheForWrite(
  166. const void* addr) {
  167. #if defined(_MM_HINT_ET0)
  168. _mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_ET0);
  169. #elif !defined(_MSC_VER) && defined(__x86_64__)
  170. // _MM_HINT_ET0 is not universally supported. As we commented further
  171. // up, PREFETCHW is recognized as a no-op on older Intel processors
  172. // and has been present on AMD processors since the K6-2. We have this
  173. // disabled for MSVC compilers as this miscompiles on older MSVC compilers.
  174. asm("prefetchw %0" : : "m"(*reinterpret_cast<const char*>(addr)));
  175. #endif
  176. }
  177. #else
  178. ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCache(
  179. const void* addr) {}
  180. ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCacheNta(
  181. const void* addr) {}
  182. ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCacheForWrite(
  183. const void* addr) {}
  184. #endif
  185. ABSL_NAMESPACE_END
  186. } // namespace absl
  187. #endif // ABSL_BASE_PREFETCH_H_