cord_analysis.cc 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196
  1. // Copyright 2021 The Abseil Authors
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // https://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. #include "absl/strings/cord_analysis.h"
  15. #include <cassert>
  16. #include <cstddef>
  17. #include <cstdint>
  18. #include <unordered_set>
  19. #include "absl/base/config.h"
  20. #include "absl/base/nullability.h"
  21. #include "absl/strings/internal/cord_data_edge.h"
  22. #include "absl/strings/internal/cord_internal.h"
  23. #include "absl/strings/internal/cord_rep_btree.h"
  24. #include "absl/strings/internal/cord_rep_crc.h"
  25. namespace absl {
  26. ABSL_NAMESPACE_BEGIN
  27. namespace cord_internal {
  28. namespace {
  29. // Accounting mode for analyzing memory usage.
  30. enum class Mode { kFairShare, kTotal, kTotalMorePrecise };
  31. // CordRepRef holds a `const CordRep*` reference in rep, and depending on mode,
  32. // holds a 'fraction' representing a cumulative inverse refcount weight.
  33. template <Mode mode>
  34. struct CordRepRef {
  35. // Instantiates a CordRepRef instance.
  36. explicit CordRepRef(absl::Nonnull<const CordRep*> r) : rep(r) {}
  37. // Creates a child reference holding the provided child.
  38. // Overloaded to add cumulative reference count for kFairShare.
  39. CordRepRef Child(absl::Nonnull<const CordRep*> child) const {
  40. return CordRepRef(child);
  41. }
  42. absl::Nonnull<const CordRep*> rep;
  43. };
  44. // RawUsage holds the computed total number of bytes.
  45. template <Mode mode>
  46. struct RawUsage {
  47. size_t total = 0;
  48. // Add 'size' to total, ignoring the CordRepRef argument.
  49. void Add(size_t size, CordRepRef<mode>) { total += size; }
  50. };
  51. // Overloaded representation of RawUsage that tracks the set of objects
  52. // counted, and avoids double-counting objects referenced more than once
  53. // by the same Cord.
  54. template <>
  55. struct RawUsage<Mode::kTotalMorePrecise> {
  56. size_t total = 0;
  57. // TODO(b/289250880): Replace this with a flat_hash_set.
  58. std::unordered_set<absl::Nonnull<const CordRep*>> counted;
  59. void Add(size_t size, CordRepRef<Mode::kTotalMorePrecise> repref) {
  60. if (counted.insert(repref.rep).second) {
  61. total += size;
  62. }
  63. }
  64. };
  65. // Returns n / refcount avoiding a div for the common refcount == 1.
  66. template <typename refcount_t>
  67. double MaybeDiv(double d, refcount_t refcount) {
  68. return refcount == 1 ? d : d / refcount;
  69. }
  70. // Overloaded 'kFairShare' specialization for CordRepRef. This class holds a
  71. // `fraction` value which represents a cumulative inverse refcount weight.
  72. // For example, a top node with a reference count of 2 will have a fraction
  73. // value of 1/2 = 0.5, representing the 'fair share' of memory it references.
  74. // A node below such a node with a reference count of 5 then has a fraction of
  75. // 0.5 / 5 = 0.1 representing the fair share of memory below that node, etc.
  76. template <>
  77. struct CordRepRef<Mode::kFairShare> {
  78. // Creates a CordRepRef with the provided rep and top (parent) fraction.
  79. explicit CordRepRef(absl::Nonnull<const CordRep*> r, double frac = 1.0)
  80. : rep(r), fraction(MaybeDiv(frac, r->refcount.Get())) {}
  81. // Returns a CordRepRef with a fraction of `this->fraction / child.refcount`
  82. CordRepRef Child(absl::Nonnull<const CordRep*> child) const {
  83. return CordRepRef(child, fraction);
  84. }
  85. absl::Nonnull<const CordRep*> rep;
  86. double fraction;
  87. };
  88. // Overloaded 'kFairShare' specialization for RawUsage
  89. template <>
  90. struct RawUsage<Mode::kFairShare> {
  91. double total = 0;
  92. // Adds `size` multiplied by `rep.fraction` to the total size.
  93. void Add(size_t size, CordRepRef<Mode::kFairShare> rep) {
  94. total += static_cast<double>(size) * rep.fraction;
  95. }
  96. };
  97. // Computes the estimated memory size of the provided data edge.
  98. // External reps are assumed 'heap allocated at their exact size'.
  99. template <Mode mode>
  100. void AnalyzeDataEdge(CordRepRef<mode> rep, RawUsage<mode>& raw_usage) {
  101. assert(IsDataEdge(rep.rep));
  102. // Consume all substrings
  103. if (rep.rep->tag == SUBSTRING) {
  104. raw_usage.Add(sizeof(CordRepSubstring), rep);
  105. rep = rep.Child(rep.rep->substring()->child);
  106. }
  107. // Consume FLAT / EXTERNAL
  108. const size_t size =
  109. rep.rep->tag >= FLAT
  110. ? rep.rep->flat()->AllocatedSize()
  111. : rep.rep->length + sizeof(CordRepExternalImpl<intptr_t>);
  112. raw_usage.Add(size, rep);
  113. }
  114. // Computes the memory size of the provided Btree tree.
  115. template <Mode mode>
  116. void AnalyzeBtree(CordRepRef<mode> rep, RawUsage<mode>& raw_usage) {
  117. raw_usage.Add(sizeof(CordRepBtree), rep);
  118. const CordRepBtree* tree = rep.rep->btree();
  119. if (tree->height() > 0) {
  120. for (CordRep* edge : tree->Edges()) {
  121. AnalyzeBtree(rep.Child(edge), raw_usage);
  122. }
  123. } else {
  124. for (CordRep* edge : tree->Edges()) {
  125. AnalyzeDataEdge(rep.Child(edge), raw_usage);
  126. }
  127. }
  128. }
  129. template <Mode mode>
  130. size_t GetEstimatedUsage(absl::Nonnull<const CordRep*> rep) {
  131. // Zero initialized memory usage totals.
  132. RawUsage<mode> raw_usage;
  133. // Capture top level node and refcount into a CordRepRef.
  134. CordRepRef<mode> repref(rep);
  135. // Consume the top level CRC node if present.
  136. if (repref.rep->tag == CRC) {
  137. raw_usage.Add(sizeof(CordRepCrc), repref);
  138. if (repref.rep->crc()->child == nullptr) {
  139. return static_cast<size_t>(raw_usage.total);
  140. }
  141. repref = repref.Child(repref.rep->crc()->child);
  142. }
  143. if (IsDataEdge(repref.rep)) {
  144. AnalyzeDataEdge(repref, raw_usage);
  145. } else if (repref.rep->tag == BTREE) {
  146. AnalyzeBtree(repref, raw_usage);
  147. } else {
  148. assert(false);
  149. }
  150. return static_cast<size_t>(raw_usage.total);
  151. }
  152. } // namespace
  153. size_t GetEstimatedMemoryUsage(absl::Nonnull<const CordRep*> rep) {
  154. return GetEstimatedUsage<Mode::kTotal>(rep);
  155. }
  156. size_t GetEstimatedFairShareMemoryUsage(absl::Nonnull<const CordRep*> rep) {
  157. return GetEstimatedUsage<Mode::kFairShare>(rep);
  158. }
  159. size_t GetMorePreciseMemoryUsage(absl::Nonnull<const CordRep*> rep) {
  160. return GetEstimatedUsage<Mode::kTotalMorePrecise>(rep);
  161. }
  162. } // namespace cord_internal
  163. ABSL_NAMESPACE_END
  164. } // namespace absl