cord.cc 45 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379
  1. // Copyright 2020 The Abseil Authors.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // https://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. #include "y_absl/strings/cord.h"
  15. #include <algorithm>
  16. #include <atomic>
  17. #include <cstddef>
  18. #include <cstdio>
  19. #include <cstdlib>
  20. #include <iomanip>
  21. #include <ios>
  22. #include <iostream>
  23. #include <limits>
  24. #include <ostream>
  25. #include <sstream>
  26. #include <type_traits>
  27. #include <unordered_set>
  28. #include <vector>
  29. #include "y_absl/base/casts.h"
  30. #include "y_absl/base/internal/raw_logging.h"
  31. #include "y_absl/base/macros.h"
  32. #include "y_absl/base/port.h"
  33. #include "y_absl/container/fixed_array.h"
  34. #include "y_absl/container/inlined_vector.h"
  35. #include "y_absl/crc/internal/crc_cord_state.h"
  36. #include "y_absl/strings/cord_buffer.h"
  37. #include "y_absl/strings/escaping.h"
  38. #include "y_absl/strings/internal/cord_data_edge.h"
  39. #include "y_absl/strings/internal/cord_internal.h"
  40. #include "y_absl/strings/internal/cord_rep_btree.h"
  41. #include "y_absl/strings/internal/cord_rep_crc.h"
  42. #include "y_absl/strings/internal/cord_rep_flat.h"
  43. #include "y_absl/strings/internal/cordz_statistics.h"
  44. #include "y_absl/strings/internal/cordz_update_scope.h"
  45. #include "y_absl/strings/internal/cordz_update_tracker.h"
  46. #include "y_absl/strings/internal/resize_uninitialized.h"
  47. #include "y_absl/strings/str_cat.h"
  48. #include "y_absl/strings/str_join.h"
  49. #include "y_absl/strings/string_view.h"
  50. namespace y_absl {
  51. Y_ABSL_NAMESPACE_BEGIN
  52. using ::y_absl::cord_internal::CordRep;
  53. using ::y_absl::cord_internal::CordRepBtree;
  54. using ::y_absl::cord_internal::CordRepCrc;
  55. using ::y_absl::cord_internal::CordRepExternal;
  56. using ::y_absl::cord_internal::CordRepFlat;
  57. using ::y_absl::cord_internal::CordRepSubstring;
  58. using ::y_absl::cord_internal::CordzUpdateTracker;
  59. using ::y_absl::cord_internal::InlineData;
  60. using ::y_absl::cord_internal::kMaxFlatLength;
  61. using ::y_absl::cord_internal::kMinFlatLength;
  62. using ::y_absl::cord_internal::kInlinedVectorSize;
  63. using ::y_absl::cord_internal::kMaxBytesToCopy;
  64. static void DumpNode(CordRep* rep, bool include_data, std::ostream* os,
  65. int indent = 0);
  66. static bool VerifyNode(CordRep* root, CordRep* start_node,
  67. bool full_validation);
// Returns `node` unchanged. In debug builds, validates the tree rooted at
// `node` first (light validation by default; full validation only when
// EXTRA_CORD_VALIDATION is defined). Accepts nullptr (the empty cord).
static inline CordRep* VerifyTree(CordRep* node) {
  // Verification is expensive, so only do it in debug mode.
  // Even in debug mode we normally do only light validation.
  // If you are debugging Cord itself, you should define the
  // macro EXTRA_CORD_VALIDATION, e.g. by adding
  // --copt=-DEXTRA_CORD_VALIDATION to the blaze line.
#ifdef EXTRA_CORD_VALIDATION
  assert(node == nullptr || VerifyNode(node, node, /*full_validation=*/true));
#else   // EXTRA_CORD_VALIDATION
  assert(node == nullptr || VerifyNode(node, node, /*full_validation=*/false));
#endif  // EXTRA_CORD_VALIDATION
  // Keeps VerifyNode referenced in NDEBUG builds (asserts compile away),
  // avoiding an unused-function warning.
  static_cast<void>(&VerifyNode);
  return node;
}
  82. static CordRepFlat* CreateFlat(const char* data, size_t length,
  83. size_t alloc_hint) {
  84. CordRepFlat* flat = CordRepFlat::New(length + alloc_hint);
  85. flat->length = length;
  86. memcpy(flat->Data(), data, length);
  87. return flat;
  88. }
  89. // Creates a new flat or Btree out of the specified array.
  90. // The returned node has a refcount of 1.
  91. static CordRep* NewBtree(const char* data, size_t length, size_t alloc_hint) {
  92. if (length <= kMaxFlatLength) {
  93. return CreateFlat(data, length, alloc_hint);
  94. }
  95. CordRepFlat* flat = CreateFlat(data, kMaxFlatLength, 0);
  96. data += kMaxFlatLength;
  97. length -= kMaxFlatLength;
  98. auto* root = CordRepBtree::Create(flat);
  99. return CordRepBtree::Append(root, {data, length}, alloc_hint);
  100. }
  101. // Create a new tree out of the specified array.
  102. // The returned node has a refcount of 1.
  103. static CordRep* NewTree(const char* data, size_t length, size_t alloc_hint) {
  104. if (length == 0) return nullptr;
  105. return NewBtree(data, length, alloc_hint);
  106. }
namespace cord_internal {

// Initializes `rep` as an EXTERNAL node that views (does not own) `data`.
// `data` must be non-empty; the bytes must outlive the rep (the releaser
// attached by the caller is responsible for their lifetime).
void InitializeCordRepExternal(y_absl::string_view data, CordRepExternal* rep) {
  assert(!data.empty());
  rep->length = data.size();
  rep->tag = EXTERNAL;
  rep->base = data.data();
  VerifyTree(rep);
}

}  // namespace cord_internal
// Creates a CordRep from the provided string. If the string is large enough,
// and not wasteful, we move the string into an external cord rep, preserving
// the already allocated string contents.
// Requires the provided string length to be larger than `kMaxInline`.
static CordRep* CordRepFromString(TString&& src) {
  assert(src.length() > cord_internal::kMaxInline);
  if (
      // String is short: copy data to avoid external block overhead.
      src.size() <= kMaxBytesToCopy ||
      // String is wasteful: copy data to avoid pinning too much unused memory.
      src.size() < src.capacity() / 2
  ) {
    return NewTree(src.data(), src.size(), 0);
  }

  // The string is moved into the external rep itself; its destructor (run via
  // the releaser-holding impl) frees the storage when the rep dies.
  struct StringReleaser {
    void operator()(y_absl::string_view /* data */) {}
    TString data;
  };
  const y_absl::string_view original_data = src;
  auto* rep =
      static_cast<::y_absl::cord_internal::CordRepExternalImpl<StringReleaser>*>(
          y_absl::cord_internal::NewExternalRep(original_data,
                                                StringReleaser{std::move(src)}));
  // Moving src may have invalidated its data pointer, so adjust it.
  rep->base = rep->template get<0>().data.data();
  return rep;
}
  143. // --------------------------------------------------------------------
  144. // Cord::InlineRep functions
  145. #ifdef Y_ABSL_INTERNAL_NEED_REDUNDANT_CONSTEXPR_DECL
  146. constexpr unsigned char Cord::InlineRep::kMaxInline;
  147. #endif
// Copies `n` bytes (n <= kMaxInline) from `data` into the inline buffer,
// replacing the current contents.
inline void Cord::InlineRep::set_data(const char* data, size_t n) {
  static_assert(kMaxInline == 15, "set_data is hard-coded for a length of 15");
  data_.set_inline_data(data, n);
}
// Resets to an empty inline rep of size `n` (n <= kMaxInline) and returns a
// pointer to the inline buffer for the caller to fill in.
inline char* Cord::InlineRep::set_data(size_t n) {
  assert(n <= kMaxInline);
  ResetToEmpty();
  set_inline_size(n);
  return data_.as_chars();
}
  158. inline void Cord::InlineRep::reduce_size(size_t n) {
  159. size_t tag = inline_size();
  160. assert(tag <= kMaxInline);
  161. assert(tag >= n);
  162. tag -= n;
  163. memset(data_.as_chars() + tag, 0, n);
  164. set_inline_size(tag);
  165. }
// Removes the first `n` bytes of the inline value by shifting the remainder
// to the front, then shrinking (and zero-filling) the tail.
inline void Cord::InlineRep::remove_prefix(size_t n) {
  cord_internal::SmallMemmove(data_.as_chars(), data_.as_chars() + n,
                              inline_size() - n);
  reduce_size(n);
}
  171. // Returns `rep` converted into a CordRepBtree.
  172. // Directly returns `rep` if `rep` is already a CordRepBtree.
  173. static CordRepBtree* ForceBtree(CordRep* rep) {
  174. return rep->IsBtree()
  175. ? rep->btree()
  176. : CordRepBtree::Create(cord_internal::RemoveCrcNode(rep));
  177. }
// Appends `tree` to this (currently inlined) rep, converting it to a tree
// rep. Any existing inline bytes are first promoted to a flat node so they
// precede `tree` in the result.
void Cord::InlineRep::AppendTreeToInlined(CordRep* tree,
                                          MethodIdentifier method) {
  assert(!is_tree());
  if (!data_.is_empty()) {
    CordRepFlat* flat = MakeFlatWithExtraCapacity(0);
    tree = CordRepBtree::Append(CordRepBtree::Create(flat), tree);
  }
  EmplaceTree(tree, method);
}
// Appends `tree` to this (currently tree-backed) rep, forcing the existing
// root into btree form first. Cordz updates are scoped to `method`.
void Cord::InlineRep::AppendTreeToTree(CordRep* tree, MethodIdentifier method) {
  assert(is_tree());
  const CordzUpdateScope scope(data_.cordz_info(), method);
  tree = CordRepBtree::Append(ForceBtree(data_.as_tree()), tree);
  SetTree(tree, scope);
}
  193. void Cord::InlineRep::AppendTree(CordRep* tree, MethodIdentifier method) {
  194. assert(tree != nullptr);
  195. assert(tree->length != 0);
  196. assert(!tree->IsCrc());
  197. if (data_.is_tree()) {
  198. AppendTreeToTree(tree, method);
  199. } else {
  200. AppendTreeToInlined(tree, method);
  201. }
  202. }
// Prepends `tree` to this (currently inlined) rep, converting it to a tree
// rep. Any existing inline bytes are first promoted to a flat node so they
// follow `tree` in the result.
void Cord::InlineRep::PrependTreeToInlined(CordRep* tree,
                                           MethodIdentifier method) {
  assert(!is_tree());
  if (!data_.is_empty()) {
    CordRepFlat* flat = MakeFlatWithExtraCapacity(0);
    tree = CordRepBtree::Prepend(CordRepBtree::Create(flat), tree);
  }
  EmplaceTree(tree, method);
}
// Prepends `tree` to this (currently tree-backed) rep, forcing the existing
// root into btree form first. Cordz updates are scoped to `method`.
void Cord::InlineRep::PrependTreeToTree(CordRep* tree,
                                        MethodIdentifier method) {
  assert(is_tree());
  const CordzUpdateScope scope(data_.cordz_info(), method);
  tree = CordRepBtree::Prepend(ForceBtree(data_.as_tree()), tree);
  SetTree(tree, scope);
}
  219. void Cord::InlineRep::PrependTree(CordRep* tree, MethodIdentifier method) {
  220. assert(tree != nullptr);
  221. assert(tree->length != 0);
  222. assert(!tree->IsCrc());
  223. if (data_.is_tree()) {
  224. PrependTreeToTree(tree, method);
  225. } else {
  226. PrependTreeToInlined(tree, method);
  227. }
  228. }
// Searches for a non-full flat node at the rightmost leaf of the tree. If a
// suitable leaf is found, the function will update the length field for all
// nodes to account for the size increase. The append region address will be
// written to region and the actual size increase will be written to size.
// Returns true (with *region/*size set) only when `root` is uniquely owned,
// so the bytes can be written in place without affecting other cords.
static inline bool PrepareAppendRegion(CordRep* root, char** region,
                                       size_t* size, size_t max_length) {
  // Btree roots hand out their own append buffer from the rightmost leaf.
  if (root->IsBtree() && root->refcount.IsOne()) {
    Span<char> span = root->btree()->GetAppendBuffer(max_length);
    if (!span.empty()) {
      *region = span.data();
      *size = span.size();
      return true;
    }
  }

  // Otherwise the only in-place candidate is a uniquely-owned flat root.
  CordRep* dst = root;
  if (!dst->IsFlat() || !dst->refcount.IsOne()) {
    *region = nullptr;
    *size = 0;
    return false;
  }

  const size_t in_use = dst->length;
  const size_t capacity = dst->flat()->Capacity();
  if (in_use == capacity) {
    *region = nullptr;
    *size = 0;
    return false;
  }

  // Grow the flat by as much spare capacity as the caller can use; the
  // caller is expected to fill exactly *size bytes at *region.
  const size_t size_increase = std::min(capacity - in_use, max_length);
  dst->length += size_increase;

  *region = dst->flat()->Data() + in_use;
  *size = size_increase;
  return true;
}
// Slow-path assignment used when either side holds a tree. Manages cordz
// sampling hand-off: statement order below is deliberate (untrack before
// overwriting cordz_info, unref the old tree last).
void Cord::InlineRep::AssignSlow(const Cord::InlineRep& src) {
  assert(&src != this);
  assert(is_tree() || src.is_tree());
  auto constexpr method = CordzUpdateTracker::kAssignCord;
  // Destination is inlined: just adopt a new reference to src's tree.
  if (Y_ABSL_PREDICT_TRUE(!is_tree())) {
    EmplaceTree(CordRep::Ref(src.as_tree()), src.data_, method);
    return;
  }

  CordRep* tree = as_tree();
  if (CordRep* src_tree = src.tree()) {
    // Leave any existing `cordz_info` in place, and let MaybeTrackCord()
    // decide if this cord should be (or remains to be) sampled or not.
    data_.set_tree(CordRep::Ref(src_tree));
    CordzInfo::MaybeTrackCord(data_, src.data_, method);
  } else {
    // src is inlined: stop sampling this cord and copy the inline bytes.
    CordzInfo::MaybeUntrackCord(data_.cordz_info());
    data_ = src.data_;
  }
  // Release our previous tree only after data_ no longer references it.
  CordRep::Unref(tree);
}
// Releases the tree (and its cordz sampling record) if one is held.
// Leaves data_ itself untouched; callers reset or overwrite it.
void Cord::InlineRep::UnrefTree() {
  if (is_tree()) {
    CordzInfo::MaybeUntrackCord(data_.cordz_info());
    CordRep::Unref(tree());
  }
}
  288. // --------------------------------------------------------------------
  289. // Constructors and destructors
// Constructs a Cord from `src`: inlined when it fits in kMaxInline bytes,
// otherwise backed by a newly built tree attributed to `method` for cordz.
Cord::Cord(y_absl::string_view src, MethodIdentifier method)
    : contents_(InlineData::kDefaultInit) {
  const size_t n = src.size();
  if (n <= InlineRep::kMaxInline) {
    contents_.set_data(src.data(), n);
  } else {
    CordRep* rep = NewTree(src.data(), n, 0);
    contents_.EmplaceTree(rep, method);
  }
}
// Constructs a Cord from a string-like rvalue. Small strings are copied
// inline; large ones are converted via CordRepFromString, which may adopt
// the string's buffer as an external rep instead of copying.
template <typename T, Cord::EnableIfString<T>>
Cord::Cord(T&& src) : contents_(InlineData::kDefaultInit) {
  if (src.size() <= InlineRep::kMaxInline) {
    contents_.set_data(src.data(), src.size());
  } else {
    CordRep* rep = CordRepFromString(std::forward<T>(src));
    contents_.EmplaceTree(rep, CordzUpdateTracker::kConstructorString);
  }
}

// Explicit instantiation for the project string type.
template Cord::Cord(TString&& src);
// The destruction code is separate so that the compiler can determine
// that it does not need to call the destructor on a moved-from Cord.
// Requires a tree rep; untracks cordz sampling before releasing the tree.
void Cord::DestroyCordSlow() {
  assert(contents_.is_tree());
  CordzInfo::MaybeUntrackCord(contents_.cordz_info());
  CordRep::Unref(VerifyTree(contents_.as_tree()));
}
  317. // --------------------------------------------------------------------
  318. // Mutators
  319. void Cord::Clear() {
  320. if (CordRep* tree = contents_.clear()) {
  321. CordRep::Unref(tree);
  322. }
  323. }
// Assigns a string larger than kMaxBytesToCopy, which may be adopted as an
// external rep rather than copied. The old tree (if any) is released after
// the new rep is installed.
Cord& Cord::AssignLargeString(TString&& src) {
  auto constexpr method = CordzUpdateTracker::kAssignString;
  assert(src.size() > kMaxBytesToCopy);
  CordRep* rep = CordRepFromString(std::move(src));
  if (CordRep* tree = contents_.tree()) {
    CordzUpdateScope scope(contents_.cordz_info(), method);
    contents_.SetTree(rep, scope);
    CordRep::Unref(tree);
  } else {
    contents_.EmplaceTree(rep, method);
  }
  return *this;
}
// Assigns `src` to this cord. `src` may alias this cord's own storage
// (e.g. assigning a subrange of *this), hence the careful ordering and the
// use of memmove below.
Cord& Cord::operator=(y_absl::string_view src) {
  auto constexpr method = CordzUpdateTracker::kAssignString;
  const char* data = src.data();
  size_t length = src.size();
  CordRep* tree = contents_.tree();
  if (length <= InlineRep::kMaxInline) {
    // Embed into this->contents_, which is somewhat subtle:
    // - MaybeUntrackCord must be called before Unref(tree).
    // - MaybeUntrackCord must be called before set_data() clobbers cordz_info.
    // - set_data() must be called before Unref(tree) as it may reference tree.
    if (tree != nullptr) CordzInfo::MaybeUntrackCord(contents_.cordz_info());
    contents_.set_data(data, length);
    if (tree != nullptr) CordRep::Unref(tree);
    return *this;
  }
  if (tree != nullptr) {
    CordzUpdateScope scope(contents_.cordz_info(), method);
    if (tree->IsFlat() && tree->flat()->Capacity() >= length &&
        tree->refcount.IsOne()) {
      // Copy in place if the existing FLAT node is reusable.
      // memmove (not memcpy): src may point into this very buffer.
      memmove(tree->flat()->Data(), data, length);
      tree->length = length;
      VerifyTree(tree);
      return *this;
    }
    // Build the new tree before releasing the old one: src may alias it.
    contents_.SetTree(NewTree(data, length, 0), scope);
    CordRep::Unref(tree);
  } else {
    contents_.EmplaceTree(NewTree(data, length, 0), method);
  }
  return *this;
}
// TODO(sanjay): Move to Cord::InlineRep section of file. For now,
// we keep it here to make diffs easier.
//
// Appends `src` to this rep. Fast paths: fill spare capacity of an
// existing (uniquely owned) flat/btree tail, or grow the inline buffer.
// Any remainder is appended via the btree representation.
void Cord::InlineRep::AppendArray(y_absl::string_view src,
                                  MethodIdentifier method) {
  MaybeRemoveEmptyCrcNode();
  if (src.empty()) return;  // memcpy(_, nullptr, 0) is undefined.

  size_t appended = 0;
  CordRep* rep = tree();
  const CordRep* const root = rep;
  CordzUpdateScope scope(root ? cordz_info() : nullptr, method);
  if (root != nullptr) {
    // Tree-backed: try to write directly into spare capacity in place.
    rep = cord_internal::RemoveCrcNode(rep);
    char* region;
    if (PrepareAppendRegion(rep, &region, &appended, src.size())) {
      memcpy(region, src.data(), appended);
    }
  } else {
    // Try to fit in the inline buffer if possible.
    size_t inline_length = inline_size();
    if (src.size() <= kMaxInline - inline_length) {
      // Append new data to embedded array
      set_inline_size(inline_length + src.size());
      memcpy(data_.as_chars() + inline_length, src.data(), src.size());
      return;
    }

    // Allocate flat to be a perfect fit on first append exceeding inlined size.
    // Subsequent growth will use amortized growth until we reach maximum flat
    // size.
    rep = CordRepFlat::New(inline_length + src.size());
    appended = std::min(src.size(), rep->flat()->Capacity() - inline_length);
    memcpy(rep->flat()->Data(), data_.as_chars(), inline_length);
    memcpy(rep->flat()->Data() + inline_length, src.data(), appended);
    rep->length = inline_length + appended;
  }

  src.remove_prefix(appended);
  if (src.empty()) {
    CommitTree(root, rep, scope, method);
    return;
  }

  // Remainder did not fit: append it through the btree with growth headroom.
  // TODO(b/192061034): keep legacy 10% growth rate: consider other rates.
  rep = ForceBtree(rep);
  const size_t min_growth = std::max<size_t>(rep->length / 10, src.size());
  rep = CordRepBtree::Append(rep->btree(), src, min_growth - src.size());

  CommitTree(root, rep, scope, method);
}
// Returns this cord's tree with an added reference (the cord keeps its own).
// NOTE(review): appears to assume a tree is present — confirm callers.
inline CordRep* Cord::TakeRep() const& {
  return CordRep::Ref(contents_.tree());
}
// Moves the tree out of this (rvalue) cord, leaving it empty. The caller
// takes over the cord's existing reference; no refcount traffic needed.
inline CordRep* Cord::TakeRep() && {
  CordRep* rep = contents_.tree();
  contents_.clear();
  return rep;
}
// Shared implementation for Append(const Cord&) and Append(Cord&&).
// `C` is `const Cord&` or `Cord&&`; rvalues allow stealing src's tree.
template <typename C>
inline void Cord::AppendImpl(C&& src) {
  auto constexpr method = CordzUpdateTracker::kAppendCord;

  contents_.MaybeRemoveEmptyCrcNode();
  if (src.empty()) return;

  if (empty()) {
    // Since destination is empty, we can avoid allocating a node,
    if (src.contents_.is_tree()) {
      // by taking the tree directly
      CordRep* rep =
          cord_internal::RemoveCrcNode(std::forward<C>(src).TakeRep());
      contents_.EmplaceTree(rep, method);
    } else {
      // or copying over inline data
      contents_.data_ = src.contents_.data_;
    }
    return;
  }

  // For short cords, it is faster to copy data if there is room in dst.
  const size_t src_size = src.contents_.size();
  if (src_size <= kMaxBytesToCopy) {
    CordRep* src_tree = src.contents_.tree();
    if (src_tree == nullptr) {
      // src has embedded data.
      contents_.AppendArray({src.contents_.data(), src_size}, method);
      return;
    }
    if (src_tree->IsFlat()) {
      // src tree just has one flat node.
      contents_.AppendArray({src_tree->flat()->Data(), src_size}, method);
      return;
    }
    if (&src == this) {
      // ChunkIterator below assumes that src is not modified during traversal.
      Append(Cord(src));
      return;
    }
    // TODO(mec): Should we only do this if "dst" has space?
    for (y_absl::string_view chunk : src.Chunks()) {
      Append(chunk);
    }
    return;
  }

  // Guaranteed to be a tree (kMaxBytesToCopy > kInlinedSize)
  CordRep* rep = cord_internal::RemoveCrcNode(std::forward<C>(src).TakeRep());
  contents_.AppendTree(rep, CordzUpdateTracker::kAppendCord);
}
// Tries to extract a writable flat with at least `min_capacity` spare bytes
// from `rep`. On success, `.extracted` is the flat and `.tree` the remainder
// (possibly nullptr); on failure, `.tree == rep` and `.extracted == nullptr`.
static CordRep::ExtractResult ExtractAppendBuffer(CordRep* rep,
                                                  size_t min_capacity) {
  switch (rep->tag) {
    case cord_internal::BTREE:
      return CordRepBtree::ExtractAppendBuffer(rep->btree(), min_capacity);
    default:
      // A uniquely-owned flat with enough spare capacity is itself the buffer.
      if (rep->IsFlat() && rep->refcount.IsOne() &&
          rep->flat()->Capacity() - rep->length >= min_capacity) {
        return {nullptr, rep};
      }
      return {rep, nullptr};
  }
}
// Creates a CordBuffer seeded with the inline bytes of `data`, which is
// reset to empty. `block_size == 0` selects the default capacity policy.
static CordBuffer CreateAppendBuffer(InlineData& data, size_t block_size,
                                     size_t capacity) {
  // Watch out for overflow, people can ask for size_t::max().
  const size_t size = data.inline_size();
  const size_t max_capacity = std::numeric_limits<size_t>::max() - size;
  capacity = (std::min)(max_capacity, capacity) + size;
  CordBuffer buffer =
      block_size ? CordBuffer::CreateWithCustomLimit(block_size, capacity)
                 : CordBuffer::CreateWithDefaultLimit(capacity);
  cord_internal::SmallMemmove(buffer.data(), data.as_chars(), size);
  buffer.SetLength(size);
  data = {};
  return buffer;
}
// Slow path for GetAppendBuffer: either extracts a reusable flat from the
// current tree, or allocates a fresh buffer (seeded with any inline bytes).
CordBuffer Cord::GetAppendBufferSlowPath(size_t block_size, size_t capacity,
                                         size_t min_capacity) {
  auto constexpr method = CordzUpdateTracker::kGetAppendBuffer;
  CordRep* tree = contents_.tree();
  if (tree != nullptr) {
    CordzUpdateScope scope(contents_.cordz_info(), method);
    CordRep::ExtractResult result = ExtractAppendBuffer(tree, min_capacity);
    if (result.extracted != nullptr) {
      // The extracted flat becomes the buffer; the remainder stays the tree.
      contents_.SetTreeOrEmpty(result.tree, scope);
      return CordBuffer(result.extracted->flat());
    }
    return block_size ? CordBuffer::CreateWithCustomLimit(block_size, capacity)
                      : CordBuffer::CreateWithDefaultLimit(capacity);
  }
  return CreateAppendBuffer(contents_.data_, block_size, capacity);
}
// Appends a copy of `src` (shares tree nodes where possible).
void Cord::Append(const Cord& src) {
  AppendImpl(src);
}
// Appends `src`, stealing its tree when possible; `src` is left empty.
void Cord::Append(Cord&& src) {
  AppendImpl(std::move(src));
}
// Appends a string-like rvalue. Small strings are copied; large ones are
// converted via CordRepFromString (which may adopt the string's buffer).
template <typename T, Cord::EnableIfString<T>>
void Cord::Append(T&& src) {
  if (src.size() <= kMaxBytesToCopy) {
    Append(y_absl::string_view(src));
  } else {
    CordRep* rep = CordRepFromString(std::forward<T>(src));
    contents_.AppendTree(rep, CordzUpdateTracker::kAppendString);
  }
}

// Explicit instantiation for the project string type.
template void Cord::Append(TString&& src);
// Prepends a copy of `src`. Tree-backed sources share nodes via a new
// reference; inlined sources are forwarded as a string_view.
void Cord::Prepend(const Cord& src) {
  contents_.MaybeRemoveEmptyCrcNode();
  if (src.empty()) return;

  CordRep* src_tree = src.contents_.tree();
  if (src_tree != nullptr) {
    CordRep::Ref(src_tree);
    contents_.PrependTree(cord_internal::RemoveCrcNode(src_tree),
                          CordzUpdateTracker::kPrependCord);
    return;
  }

  // `src` cord is inlined.
  y_absl::string_view src_contents(src.contents_.data(), src.contents_.size());
  return Prepend(src_contents);
}
// Prepends `src` bytes. Uses the inline buffer when the combined size still
// fits; otherwise builds a tree node and prepends it.
void Cord::PrependArray(y_absl::string_view src, MethodIdentifier method) {
  contents_.MaybeRemoveEmptyCrcNode();
  if (src.empty()) return;  // memcpy(_, nullptr, 0) is undefined.

  if (!contents_.is_tree()) {
    size_t cur_size = contents_.inline_size();
    if (cur_size + src.size() <= InlineRep::kMaxInline) {
      // Use embedded storage.
      // Built in a temporary first: src may alias contents_.data().
      InlineData data;
      data.set_inline_size(cur_size + src.size());
      memcpy(data.as_chars(), src.data(), src.size());
      memcpy(data.as_chars() + src.size(), contents_.data(), cur_size);
      contents_.data_ = data;
      return;
    }
  }
  CordRep* rep = NewTree(src.data(), src.size(), 0);
  contents_.PrependTree(rep, method);
}
// Appends a small non-empty `src` (at most one flat's worth) without extra
// growth headroom: inline if it fits, else as a single exact-size flat.
void Cord::AppendPrecise(y_absl::string_view src, MethodIdentifier method) {
  assert(!src.empty());
  assert(src.size() <= cord_internal::kMaxFlatLength);
  if (contents_.remaining_inline_capacity() >= src.size()) {
    const size_t inline_length = contents_.inline_size();
    contents_.set_inline_size(inline_length + src.size());
    memcpy(contents_.data_.as_chars() + inline_length, src.data(), src.size());
  } else {
    contents_.AppendTree(CordRepFlat::Create(src), method);
  }
}
// Prepends a small non-empty `src` (at most one flat's worth) without extra
// growth headroom: inline if it fits, else as a single exact-size flat.
void Cord::PrependPrecise(y_absl::string_view src, MethodIdentifier method) {
  assert(!src.empty());
  assert(src.size() <= cord_internal::kMaxFlatLength);
  if (contents_.remaining_inline_capacity() >= src.size()) {
    const size_t cur_size = contents_.inline_size();
    // Built in a temporary first: src may alias contents_.data().
    InlineData data;
    data.set_inline_size(cur_size + src.size());
    memcpy(data.as_chars(), src.data(), src.size());
    memcpy(data.as_chars() + src.size(), contents_.data(), cur_size);
    contents_.data_ = data;
  } else {
    contents_.PrependTree(CordRepFlat::Create(src), method);
  }
}
// Prepends a string-like rvalue. Small strings are copied; large ones are
// converted via CordRepFromString (which may adopt the string's buffer).
template <typename T, Cord::EnableIfString<T>>
inline void Cord::Prepend(T&& src) {
  if (src.size() <= kMaxBytesToCopy) {
    Prepend(y_absl::string_view(src));
  } else {
    CordRep* rep = CordRepFromString(std::forward<T>(src));
    contents_.PrependTree(rep, CordzUpdateTracker::kPrependString);
  }
}

// Explicit instantiation for the project string type.
template void Cord::Prepend(TString&& src);
// Removes the first `n` bytes. CHECK-fails if n > size(). Tree reps are
// trimmed in place when uniquely owned, otherwise via substring/subtree
// nodes over the shared data.
void Cord::RemovePrefix(size_t n) {
  Y_ABSL_INTERNAL_CHECK(n <= size(),
                        y_absl::StrCat("Requested prefix size ", n,
                                       " exceeds Cord's size ", size()));
  contents_.MaybeRemoveEmptyCrcNode();
  CordRep* tree = contents_.tree();
  if (tree == nullptr) {
    contents_.remove_prefix(n);
  } else {
    auto constexpr method = CordzUpdateTracker::kRemovePrefix;
    CordzUpdateScope scope(contents_.cordz_info(), method);
    tree = cord_internal::RemoveCrcNode(tree);
    if (n >= tree->length) {
      // Removing everything: the cord becomes empty.
      CordRep::Unref(tree);
      tree = nullptr;
    } else if (tree->IsBtree()) {
      CordRep* old = tree;
      tree = tree->btree()->SubTree(n, tree->length - n);
      CordRep::Unref(old);
    } else if (tree->IsSubstring() && tree->refcount.IsOne()) {
      // Uniquely-owned substring node: adjust its window in place.
      tree->substring()->start += n;
      tree->length -= n;
    } else {
      // Shared node: wrap the remaining range in a new substring node.
      CordRep* rep = CordRepSubstring::Substring(tree, n, tree->length - n);
      CordRep::Unref(tree);
      tree = rep;
    }
    contents_.SetTreeOrEmpty(tree, scope);
  }
}
// Removes the last `n` bytes. CHECK-fails if n > size(). Tree reps are
// trimmed in place when uniquely owned, otherwise via substring nodes over
// the shared data.
void Cord::RemoveSuffix(size_t n) {
  Y_ABSL_INTERNAL_CHECK(n <= size(),
                        y_absl::StrCat("Requested suffix size ", n,
                                       " exceeds Cord's size ", size()));
  contents_.MaybeRemoveEmptyCrcNode();
  CordRep* tree = contents_.tree();
  if (tree == nullptr) {
    contents_.reduce_size(n);
  } else {
    auto constexpr method = CordzUpdateTracker::kRemoveSuffix;
    CordzUpdateScope scope(contents_.cordz_info(), method);
    tree = cord_internal::RemoveCrcNode(tree);
    if (n >= tree->length) {
      // Removing everything: the cord becomes empty.
      CordRep::Unref(tree);
      tree = nullptr;
    } else if (tree->IsBtree()) {
      tree = CordRepBtree::RemoveSuffix(tree->btree(), n);
    } else if (!tree->IsExternal() && tree->refcount.IsOne()) {
      // Uniquely-owned flat/substring: just shorten its length field.
      assert(tree->IsFlat() || tree->IsSubstring());
      tree->length -= n;
    } else {
      // Shared node: wrap the remaining range in a new substring node.
      CordRep* rep = CordRepSubstring::Substring(tree, 0, tree->length - n);
      CordRep::Unref(tree);
      tree = rep;
    }
    contents_.SetTreeOrEmpty(tree, scope);
  }
}
// Returns the subrange [pos, pos + new_size) as a new Cord. Out-of-range
// `pos`/`new_size` are clamped. Small results are materialized inline;
// larger ones share nodes via subtree/substring reps.
Cord Cord::Subcord(size_t pos, size_t new_size) const {
  Cord sub_cord;
  size_t length = size();
  if (pos > length) pos = length;
  if (new_size > length - pos) new_size = length - pos;
  if (new_size == 0) return sub_cord;

  CordRep* tree = contents_.tree();
  if (tree == nullptr) {
    // Source is inlined; the subrange is a direct inline copy.
    sub_cord.contents_.set_data(contents_.data() + pos, new_size);
    return sub_cord;
  }

  if (new_size <= InlineRep::kMaxInline) {
    // Result fits inline: walk the chunks and copy bytes directly, avoiding
    // any node allocation or refcounting.
    sub_cord.contents_.set_inline_size(new_size);
    char* dest = sub_cord.contents_.data_.as_chars();
    Cord::ChunkIterator it = chunk_begin();
    it.AdvanceBytes(pos);
    size_t remaining_size = new_size;
    while (remaining_size > it->size()) {
      cord_internal::SmallMemmove(dest, it->data(), it->size());
      remaining_size -= it->size();
      dest += it->size();
      ++it;
    }
    cord_internal::SmallMemmove(dest, it->data(), remaining_size);
    return sub_cord;
  }

  // Large result: share the source nodes via a subtree/substring rep.
  tree = cord_internal::SkipCrcNode(tree);
  if (tree->IsBtree()) {
    tree = tree->btree()->SubTree(pos, new_size);
  } else {
    tree = CordRepSubstring::Substring(tree, pos, new_size);
  }
  sub_cord.contents_.EmplaceTree(tree, contents_.data_,
                                 CordzUpdateTracker::kSubCord);
  return sub_cord;
}
  689. // --------------------------------------------------------------------
  690. // Comparators
  691. namespace {
  692. int ClampResult(int memcmp_res) {
  693. return static_cast<int>(memcmp_res > 0) - static_cast<int>(memcmp_res < 0);
  694. }
  695. int CompareChunks(y_absl::string_view* lhs, y_absl::string_view* rhs,
  696. size_t* size_to_compare) {
  697. size_t compared_size = std::min(lhs->size(), rhs->size());
  698. assert(*size_to_compare >= compared_size);
  699. *size_to_compare -= compared_size;
  700. int memcmp_res = ::memcmp(lhs->data(), rhs->data(), compared_size);
  701. if (memcmp_res != 0) return memcmp_res;
  702. lhs->remove_prefix(compared_size);
  703. rhs->remove_prefix(compared_size);
  704. return 0;
  705. }
  706. // This overload set computes comparison results from memcmp result. This
  707. // interface is used inside GenericCompare below. Different implementations
  708. // are specialized for int and bool. For int we clamp result to {-1, 0, 1}
  709. // set. For bool we just interested in "value == 0".
  710. template <typename ResultType>
  711. ResultType ComputeCompareResult(int memcmp_res) {
  712. return ClampResult(memcmp_res);
  713. }
  714. template <>
  715. bool ComputeCompareResult<bool>(int memcmp_res) {
  716. return memcmp_res == 0;
  717. }
  718. } // namespace
// Helper routine. Locates the first flat or external chunk of the Cord without
// initializing the iterator, and returns a string_view referencing the data.
inline y_absl::string_view Cord::InlineRep::FindFlatStartPiece() const {
  if (!is_tree()) {
    // Inlined data is a single contiguous piece.
    return y_absl::string_view(data_.as_chars(), data_.inline_size());
  }

  // A CRC wrapper never holds data itself; look through it.
  CordRep* node = cord_internal::SkipCrcNode(tree());
  if (node->IsFlat()) {
    return y_absl::string_view(node->flat()->Data(), node->length);
  }

  if (node->IsExternal()) {
    return y_absl::string_view(node->external()->base, node->length);
  }

  if (node->IsBtree()) {
    // Descend the leftmost spine to the leaf holding the first chunk.
    CordRepBtree* tree = node->btree();
    int height = tree->height();
    while (--height >= 0) {
      tree = tree->Edge(CordRepBtree::kFront)->btree();
    }
    return tree->Data(tree->begin());
  }

  // Get the child node if we encounter a SUBSTRING.
  size_t offset = 0;
  size_t length = node->length;
  assert(length != 0);
  if (node->IsSubstring()) {
    offset = node->substring()->start;
    node = node->substring()->child;
  }

  if (node->IsFlat()) {
    return y_absl::string_view(node->flat()->Data() + offset, length);
  }

  assert(node->IsExternal() && "Expect FLAT or EXTERNAL node here");
  return y_absl::string_view(node->external()->base + offset, length);
}
// Attaches checksum state `state` to this cord, replacing any checksum
// already present. Handles empty, inlined, and tree-backed cords.
void Cord::SetCrcCordState(crc_internal::CrcCordState state) {
  auto constexpr method = CordzUpdateTracker::kSetExpectedChecksum;
  if (empty()) {
    // Drop any stale empty CRC node, then install a childless CRC node.
    contents_.MaybeRemoveEmptyCrcNode();
    CordRep* rep = CordRepCrc::New(nullptr, std::move(state));
    contents_.EmplaceTree(rep, method);
  } else if (!contents_.is_tree()) {
    // Inlined data must first be promoted to a tree so it can carry a CRC.
    CordRep* rep = contents_.MakeFlatWithExtraCapacity(0);
    rep = CordRepCrc::New(rep, std::move(state));
    contents_.EmplaceTree(rep, method);
  } else {
    // Already tree-backed: wrap (or re-wrap) the existing tree.
    const CordzUpdateScope scope(contents_.data_.cordz_info(), method);
    CordRep* rep = CordRepCrc::New(contents_.data_.as_tree(), std::move(state));
    contents_.SetTree(rep, scope);
  }
}
  770. void Cord::SetExpectedChecksum(uint32_t crc) {
  771. // Construct a CrcCordState with a single chunk.
  772. crc_internal::CrcCordState state;
  773. state.mutable_rep()->prefix_crc.push_back(
  774. crc_internal::CrcCordState::PrefixCrc(size(), y_absl::crc32c_t{crc}));
  775. SetCrcCordState(std::move(state));
  776. }
  777. const crc_internal::CrcCordState* Cord::MaybeGetCrcCordState() const {
  778. if (!contents_.is_tree() || !contents_.tree()->IsCrc()) {
  779. return nullptr;
  780. }
  781. return &contents_.tree()->crc()->crc_cord_state;
  782. }
  783. y_absl::optional<uint32_t> Cord::ExpectedChecksum() const {
  784. if (!contents_.is_tree() || !contents_.tree()->IsCrc()) {
  785. return y_absl::nullopt;
  786. }
  787. return static_cast<uint32_t>(
  788. contents_.tree()->crc()->crc_cord_state.Checksum());
  789. }
// Continues a comparison against `rhs` after GenericCompare has already
// found the first `compared_size` bytes equal. Compares at most
// `size_to_compare` total bytes and returns a memcmp-style result.
inline int Cord::CompareSlowPath(y_absl::string_view rhs, size_t compared_size,
                                 size_t size_to_compare) const {
  // Refills *chunk from the iterator once drained; returns false when the
  // cord is exhausted.
  auto advance = [](Cord::ChunkIterator* it, y_absl::string_view* chunk) {
    if (!chunk->empty()) return true;
    ++*it;
    if (it->bytes_remaining_ == 0) return false;
    *chunk = **it;
    return true;
  };

  Cord::ChunkIterator lhs_it = chunk_begin();

  // compared_size is inside first chunk.
  y_absl::string_view lhs_chunk =
      (lhs_it.bytes_remaining_ != 0) ? *lhs_it : y_absl::string_view();
  assert(compared_size <= lhs_chunk.size());
  assert(compared_size <= rhs.size());

  // Position both sides just past the already-compared equal prefix.
  lhs_chunk.remove_prefix(compared_size);
  rhs.remove_prefix(compared_size);
  size_to_compare -= compared_size;  // skip already compared size.

  while (advance(&lhs_it, &lhs_chunk) && !rhs.empty()) {
    int comparison_result = CompareChunks(&lhs_chunk, &rhs, &size_to_compare);
    if (comparison_result != 0) return comparison_result;
    if (size_to_compare == 0) return 0;
  }

  // One side ran out of bytes first; the longer side compares greater.
  return static_cast<int>(rhs.empty()) - static_cast<int>(lhs_chunk.empty());
}
// Continues a Cord-vs-Cord comparison after GenericCompare has already
// found the first `compared_size` bytes of both first chunks equal.
// Compares at most `size_to_compare` total bytes; memcmp-style result.
inline int Cord::CompareSlowPath(const Cord& rhs, size_t compared_size,
                                 size_t size_to_compare) const {
  // Refills *chunk from the iterator once drained; returns false when that
  // cord is exhausted.
  auto advance = [](Cord::ChunkIterator* it, y_absl::string_view* chunk) {
    if (!chunk->empty()) return true;
    ++*it;
    if (it->bytes_remaining_ == 0) return false;
    *chunk = **it;
    return true;
  };

  Cord::ChunkIterator lhs_it = chunk_begin();
  Cord::ChunkIterator rhs_it = rhs.chunk_begin();

  // compared_size is inside both first chunks.
  y_absl::string_view lhs_chunk =
      (lhs_it.bytes_remaining_ != 0) ? *lhs_it : y_absl::string_view();
  y_absl::string_view rhs_chunk =
      (rhs_it.bytes_remaining_ != 0) ? *rhs_it : y_absl::string_view();
  assert(compared_size <= lhs_chunk.size());
  assert(compared_size <= rhs_chunk.size());

  // Position both sides just past the already-compared equal prefix.
  lhs_chunk.remove_prefix(compared_size);
  rhs_chunk.remove_prefix(compared_size);
  size_to_compare -= compared_size;  // skip already compared size.

  while (advance(&lhs_it, &lhs_chunk) && advance(&rhs_it, &rhs_chunk)) {
    int memcmp_res = CompareChunks(&lhs_chunk, &rhs_chunk, &size_to_compare);
    if (memcmp_res != 0) return memcmp_res;
    if (size_to_compare == 0) return 0;
  }

  // One cord ran out of bytes first; the longer one compares greater.
  return static_cast<int>(rhs_chunk.empty()) -
         static_cast<int>(lhs_chunk.empty());
}
  844. inline y_absl::string_view Cord::GetFirstChunk(const Cord& c) {
  845. if (c.empty()) return {};
  846. return c.contents_.FindFlatStartPiece();
  847. }
  848. inline y_absl::string_view Cord::GetFirstChunk(y_absl::string_view sv) {
  849. return sv;
  850. }
  851. // Compares up to 'size_to_compare' bytes of 'lhs' with 'rhs'. It is assumed
  852. // that 'size_to_compare' is greater that size of smallest of first chunks.
  853. template <typename ResultType, typename RHS>
  854. ResultType GenericCompare(const Cord& lhs, const RHS& rhs,
  855. size_t size_to_compare) {
  856. y_absl::string_view lhs_chunk = Cord::GetFirstChunk(lhs);
  857. y_absl::string_view rhs_chunk = Cord::GetFirstChunk(rhs);
  858. size_t compared_size = std::min(lhs_chunk.size(), rhs_chunk.size());
  859. assert(size_to_compare >= compared_size);
  860. int memcmp_res = ::memcmp(lhs_chunk.data(), rhs_chunk.data(), compared_size);
  861. if (compared_size == size_to_compare || memcmp_res != 0) {
  862. return ComputeCompareResult<ResultType>(memcmp_res);
  863. }
  864. return ComputeCompareResult<ResultType>(
  865. lhs.CompareSlowPath(rhs, compared_size, size_to_compare));
  866. }
  867. bool Cord::EqualsImpl(y_absl::string_view rhs, size_t size_to_compare) const {
  868. return GenericCompare<bool>(*this, rhs, size_to_compare);
  869. }
  870. bool Cord::EqualsImpl(const Cord& rhs, size_t size_to_compare) const {
  871. return GenericCompare<bool>(*this, rhs, size_to_compare);
  872. }
  873. template <typename RHS>
  874. inline int SharedCompareImpl(const Cord& lhs, const RHS& rhs) {
  875. size_t lhs_size = lhs.size();
  876. size_t rhs_size = rhs.size();
  877. if (lhs_size == rhs_size) {
  878. return GenericCompare<int>(lhs, rhs, lhs_size);
  879. }
  880. if (lhs_size < rhs_size) {
  881. auto data_comp_res = GenericCompare<int>(lhs, rhs, lhs_size);
  882. return data_comp_res == 0 ? -1 : data_comp_res;
  883. }
  884. auto data_comp_res = GenericCompare<int>(lhs, rhs, rhs_size);
  885. return data_comp_res == 0 ? +1 : data_comp_res;
  886. }
  887. int Cord::Compare(y_absl::string_view rhs) const {
  888. return SharedCompareImpl(*this, rhs);
  889. }
  890. int Cord::CompareImpl(const Cord& rhs) const {
  891. return SharedCompareImpl(*this, rhs);
  892. }
  893. bool Cord::EndsWith(y_absl::string_view rhs) const {
  894. size_t my_size = size();
  895. size_t rhs_size = rhs.size();
  896. if (my_size < rhs_size) return false;
  897. Cord tmp(*this);
  898. tmp.RemovePrefix(my_size - rhs_size);
  899. return tmp.EqualsImpl(rhs, rhs_size);
  900. }
  901. bool Cord::EndsWith(const Cord& rhs) const {
  902. size_t my_size = size();
  903. size_t rhs_size = rhs.size();
  904. if (my_size < rhs_size) return false;
  905. Cord tmp(*this);
  906. tmp.RemovePrefix(my_size - rhs_size);
  907. return tmp.EqualsImpl(rhs, rhs_size);
  908. }
  909. // --------------------------------------------------------------------
  910. // Misc.
  911. Cord::operator TString() const {
  912. TString s;
  913. y_absl::CopyCordToString(*this, &s);
  914. return s;
  915. }
  916. void CopyCordToString(const Cord& src, TString* dst) {
  917. if (!src.contents_.is_tree()) {
  918. src.contents_.CopyTo(dst);
  919. } else {
  920. y_absl::strings_internal::STLStringResizeUninitialized(dst, src.size());
  921. src.CopyToArraySlowPath(&(*dst)[0]);
  922. }
  923. }
  924. void Cord::CopyToArraySlowPath(char* dst) const {
  925. assert(contents_.is_tree());
  926. y_absl::string_view fragment;
  927. if (GetFlatAux(contents_.tree(), &fragment)) {
  928. memcpy(dst, fragment.data(), fragment.size());
  929. return;
  930. }
  931. for (y_absl::string_view chunk : Chunks()) {
  932. memcpy(dst, chunk.data(), chunk.size());
  933. dst += chunk.size();
  934. }
  935. }
// Reads the next `n` bytes as a new Cord and advances the iterator past
// them. REQUIRES: n <= bytes_remaining_ (hardening-asserted).
Cord Cord::ChunkIterator::AdvanceAndReadBytes(size_t n) {
  Y_ABSL_HARDENING_ASSERT(bytes_remaining_ >= n &&
                          "Attempted to iterate past `end()`");
  Cord subcord;
  auto constexpr method = CordzUpdateTracker::kCordReader;

  if (n <= InlineRep::kMaxInline) {
    // Range to read fits in inline data. Flatten it.
    char* data = subcord.contents_.set_data(n);
    // Consume whole chunks while the request extends past the current one.
    // Note: ++*this refills current_chunk_ and updates bytes_remaining_.
    while (n > current_chunk_.size()) {
      memcpy(data, current_chunk_.data(), current_chunk_.size());
      data += current_chunk_.size();
      n -= current_chunk_.size();
      ++*this;
    }
    memcpy(data, current_chunk_.data(), n);
    if (n < current_chunk_.size()) {
      // Partially consumed the current chunk; trim the copied prefix.
      RemoveChunkPrefix(n);
    } else if (n > 0) {
      // Consumed the chunk exactly; step to the next one.
      ++*this;
    }
    return subcord;
  }

  if (btree_reader_) {
    size_t chunk_size = current_chunk_.size();
    if (n <= chunk_size && n <= kMaxBytesToCopy) {
      // Small read fully inside the current chunk: copy the bytes out.
      subcord = Cord(current_chunk_.substr(0, n), method);
      if (n < chunk_size) {
        current_chunk_.remove_prefix(n);
      } else {
        current_chunk_ = btree_reader_.Next();
      }
    } else {
      // Large read: let the btree reader produce a shared subtree.
      CordRep* rep;
      current_chunk_ = btree_reader_.Read(n, chunk_size, rep);
      subcord.contents_.EmplaceTree(rep, method);
    }
    bytes_remaining_ -= n;
    return subcord;
  }

  // Short circuit if reading the entire data edge.
  assert(current_leaf_ != nullptr);
  if (n == current_leaf_->length) {
    bytes_remaining_ = 0;
    current_chunk_ = {};
    CordRep* tree = CordRep::Ref(current_leaf_);
    subcord.contents_.EmplaceTree(VerifyTree(tree), method);
    return subcord;
  }

  // From this point on, we need a partial substring node.
  // Get pointer to the underlying flat or external data payload and
  // compute data pointer and offset into current flat or external.
  CordRep* payload = current_leaf_->IsSubstring()
                         ? current_leaf_->substring()->child
                         : current_leaf_;
  const char* data = payload->IsExternal() ? payload->external()->base
                                           : payload->flat()->Data();
  const size_t offset = static_cast<size_t>(current_chunk_.data() - data);
  auto* tree = CordRepSubstring::Substring(payload, offset, n);
  subcord.contents_.EmplaceTree(VerifyTree(tree), method);
  bytes_remaining_ -= n;
  current_chunk_.remove_prefix(n);
  return subcord;
}
  999. char Cord::operator[](size_t i) const {
  1000. Y_ABSL_HARDENING_ASSERT(i < size());
  1001. size_t offset = i;
  1002. const CordRep* rep = contents_.tree();
  1003. if (rep == nullptr) {
  1004. return contents_.data()[i];
  1005. }
  1006. rep = cord_internal::SkipCrcNode(rep);
  1007. while (true) {
  1008. assert(rep != nullptr);
  1009. assert(offset < rep->length);
  1010. if (rep->IsFlat()) {
  1011. // Get the "i"th character directly from the flat array.
  1012. return rep->flat()->Data()[offset];
  1013. } else if (rep->IsBtree()) {
  1014. return rep->btree()->GetCharacter(offset);
  1015. } else if (rep->IsExternal()) {
  1016. // Get the "i"th character from the external array.
  1017. return rep->external()->base[offset];
  1018. } else {
  1019. // This must be a substring a node, so bypass it to get to the child.
  1020. assert(rep->IsSubstring());
  1021. offset += rep->substring()->start;
  1022. rep = rep->substring()->child;
  1023. }
  1024. }
  1025. }
// Flattens a tree-backed cord into one contiguous buffer, replaces the
// tree with it, and returns a view of the flattened data.
y_absl::string_view Cord::FlattenSlowPath() {
  assert(contents_.is_tree());
  size_t total_size = size();
  CordRep* new_rep;
  char* new_buffer;

  // Try to put the contents into a new flat rep. If they won't fit in the
  // biggest possible flat node, use an external rep instead.
  if (total_size <= kMaxFlatLength) {
    new_rep = CordRepFlat::New(total_size);
    new_rep->length = total_size;
    new_buffer = new_rep->flat()->Data();
    CopyToArraySlowPath(new_buffer);
  } else {
    new_buffer = std::allocator<char>().allocate(total_size);
    CopyToArraySlowPath(new_buffer);
    // The releaser lambda frees the buffer when the external node dies.
    new_rep = y_absl::cord_internal::NewExternalRep(
        y_absl::string_view(new_buffer, total_size), [](y_absl::string_view s) {
          std::allocator<char>().deallocate(const_cast<char*>(s.data()),
                                            s.size());
        });
  }
  // Swap in the flattened representation, releasing the old tree.
  CordzUpdateScope scope(contents_.cordz_info(), CordzUpdateTracker::kFlatten);
  CordRep::Unref(contents_.as_tree());
  contents_.SetTree(new_rep, scope);
  return y_absl::string_view(new_buffer, total_size);
}
  1052. /* static */ bool Cord::GetFlatAux(CordRep* rep, y_absl::string_view* fragment) {
  1053. assert(rep != nullptr);
  1054. if (rep->length == 0) {
  1055. *fragment = y_absl::string_view();
  1056. return true;
  1057. }
  1058. rep = cord_internal::SkipCrcNode(rep);
  1059. if (rep->IsFlat()) {
  1060. *fragment = y_absl::string_view(rep->flat()->Data(), rep->length);
  1061. return true;
  1062. } else if (rep->IsExternal()) {
  1063. *fragment = y_absl::string_view(rep->external()->base, rep->length);
  1064. return true;
  1065. } else if (rep->IsBtree()) {
  1066. return rep->btree()->IsFlat(fragment);
  1067. } else if (rep->IsSubstring()) {
  1068. CordRep* child = rep->substring()->child;
  1069. if (child->IsFlat()) {
  1070. *fragment = y_absl::string_view(
  1071. child->flat()->Data() + rep->substring()->start, rep->length);
  1072. return true;
  1073. } else if (child->IsExternal()) {
  1074. *fragment = y_absl::string_view(
  1075. child->external()->base + rep->substring()->start, rep->length);
  1076. return true;
  1077. } else if (child->IsBtree()) {
  1078. return child->btree()->IsFlat(rep->substring()->start, rep->length,
  1079. fragment);
  1080. }
  1081. }
  1082. return false;
  1083. }
  1084. /* static */ void Cord::ForEachChunkAux(
  1085. y_absl::cord_internal::CordRep* rep,
  1086. y_absl::FunctionRef<void(y_absl::string_view)> callback) {
  1087. assert(rep != nullptr);
  1088. if (rep->length == 0) return;
  1089. rep = cord_internal::SkipCrcNode(rep);
  1090. if (rep->IsBtree()) {
  1091. ChunkIterator it(rep), end;
  1092. while (it != end) {
  1093. callback(*it);
  1094. ++it;
  1095. }
  1096. return;
  1097. }
  1098. // This is a leaf node, so invoke our callback.
  1099. y_absl::cord_internal::CordRep* current_node = cord_internal::SkipCrcNode(rep);
  1100. y_absl::string_view chunk;
  1101. bool success = GetFlatAux(current_node, &chunk);
  1102. assert(success);
  1103. if (success) {
  1104. callback(chunk);
  1105. }
  1106. }
  1107. static void DumpNode(CordRep* rep, bool include_data, std::ostream* os,
  1108. int indent) {
  1109. const int kIndentStep = 1;
  1110. y_absl::InlinedVector<CordRep*, kInlinedVectorSize> stack;
  1111. y_absl::InlinedVector<int, kInlinedVectorSize> indents;
  1112. for (;;) {
  1113. *os << std::setw(3) << rep->refcount.Get();
  1114. *os << " " << std::setw(7) << rep->length;
  1115. *os << " [";
  1116. if (include_data) *os << static_cast<void*>(rep);
  1117. *os << "]";
  1118. *os << " " << std::setw(indent) << "";
  1119. bool leaf = false;
  1120. if (rep == nullptr) {
  1121. *os << "NULL\n";
  1122. leaf = true;
  1123. } else if (rep->IsCrc()) {
  1124. *os << "CRC crc=" << rep->crc()->crc_cord_state.Checksum() << "\n";
  1125. indent += kIndentStep;
  1126. rep = rep->crc()->child;
  1127. } else if (rep->IsSubstring()) {
  1128. *os << "SUBSTRING @ " << rep->substring()->start << "\n";
  1129. indent += kIndentStep;
  1130. rep = rep->substring()->child;
  1131. } else { // Leaf or ring
  1132. leaf = true;
  1133. if (rep->IsExternal()) {
  1134. *os << "EXTERNAL [";
  1135. if (include_data)
  1136. *os << y_absl::CEscape(TString(rep->external()->base, rep->length));
  1137. *os << "]\n";
  1138. } else if (rep->IsFlat()) {
  1139. *os << "FLAT cap=" << rep->flat()->Capacity() << " [";
  1140. if (include_data)
  1141. *os << y_absl::CEscape(TString(rep->flat()->Data(), rep->length));
  1142. *os << "]\n";
  1143. } else {
  1144. CordRepBtree::Dump(rep, /*label=*/ "", include_data, *os);
  1145. }
  1146. }
  1147. if (leaf) {
  1148. if (stack.empty()) break;
  1149. rep = stack.back();
  1150. stack.pop_back();
  1151. indent = indents.back();
  1152. indents.pop_back();
  1153. }
  1154. }
  1155. Y_ABSL_INTERNAL_CHECK(indents.empty(), "");
  1156. }
  1157. static TString ReportError(CordRep* root, CordRep* node) {
  1158. std::ostringstream buf;
  1159. buf << "Error at node " << node << " in:";
  1160. DumpNode(root, true, &buf);
  1161. return TString(buf.str());
  1162. }
// Walks the tree from `start_node`, checking per-node-type structural
// invariants. CHECK-fails (with a full tree dump) on violation; returns
// true otherwise. Only CRC nodes push children onto the worklist here;
// `full_validation` is currently unused.
static bool VerifyNode(CordRep* root, CordRep* start_node,
                       bool /* full_validation */) {
  y_absl::InlinedVector<CordRep*, 2> worklist;
  worklist.push_back(start_node);
  do {
    CordRep* node = worklist.back();
    worklist.pop_back();
    Y_ABSL_INTERNAL_CHECK(node != nullptr, ReportError(root, node));
    if (node != root) {
      // Only the root may be empty or carry a CRC wrapper.
      Y_ABSL_INTERNAL_CHECK(node->length != 0, ReportError(root, node));
      Y_ABSL_INTERNAL_CHECK(!node->IsCrc(), ReportError(root, node));
    }
    if (node->IsFlat()) {
      Y_ABSL_INTERNAL_CHECK(node->length <= node->flat()->Capacity(),
                            ReportError(root, node));
    } else if (node->IsExternal()) {
      Y_ABSL_INTERNAL_CHECK(node->external()->base != nullptr,
                            ReportError(root, node));
    } else if (node->IsSubstring()) {
      // The window [start, start + length) must lie inside the child.
      Y_ABSL_INTERNAL_CHECK(
          node->substring()->start < node->substring()->child->length,
          ReportError(root, node));
      Y_ABSL_INTERNAL_CHECK(node->substring()->start + node->length <=
                                node->substring()->child->length,
                            ReportError(root, node));
    } else if (node->IsCrc()) {
      // A CRC node may be childless only when it describes an empty cord.
      Y_ABSL_INTERNAL_CHECK(
          node->crc()->child != nullptr || node->crc()->length == 0,
          ReportError(root, node));
      if (node->crc()->child != nullptr) {
        Y_ABSL_INTERNAL_CHECK(node->crc()->length == node->crc()->child->length,
                              ReportError(root, node));
        worklist.push_back(node->crc()->child);
      }
    }
  } while (!worklist.empty());
  return true;
}
  1201. std::ostream& operator<<(std::ostream& out, const Cord& cord) {
  1202. for (y_absl::string_view chunk : cord.Chunks()) {
  1203. out.write(chunk.data(), static_cast<std::streamsize>(chunk.size()));
  1204. }
  1205. return out;
  1206. }
  1207. namespace strings_internal {
  1208. size_t CordTestAccess::FlatOverhead() { return cord_internal::kFlatOverhead; }
  1209. size_t CordTestAccess::MaxFlatLength() { return cord_internal::kMaxFlatLength; }
  1210. size_t CordTestAccess::FlatTagToLength(uint8_t tag) {
  1211. return cord_internal::TagToLength(tag);
  1212. }
  1213. uint8_t CordTestAccess::LengthToTag(size_t s) {
  1214. Y_ABSL_INTERNAL_CHECK(s <= kMaxFlatLength, y_absl::StrCat("Invalid length ", s));
  1215. return cord_internal::AllocatedSizeToTag(s + cord_internal::kFlatOverhead);
  1216. }
  1217. size_t CordTestAccess::SizeofCordRepExternal() {
  1218. return sizeof(CordRepExternal);
  1219. }
  1220. size_t CordTestAccess::SizeofCordRepSubstring() {
  1221. return sizeof(CordRepSubstring);
  1222. }
  1223. } // namespace strings_internal
  1224. Y_ABSL_NAMESPACE_END
  1225. } // namespace y_absl