string.h 33 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287
  1. #pragma once
  2. #include <cstddef>
  3. #include <cstring>
  4. #include <stlfwd>
  5. #include <stdexcept>
  6. #include <string>
  7. #include <string_view>
  8. #include <util/system/yassert.h>
  9. #include <util/system/atomic.h>
  10. #include "ptr.h"
  11. #include "utility.h"
  12. #include "bitops.h"
  13. #include "explicit_type.h"
  14. #include "reserve.h"
  15. #include "singleton.h"
  16. #include "strbase.h"
  17. #include "strbuf.h"
  18. #include "string_hash.h"
  19. #if defined(address_sanitizer_enabled) || defined(thread_sanitizer_enabled)
  20. #include "hide_ptr.h"
  21. #endif
  22. template <class TCharType, class TCharTraits, class TAllocator>
  23. void ResizeUninitialized(std::basic_string<TCharType, TCharTraits, TAllocator>& s, size_t len) {
  24. #if defined(_YNDX_LIBCXX_ENABLE_STRING_RESIZE_UNINITIALIZED)
  25. s.resize_uninitialized(len);
  26. #else
  27. s.resize(len);
  28. #endif
  29. }
  30. #define Y_NOEXCEPT
  31. #ifndef TSTRING_IS_STD_STRING
  32. template <class T>
  33. class TStringPtrOps {
  34. public:
  35. static inline void Ref(T* t) noexcept {
  36. if (t != T::NullStr()) {
  37. t->Ref();
  38. }
  39. }
  40. static inline void UnRef(T* t) noexcept {
  41. if (t != T::NullStr()) {
  42. t->UnRef();
  43. }
  44. }
  45. static inline long RefCount(const T* t) noexcept {
  46. if (t == T::NullStr()) {
  47. return -1;
  48. }
  49. return t->RefCount();
  50. }
  51. };
  52. alignas(32) extern const char NULL_STRING_REPR[128];
// Holds the reference counter of a shared string representation.
// The counter is initialized to 1.
struct TRefCountHolder {
    TAtomicCounter C = 1;
};
  56. template <class B>
  57. struct TStdString: public TRefCountHolder, public B {
  58. template <typename... Args>
  59. inline TStdString(Args&&... args)
  60. : B(std::forward<Args>(args)...)
  61. {
  62. }
  63. inline bool IsNull() const noexcept {
  64. return this == NullStr();
  65. }
  66. static TStdString* NullStr() noexcept {
  67. #ifdef _LIBCPP_VERSION
  68. return (TStdString*)NULL_STRING_REPR;
  69. #else
  70. return Singleton<TStdString>();
  71. #endif
  72. }
  73. private:
  74. friend TStringPtrOps<TStdString>;
  75. inline void Ref() noexcept {
  76. C.Inc();
  77. }
  78. inline void UnRef() noexcept {
  79. if (C.Val() == 1 || C.Dec() == 0) {
  80. delete this;
  81. }
  82. }
  83. inline long RefCount() const noexcept {
  84. return C.Val();
  85. }
  86. };
  87. template <class TStringType>
  88. class TBasicCharRef {
  89. public:
  90. using TChar = typename TStringType::TChar;
  91. TBasicCharRef(TStringType& s, size_t pos)
  92. : S_(s)
  93. , Pos_(pos)
  94. {
  95. }
  96. operator TChar() const {
  97. return S_.at(Pos_);
  98. }
  99. TChar* operator&() {
  100. return S_.begin() + Pos_;
  101. }
  102. const TChar* operator&() const {
  103. return S_.cbegin() + Pos_;
  104. }
  105. TBasicCharRef& operator=(TChar c) {
  106. Y_ASSERT(Pos_ < S_.size() || (Pos_ == S_.size() && !c));
  107. S_.Detach()[Pos_] = c;
  108. return *this;
  109. }
  110. TBasicCharRef& operator=(const TBasicCharRef& other) {
  111. return this->operator=(static_cast<TChar>(other));
  112. }
  113. /*
  114. * WARN:
  115. * Though references are copyable types according to the standard,
  116. * the behavior of this explicit default specification is different from the one
  117. * implemented by the assignment operator above.
  118. *
  119. * An attempt to explicitly delete it will break valid invocations like
  120. * auto c = flag ? s[i] : s[j];
  121. */
  122. TBasicCharRef(const TBasicCharRef&) = default;
  123. private:
  124. TStringType& S_;
  125. size_t Pos_;
  126. };
  127. #endif
  128. template <typename TCharType, typename TTraits>
  129. class TBasicString: public TStringBase<TBasicString<TCharType, TTraits>, TCharType, TTraits> {
  130. public:
    // TODO: Move to private section
    // Base class of this string, parameterized by the string type itself.
    using TBase = TStringBase<TBasicString, TCharType, TTraits>;
    // Standard string type that holds the characters.
    using TStringType = std::basic_string<TCharType, TTraits>;
#ifdef TSTRING_IS_STD_STRING
    // In this mode the storage is the standard string itself and `reference`
    // is the standard string's own reference type.
    using TStorage = TStringType;
    using reference = typename TStorage::reference;
#else
    // In this mode the storage is an intrusive pointer to a ref-counted
    // standard string and `reference` is the TBasicCharRef proxy.
    using TStdStr = TStdString<TStringType>;
    using TStorage = TIntrusivePtr<TStdStr, TStringPtrOps<TStdStr>>;
    using reference = TBasicCharRef<TBasicString>;
#endif
    using char_type = TCharType; // TODO: DROP
    using value_type = TCharType;
    using traits_type = TTraits;
    // Mutable iterators are raw character pointers.
    using iterator = TCharType*;
    using reverse_iterator = typename TBase::template TReverseIteratorBase<iterator>;
    // Const iterator/reference types are taken from the base class.
    using typename TBase::const_iterator;
    using typename TBase::const_reference;
    using typename TBase::const_reverse_iterator;
  150. struct TUninitialized {
  151. explicit TUninitialized(size_t size)
  152. : Size(size)
  153. {
  154. }
  155. size_t Size;
  156. };
  157. static size_t max_size() noexcept {
  158. static size_t res = TStringType().max_size();
  159. return res;
  160. }
  161. protected:
  162. #ifdef TSTRING_IS_STD_STRING
  163. TStorage Storage_;
  164. #else
  165. TStorage S_;
  166. template <typename... A>
  167. static TStorage Construct(A&&... a) {
  168. return {new TStdStr(std::forward<A>(a)...), typename TStorage::TNoIncrement()};
  169. }
  170. static TStorage Construct() noexcept {
  171. return TStdStr::NullStr();
  172. }
  173. TStdStr& StdStr() noexcept {
  174. return *S_;
  175. }
  176. const TStdStr& StdStr() const noexcept {
  177. return *S_;
  178. }
  179. /**
  180. * Makes a distinct copy of this string. `IsDetached()` is always true after this call.
  181. *
  182. * @throw std::length_error
  183. */
  184. void Clone() {
  185. Construct(StdStr()).Swap(S_);
  186. }
  187. size_t RefCount() const noexcept {
  188. return S_.RefCount();
  189. }
  190. #endif
  191. public:
  192. inline const TStringType& ConstRef() const {
  193. #ifdef TSTRING_IS_STD_STRING
  194. return Storage_;
  195. #else
  196. return StdStr();
  197. #endif
  198. }
  199. inline TStringType& MutRef() {
  200. #ifdef TSTRING_IS_STD_STRING
  201. return Storage_;
  202. #else
  203. Detach();
  204. return StdStr();
  205. #endif
  206. }
  207. inline const_reference operator[](size_t pos) const noexcept {
  208. Y_ASSERT(pos <= length());
  209. return this->data()[pos];
  210. }
  211. inline reference operator[](size_t pos) noexcept {
  212. Y_ASSERT(pos <= length());
  213. #ifdef TSTRING_IS_STD_STRING
  214. return Storage_[pos];
  215. #else
  216. return reference(*this, pos);
  217. #endif
  218. }
  219. using TBase::back;
  220. inline reference back() noexcept {
  221. Y_ASSERT(!this->empty());
  222. #ifdef TSTRING_IS_STD_STRING
  223. return Storage_.back();
  224. #else
  225. if (Y_UNLIKELY(this->empty())) {
  226. return reference(*this, 0);
  227. }
  228. return reference(*this, length() - 1);
  229. #endif
  230. }
  231. using TBase::front;
  232. inline reference front() noexcept {
  233. Y_ASSERT(!this->empty());
  234. #ifdef TSTRING_IS_STD_STRING
  235. return Storage_.front();
  236. #else
  237. return reference(*this, 0);
  238. #endif
  239. }
  240. inline size_t length() const noexcept {
  241. return ConstRef().length();
  242. }
  243. inline const TCharType* data() const noexcept {
  244. return ConstRef().data();
  245. }
  246. inline const TCharType* c_str() const noexcept {
  247. return ConstRef().c_str();
  248. }
  249. // ~~~ STL compatible method to obtain data pointer ~~~
  250. iterator begin() {
  251. return &*MutRef().begin();
  252. }
  253. iterator vend() {
  254. return &*MutRef().end();
  255. }
  256. reverse_iterator rbegin() {
  257. return reverse_iterator(vend());
  258. }
  259. reverse_iterator rend() {
  260. return reverse_iterator(begin());
  261. }
  262. using TBase::begin; //!< const_iterator TStringBase::begin() const
  263. using TBase::cbegin; //!< const_iterator TStringBase::cbegin() const
  264. using TBase::cend; //!< const_iterator TStringBase::cend() const
  265. using TBase::crbegin; //!< const_reverse_iterator TStringBase::crbegin() const
  266. using TBase::crend; //!< const_reverse_iterator TStringBase::crend() const
  267. using TBase::end; //!< const_iterator TStringBase::end() const
  268. using TBase::rbegin; //!< const_reverse_iterator TStringBase::rbegin() const
  269. using TBase::rend; //!< const_reverse_iterator TStringBase::rend() const
  270. inline size_t capacity() const noexcept {
  271. #ifdef TSTRING_IS_STD_STRING
  272. return Storage_.capacity();
  273. #else
  274. if (S_->IsNull()) {
  275. return 0;
  276. }
  277. return S_->capacity();
  278. #endif
  279. }
  280. TCharType* Detach() {
  281. #ifdef TSTRING_IS_STD_STRING
  282. return Storage_.data();
  283. #else
  284. if (Y_UNLIKELY(!IsDetached())) {
  285. Clone();
  286. }
  287. return (TCharType*)S_->data();
  288. #endif
  289. }
  290. bool IsDetached() const {
  291. #ifdef TSTRING_IS_STD_STRING
  292. return true;
  293. #else
  294. return 1 == RefCount();
  295. #endif
  296. }
  297. // ~~~ Size and capacity ~~~
  298. TBasicString& resize(size_t n, TCharType c = ' ') { // remove or append
  299. MutRef().resize(n, c);
  300. return *this;
  301. }
  302. // ~~~ Constructor ~~~ : FAMILY0(,TBasicString)
    // Default constructor. Outside of TSTRING_IS_STD_STRING mode the storage
    // is set to the shared null string via Construct().
    TBasicString() noexcept
#ifndef TSTRING_IS_STD_STRING
        : S_(Construct())
#endif
    {
    }

    // Constructs an empty string and then reserves `rt.Capacity`.
    inline explicit TBasicString(::NDetail::TReserveTag rt)
#ifndef TSTRING_IS_STD_STRING
        : S_(Construct())
#endif
    {
        reserve(rt.Capacity);
    }
  316. inline TBasicString(const TBasicString& s)
  317. #ifdef TSTRING_IS_STD_STRING
  318. : Storage_(s.Storage_)
  319. #else
  320. : S_(s.S_)
  321. #endif
  322. {
  323. }
  324. inline TBasicString(TBasicString&& s) noexcept
  325. #ifdef TSTRING_IS_STD_STRING
  326. : Storage_(std::move(s.Storage_))
  327. #else
  328. : S_(Construct())
  329. #endif
  330. {
  331. #ifdef TSTRING_IS_STD_STRING
  332. #else
  333. s.swap(*this);
  334. #endif
  335. }
    // Explicit conversion from a standard string with any traits/allocator:
    // copies `s.size()` characters starting at `s.data()`.
    template <typename T, typename A>
    explicit inline TBasicString(const std::basic_string<TCharType, T, A>& s)
        : TBasicString(s.data(), s.size())
    {
    }

    // Implicit conversion from an rvalue standard string. Outside of
    // TSTRING_IS_STD_STRING mode an empty `s` yields the shared null string,
    // otherwise `s` is moved into newly constructed storage.
    template <typename T, typename A>
    inline TBasicString(std::basic_string<TCharType, T, A>&& s)
#ifdef TSTRING_IS_STD_STRING
        : Storage_(std::move(s))
#else
        : S_(s.empty() ? Construct() : Construct(std::move(s)))
#endif
    {
    }
    // Substring constructor: takes `n` characters of `s` starting at `pos`.
    // Outside of TSTRING_IS_STD_STRING mode, n == 0 yields the shared null string.
    TBasicString(const TBasicString& s, size_t pos, size_t n) Y_NOEXCEPT
#ifdef TSTRING_IS_STD_STRING
        : Storage_(s.Storage_, pos, n)
#else
        : S_(n ? Construct(s, pos, n) : Construct())
#endif
    {
    }

    // Constructs from a character pointer; the length is obtained with TBase::StrLen.
    TBasicString(const TCharType* pc)
        : TBasicString(pc, TBase::StrLen(pc))
    {
    }
    // TODO thegeorg@: uncomment and fix clients
    // TBasicString(std::nullptr_t) = delete;

    // Constructs from `n` characters starting at `pc`.
    // Outside of TSTRING_IS_STD_STRING mode, n == 0 yields the shared null string.
    TBasicString(const TCharType* pc, size_t n)
#ifdef TSTRING_IS_STD_STRING
        : Storage_(pc, n)
#else
        : S_(n ? Construct(pc, n) : Construct())
#endif
    {
    }

    // Constructing from a literal nullptr plus a length is rejected at compile time.
    TBasicString(std::nullptr_t, size_t) = delete;

    // Constructs from `n` characters starting at `pc + pos`.
    TBasicString(const TCharType* pc, size_t pos, size_t n)
        : TBasicString(pc + pos, n)
    {
    }
#ifdef TSTRING_IS_STD_STRING
    // Constructs a one-character string by pushing `c` into the storage.
    explicit TBasicString(TExplicitType<TCharType> c) {
        Storage_.push_back(c);
    }
#else
    // Constructs a one-character string from the value wrapped in `c`.
    explicit TBasicString(TExplicitType<TCharType> c)
        : TBasicString(&c.Value(), 1)
    {
    }

    // Constructs a one-character string from a character proxy;
    // `&c` uses the proxy's overloaded address-of operator.
    explicit TBasicString(const reference& c)
        : TBasicString(&c, 1)
    {
    }
#endif
    // Constructs a string made of `n` copies of `c`.
    TBasicString(size_t n, TCharType c)
#ifdef TSTRING_IS_STD_STRING
        : Storage_(n, c)
#else
        : S_(Construct(n, c))
#endif
    {
    }
  399. /**
  400. * Constructs an uninitialized string of size `uninitialized.Size`. The proper
  401. * way to use this ctor is via `TBasicString::Uninitialized` factory function.
  402. *
  403. * @throw std::length_error
  404. */
  405. TBasicString(TUninitialized uninitialized) {
  406. #if !defined(TSTRING_IS_STD_STRING)
  407. S_ = Construct();
  408. #endif
  409. ReserveAndResize(uninitialized.Size);
  410. }
    // Constructs from the character range [b, e).
    TBasicString(const TCharType* b, const TCharType* e)
        : TBasicString(b, e - b)
    {
    }

    // Explicit conversion from a string buffer: copies `s.size()` characters from `s.data()`.
    explicit TBasicString(const TBasicStringBuf<TCharType, TTraits> s)
        : TBasicString(s.data(), s.size())
    {
    }

    // Explicit conversion from a standard string view with any traits.
    template <typename Traits>
    explicit inline TBasicString(const std::basic_string_view<TCharType, Traits>& s)
        : TBasicString(s.data(), s.size())
    {
    }
  424. /**
  425. * WARN:
  426. * Certain invocations of this method will result in link-time error.
  427. * You are free to implement corresponding methods in string.cpp if you need them.
  428. */
  429. static TBasicString FromAscii(const ::TStringBuf& s) {
  430. return TBasicString().AppendAscii(s);
  431. }
  432. static TBasicString FromUtf8(const ::TStringBuf& s) {
  433. return TBasicString().AppendUtf8(s);
  434. }
  435. static TBasicString FromUtf16(const ::TWtringBuf& s) {
  436. return TBasicString().AppendUtf16(s);
  437. }
  438. static TBasicString Uninitialized(size_t n) {
  439. return TBasicString(TUninitialized(n));
  440. }
  441. private:
  442. template <typename... R>
  443. static size_t SumLength(const TBasicStringBuf<TCharType, TTraits> s1, const R&... r) noexcept {
  444. return s1.size() + SumLength(r...);
  445. }
  446. template <typename... R>
  447. static size_t SumLength(const TCharType /*s1*/, const R&... r) noexcept {
  448. return 1 + SumLength(r...);
  449. }
  450. static constexpr size_t SumLength() noexcept {
  451. return 0;
  452. }
  453. template <typename... R>
  454. static void CopyAll(TCharType* p, const TBasicStringBuf<TCharType, TTraits> s, const R&... r) {
  455. TTraits::copy(p, s.data(), s.size());
  456. CopyAll(p + s.size(), r...);
  457. }
  458. template <typename... R, class TNextCharType, typename = std::enable_if_t<std::is_same<TCharType, TNextCharType>::value>>
  459. static void CopyAll(TCharType* p, const TNextCharType s, const R&... r) {
  460. p[0] = s;
  461. CopyAll(p + 1, r...);
  462. }
  463. static void CopyAll(TCharType*) noexcept {
  464. }
  465. public:
  466. inline void clear() noexcept {
  467. #ifdef TSTRING_IS_STD_STRING
  468. Storage_.clear();
  469. #else
  470. if (IsDetached()) {
  471. S_->clear();
  472. return;
  473. }
  474. Construct().Swap(S_);
  475. #endif
  476. }
  477. template <typename... R>
  478. static inline TBasicString Join(const R&... r) {
  479. TBasicString s{TUninitialized{SumLength(r...)}};
  480. TBasicString::CopyAll((TCharType*)s.data(), r...);
  481. return s;
  482. }
  483. // ~~~ Assignment ~~~ : FAMILY0(TBasicString&, assign);
  484. TBasicString& assign(size_t size, TCharType ch) {
  485. ReserveAndResize(size);
  486. std::fill(begin(), vend(), ch);
  487. return *this;
  488. }
  489. TBasicString& assign(const TBasicString& s) {
  490. TBasicString(s).swap(*this);
  491. return *this;
  492. }
  493. TBasicString& assign(const TBasicString& s, size_t pos, size_t n) {
  494. return assign(TBasicString(s, pos, n));
  495. }
  496. TBasicString& assign(const TCharType* pc) {
  497. return assign(pc, TBase::StrLen(pc));
  498. }
  499. TBasicString& assign(TCharType ch) {
  500. return assign(&ch, 1);
  501. }
  502. TBasicString& assign(const TCharType* pc, size_t len) {
  503. #if defined(address_sanitizer_enabled) || defined(thread_sanitizer_enabled)
  504. pc = (const TCharType*)HidePointerOrigin((void*)pc);
  505. #endif
  506. if (IsDetached()) {
  507. MutRef().assign(pc, len);
  508. } else {
  509. TBasicString(pc, len).swap(*this);
  510. }
  511. return *this;
  512. }
  513. TBasicString& assign(const TCharType* first, const TCharType* last) {
  514. return assign(first, last - first);
  515. }
  516. TBasicString& assign(const TCharType* pc, size_t pos, size_t n) {
  517. return assign(pc + pos, n);
  518. }
  519. TBasicString& assign(const TBasicStringBuf<TCharType, TTraits> s) {
  520. return assign(s.data(), s.size());
  521. }
  522. TBasicString& assign(const TBasicStringBuf<TCharType, TTraits> s, size_t spos, size_t sn = TBase::npos) {
  523. return assign(s.SubString(spos, sn));
  524. }
  525. inline TBasicString& AssignNoAlias(const TCharType* pc, size_t len) {
  526. return assign(pc, len);
  527. }
  528. inline TBasicString& AssignNoAlias(const TCharType* b, const TCharType* e) {
  529. return AssignNoAlias(b, e - b);
  530. }
  531. TBasicString& AssignNoAlias(const TBasicStringBuf<TCharType, TTraits> s) {
  532. return AssignNoAlias(s.data(), s.size());
  533. }
  534. TBasicString& AssignNoAlias(const TBasicStringBuf<TCharType, TTraits> s, size_t spos, size_t sn = TBase::npos) {
  535. return AssignNoAlias(s.SubString(spos, sn));
  536. }
    /**
     * WARN:
     * Certain invocations of this method will result in link-time error.
     * You are free to implement corresponding methods in string.cpp if you need them.
     *
     * Each of these clears the string and then appends `s` with the matching
     * `Append*` method.
     *
     * NOTE(review): the return type is plain `auto`, so the `TBasicString&`
     * returned by `Append*` is returned by value (a copy of this string), not
     * by reference. Confirm whether `auto&` was intended.
     */
    auto AssignAscii(const ::TStringBuf& s) {
        clear();
        return AppendAscii(s);
    }

    auto AssignUtf8(const ::TStringBuf& s) {
        clear();
        return AppendUtf8(s);
    }

    auto AssignUtf16(const ::TWtringBuf& s) {
        clear();
        return AppendUtf16(s);
    }
  554. TBasicString& operator=(const TBasicString& s) {
  555. return assign(s);
  556. }
  557. TBasicString& operator=(TBasicString&& s) noexcept {
  558. swap(s);
  559. return *this;
  560. }
  561. template <typename T, typename A>
  562. TBasicString& operator=(std::basic_string<TCharType, T, A>&& s) noexcept {
  563. TBasicString(std::move(s)).swap(*this);
  564. return *this;
  565. }
  566. TBasicString& operator=(const TBasicStringBuf<TCharType, TTraits> s) {
  567. return assign(s);
  568. }
  569. TBasicString& operator=(std::initializer_list<TCharType> il) {
  570. return assign(il.begin(), il.end());
  571. }
  572. TBasicString& operator=(const TCharType* s) {
  573. return assign(s);
  574. }
  575. TBasicString& operator=(std::nullptr_t) = delete;
  576. TBasicString& operator=(TExplicitType<TCharType> ch) {
  577. return assign(ch);
  578. }
  579. inline void reserve(size_t len) {
  580. MutRef().reserve(len);
  581. }
  582. // ~~~ Appending ~~~ : FAMILY0(TBasicString&, append);
  583. inline TBasicString& append(size_t count, TCharType ch) {
  584. MutRef().append(count, ch);
  585. return *this;
  586. }
  587. inline TBasicString& append(const TBasicString& s) {
  588. MutRef().append(s.ConstRef());
  589. return *this;
  590. }
  591. inline TBasicString& append(const TBasicString& s, size_t pos, size_t n) {
  592. MutRef().append(s.ConstRef(), pos, n);
  593. return *this;
  594. }
  595. inline TBasicString& append(const TCharType* pc) Y_NOEXCEPT {
  596. MutRef().append(pc);
  597. return *this;
  598. }
  599. inline TBasicString& append(TCharType c) {
  600. MutRef().push_back(c);
  601. return *this;
  602. }
  603. inline TBasicString& append(const TCharType* first, const TCharType* last) {
  604. MutRef().append(first, last);
  605. return *this;
  606. }
  607. inline TBasicString& append(const TCharType* pc, size_t len) {
  608. MutRef().append(pc, len);
  609. return *this;
  610. }
  611. inline void ReserveAndResize(size_t len) {
  612. ::ResizeUninitialized(MutRef(), len);
  613. }
  614. TBasicString& AppendNoAlias(const TCharType* pc, size_t len) {
  615. if (len) {
  616. auto s = this->size();
  617. ReserveAndResize(s + len);
  618. memcpy(&*(begin() + s), pc, len * sizeof(*pc));
  619. }
  620. return *this;
  621. }
  622. TBasicString& AppendNoAlias(const TBasicStringBuf<TCharType, TTraits> s) {
  623. return AppendNoAlias(s.data(), s.size());
  624. }
  625. TBasicString& AppendNoAlias(const TBasicStringBuf<TCharType, TTraits> s, size_t spos, size_t sn = TBase::npos) {
  626. return AppendNoAlias(s.SubString(spos, sn));
  627. }
  628. TBasicString& append(const TBasicStringBuf<TCharType, TTraits> s) {
  629. return append(s.data(), s.size());
  630. }
  631. TBasicString& append(const TBasicStringBuf<TCharType, TTraits> s, size_t spos, size_t sn = TBase::npos) {
  632. return append(s.SubString(spos, sn));
  633. }
  634. TBasicString& append(const TCharType* pc, size_t pos, size_t n, size_t pc_len = TBase::npos) {
  635. return append(pc + pos, Min(n, pc_len - pos));
  636. }
  637. /**
  638. * WARN:
  639. * Certain invocations of this method will result in link-time error.
  640. * You are free to implement corresponding methods in string.cpp if you need them.
  641. */
  642. TBasicString& AppendAscii(const ::TStringBuf& s);
  643. TBasicString& AppendUtf8(const ::TStringBuf& s);
  644. TBasicString& AppendUtf16(const ::TWtringBuf& s);
    // Appends the single character `c` by forwarding to append(c).
    inline void push_back(TCharType c) {
        // TODO
        append(c);
    }

    // Generic `+=`: forwards to whichever append overload accepts `s`.
    template <class T>
    TBasicString& operator+=(const T& s) {
        return append(s);
    }
  653. template <class T>
  654. friend TBasicString operator*(const TBasicString& s, T count) {
  655. TBasicString result;
  656. for (T i = 0; i < count; ++i) {
  657. result += s;
  658. }
  659. return result;
  660. }
  661. template <class T>
  662. TBasicString& operator*=(T count) {
  663. TBasicString temp;
  664. for (T i = 0; i < count; ++i) {
  665. temp += *this;
  666. }
  667. swap(temp);
  668. return *this;
  669. }
    // Implicit read-only conversion to the underlying standard string.
    operator const TStringType&() const noexcept {
        return this->ConstRef();
    }

    // Implicit mutable conversion to the underlying standard string.
    // Goes through MutRef().
    operator TStringType&() {
        return this->MutRef();
    }
  676. /*
  677. * Following overloads of "operator+" aim to choose the cheapest implementation depending on
  678. * summand types: lvalues, detached rvalues, shared rvalues.
  679. *
     * General idea is to use the detached-rvalue argument (left or right) to store the result
  681. * wherever possible. If a buffer in rvalue is large enough this saves a re-allocation. If
  682. * both arguments are rvalues we check which one is detached. If both of them are detached then
     * the left argument is obviously preferable because you won't need to shift the data.
  684. *
  685. * If an rvalue is shared then it's basically the same as lvalue because you cannot use its
  686. * buffer to store the sum. However, we rely on the fact that append() and prepend() are already
  687. * optimized for the shared case and detach the string into the buffer large enough to store
  688. * the sum (compared to the detach+reallocation). This way, if we have only one rvalue argument
  689. * (left or right) then we simply append/prepend into it, without checking if it's detached or
  690. * not. This will be checked inside ReserveAndResize anyway.
  691. *
  692. * If both arguments cannot be used to store the sum (e.g. two lvalues) then we fall back to the
  693. * Join function that constructs a resulting string in the new buffer with the minimum overhead:
  694. * malloc + memcpy + memcpy.
  695. */
    // rvalue + lvalue: appends s2 into s1 and returns s1 by move.
    friend TBasicString operator+(TBasicString&& s1, const TBasicString& s2) Y_WARN_UNUSED_RESULT {
        s1 += s2;
        return std::move(s1);
    }

    // lvalue + rvalue: prepends s1 into s2 and returns s2 by move.
    friend TBasicString operator+(const TBasicString& s1, TBasicString&& s2) Y_WARN_UNUSED_RESULT {
        s2.prepend(s1);
        return std::move(s2);
    }

    // rvalue + rvalue: appends s2 into s1. The branch that would prefer a
    // detached s2 is compiled out by the `#if 0`.
    friend TBasicString operator+(TBasicString&& s1, TBasicString&& s2) Y_WARN_UNUSED_RESULT {
#if 0 && !defined(TSTRING_IS_STD_STRING)
        if (!s1.IsDetached() && s2.IsDetached()) {
            s2.prepend(s1);
            return std::move(s2);
        }
#endif
        s1 += s2;
        return std::move(s1);
    }

    // rvalue + string buffer: appends into s1 and returns it by move.
    friend TBasicString operator+(TBasicString&& s1, const TBasicStringBuf<TCharType, TTraits> s2) Y_WARN_UNUSED_RESULT {
        s1 += s2;
        return std::move(s1);
    }

    // rvalue + character pointer: appends into s1 and returns it by move.
    friend TBasicString operator+(TBasicString&& s1, const TCharType* s2) Y_WARN_UNUSED_RESULT {
        s1 += s2;
        return std::move(s1);
    }

    // rvalue + single character: appends into s1 and returns it by move.
    friend TBasicString operator+(TBasicString&& s1, TCharType s2) Y_WARN_UNUSED_RESULT {
        s1 += s2;
        return std::move(s1);
    }
    // explicit character + lvalue: builds a new string with Join.
    friend TBasicString operator+(TExplicitType<TCharType> ch, const TBasicString& s) Y_WARN_UNUSED_RESULT {
        return Join(TCharType(ch), s);
    }

    // lvalue + lvalue: builds a new string with Join.
    friend TBasicString operator+(const TBasicString& s1, const TBasicString& s2) Y_WARN_UNUSED_RESULT {
        return Join(s1, s2);
    }

    // lvalue + string buffer: builds a new string with Join.
    friend TBasicString operator+(const TBasicString& s1, const TBasicStringBuf<TCharType, TTraits> s2) Y_WARN_UNUSED_RESULT {
        return Join(s1, s2);
    }

    // lvalue + character pointer: builds a new string with Join.
    friend TBasicString operator+(const TBasicString& s1, const TCharType* s2) Y_WARN_UNUSED_RESULT {
        return Join(s1, s2);
    }

    // lvalue + single character: the character is passed to Join as a one-element string buffer.
    friend TBasicString operator+(const TBasicString& s1, TCharType s2) Y_WARN_UNUSED_RESULT {
        return Join(s1, TBasicStringBuf<TCharType, TTraits>(&s2, 1));
    }

    // character pointer + rvalue: prepends into s2 and returns it by move.
    friend TBasicString operator+(const TCharType* s1, TBasicString&& s2) Y_WARN_UNUSED_RESULT {
        s2.prepend(s1);
        return std::move(s2);
    }

    // string buffer + rvalue: prepends into s2 and returns it by move.
    friend TBasicString operator+(const TBasicStringBuf<TCharType, TTraits> s1, TBasicString&& s2) Y_WARN_UNUSED_RESULT {
        s2.prepend(s1);
        return std::move(s2);
    }

    // string buffer + lvalue: builds a new string with Join.
    friend TBasicString operator+(const TBasicStringBuf<TCharType, TTraits> s1, const TBasicString& s2) Y_WARN_UNUSED_RESULT {
        return Join(s1, s2);
    }

    // character pointer + lvalue: builds a new string with Join.
    friend TBasicString operator+(const TCharType* s1, const TBasicString& s2) Y_WARN_UNUSED_RESULT {
        return Join(s1, s2);
    }
    // standard string + TBasicString: concatenates as standard strings; the
    // result is converted back to TBasicString on return. Both operands are taken by value.
    friend TBasicString operator+(std::basic_string<TCharType, TTraits> l, TBasicString r) {
        return l + r.ConstRef();
    }

    // TBasicString + standard string: concatenates as standard strings; the
    // result is converted back to TBasicString on return. Both operands are taken by value.
    friend TBasicString operator+(TBasicString l, std::basic_string<TCharType, TTraits> r) {
        return l.ConstRef() + r;
    }
  761. // ~~~ Prepending ~~~ : FAMILY0(TBasicString&, prepend);
  762. TBasicString& prepend(const TBasicString& s) {
  763. MutRef().insert(0, s.ConstRef());
  764. return *this;
  765. }
  766. TBasicString& prepend(const TBasicString& s, size_t pos, size_t n) {
  767. MutRef().insert(0, s.ConstRef(), pos, n);
  768. return *this;
  769. }
  770. TBasicString& prepend(const TCharType* pc) {
  771. MutRef().insert(0, pc);
  772. return *this;
  773. }
  774. TBasicString& prepend(size_t n, TCharType c) {
  775. MutRef().insert(size_t(0), n, c);
  776. return *this;
  777. }
  778. TBasicString& prepend(TCharType c) {
  779. MutRef().insert(size_t(0), 1, c);
  780. return *this;
  781. }
  782. TBasicString& prepend(const TBasicStringBuf<TCharType, TTraits> s, size_t spos = 0, size_t sn = TBase::npos) {
  783. return insert(0, s, spos, sn);
  784. }
  785. // ~~~ Insertion ~~~ : FAMILY1(TBasicString&, insert, size_t pos);
  786. TBasicString& insert(size_t pos, const TBasicString& s) {
  787. MutRef().insert(pos, s.ConstRef());
  788. return *this;
  789. }
  790. TBasicString& insert(size_t pos, const TBasicString& s, size_t pos1, size_t n1) {
  791. MutRef().insert(pos, s.ConstRef(), pos1, n1);
  792. return *this;
  793. }
  794. TBasicString& insert(size_t pos, const TCharType* pc) {
  795. MutRef().insert(pos, pc);
  796. return *this;
  797. }
  798. TBasicString& insert(size_t pos, const TCharType* pc, size_t len) {
  799. MutRef().insert(pos, pc, len);
  800. return *this;
  801. }
  802. TBasicString& insert(const_iterator pos, const_iterator b, const_iterator e) {
  803. #ifdef TSTRING_IS_STD_STRING
  804. Storage_.insert(Storage_.begin() + this->off(pos), b, e);
  805. return *this;
  806. #else
  807. return insert(this->off(pos), b, e - b);
  808. #endif
  809. }
  810. TBasicString& insert(size_t pos, size_t n, TCharType c) {
  811. MutRef().insert(pos, n, c);
  812. return *this;
  813. }
  814. TBasicString& insert(const_iterator pos, size_t len, TCharType ch) {
  815. return this->insert(this->off(pos), len, ch);
  816. }
  817. TBasicString& insert(const_iterator pos, TCharType ch) {
  818. return this->insert(pos, 1, ch);
  819. }
  820. TBasicString& insert(size_t pos, const TBasicStringBuf<TCharType, TTraits> s, size_t spos = 0, size_t sn = TBase::npos) {
  821. MutRef().insert(pos, s, spos, sn);
  822. return *this;
  823. }
  824. // ~~~ Removing ~~~
  825. TBasicString& remove(size_t pos, size_t n) Y_NOEXCEPT {
  826. if (pos < length()) {
  827. MutRef().erase(pos, n);
  828. }
  829. return *this;
  830. }
  831. TBasicString& remove(size_t pos = 0) Y_NOEXCEPT {
  832. if (pos < length()) {
  833. MutRef().erase(pos);
  834. }
  835. return *this;
  836. }
  837. TBasicString& erase(size_t pos = 0, size_t n = TBase::npos) Y_NOEXCEPT {
  838. MutRef().erase(pos, n);
  839. return *this;
  840. }
  841. TBasicString& erase(const_iterator b, const_iterator e) Y_NOEXCEPT {
  842. return erase(this->off(b), e - b);
  843. }
  844. TBasicString& erase(const_iterator i) Y_NOEXCEPT {
  845. return erase(i, i + 1);
  846. }
  847. TBasicString& pop_back() Y_NOEXCEPT {
  848. Y_ASSERT(!this->empty());
  849. MutRef().pop_back();
  850. return *this;
  851. }
  852. // ~~~ replacement ~~~ : FAMILY2(TBasicString&, replace, size_t pos, size_t n);
  853. TBasicString& replace(size_t pos, size_t n, const TBasicString& s) Y_NOEXCEPT {
  854. MutRef().replace(pos, n, s.ConstRef());
  855. return *this;
  856. }
  857. TBasicString& replace(size_t pos, size_t n, const TBasicString& s, size_t pos1, size_t n1) Y_NOEXCEPT {
  858. MutRef().replace(pos, n, s.ConstRef(), pos1, n1);
  859. return *this;
  860. }
  861. TBasicString& replace(size_t pos, size_t n, const TCharType* pc) Y_NOEXCEPT {
  862. MutRef().replace(pos, n, pc);
  863. return *this;
  864. }
  865. TBasicString& replace(size_t pos, size_t n, const TCharType* s, size_t len) Y_NOEXCEPT {
  866. MutRef().replace(pos, n, s, len);
  867. return *this;
  868. }
  869. TBasicString& replace(size_t pos, size_t n, const TCharType* s, size_t spos, size_t sn) Y_NOEXCEPT {
  870. MutRef().replace(pos, n, s + spos, sn - spos);
  871. return *this;
  872. }
  873. TBasicString& replace(size_t pos, size_t n1, size_t n2, TCharType c) Y_NOEXCEPT {
  874. MutRef().replace(pos, n1, n2, c);
  875. return *this;
  876. }
  877. TBasicString& replace(size_t pos, size_t n, const TBasicStringBuf<TCharType, TTraits> s, size_t spos = 0, size_t sn = TBase::npos) Y_NOEXCEPT {
  878. MutRef().replace(pos, n, s, spos, sn);
  879. return *this;
  880. }
  881. void swap(TBasicString& s) noexcept {
  882. #ifdef TSTRING_IS_STD_STRING
  883. std::swap(Storage_, s.Storage_);
  884. #else
  885. S_.Swap(s.S_);
  886. #endif
  887. }
  888. /**
  889. * @returns String suitable for debug printing (like Python's `repr()`).
  890. * Format of the string is unspecified and may be changed over time.
  891. */
  892. TBasicString Quote() const {
  893. extern TBasicString EscapeC(const TBasicString&);
  894. return TBasicString() + '"' + EscapeC(*this) + '"';
  895. }
  896. /**
  897. * Modifies the case of the string, depending on the operation.
  898. * @return false if no changes have been made.
  899. *
  900. * @warning when the value_type is char, these methods will not work with non-ASCII letters.
  901. */
  902. bool to_lower(size_t pos = 0, size_t n = TBase::npos);
  903. bool to_upper(size_t pos = 0, size_t n = TBase::npos);
  904. bool to_title(size_t pos = 0, size_t n = TBase::npos);
  905. public:
  906. /**
  907. * Modifies the substring of length `n` starting from `pos`, applying `f` to each position and symbol.
  908. *
  909. * @return false if no changes have been made.
  910. */
  911. template <typename T>
  912. bool Transform(T&& f, size_t pos = 0, size_t n = TBase::npos) {
  913. size_t len = length();
  914. if (pos > len) {
  915. pos = len;
  916. }
  917. if (n > len - pos) {
  918. n = len - pos;
  919. }
  920. bool changed = false;
  921. for (size_t i = pos; i != pos + n; ++i) {
  922. #ifdef TSTRING_IS_STD_STRING
  923. auto c = f(i, Storage_[i]);
  924. if (c != Storage_[i]) {
  925. changed = true;
  926. Storage_[i] = c;
  927. }
  928. #else
  929. auto c = f(i, data()[i]);
  930. if (c != data()[i]) {
  931. if (!changed) {
  932. Detach();
  933. changed = true;
  934. }
  935. begin()[i] = c;
  936. }
  937. #endif
  938. }
  939. return changed;
  940. }
  941. };
  942. std::ostream& operator<<(std::ostream&, const TString&);
  943. std::istream& operator>>(std::istream&, TString&);
  944. template <typename TCharType, typename TTraits>
  945. TBasicString<TCharType> to_lower(const TBasicString<TCharType, TTraits>& s) {
  946. TBasicString<TCharType> ret(s);
  947. ret.to_lower();
  948. return ret;
  949. }
  950. template <typename TCharType, typename TTraits>
  951. TBasicString<TCharType> to_upper(const TBasicString<TCharType, TTraits>& s) {
  952. TBasicString<TCharType> ret(s);
  953. ret.to_upper();
  954. return ret;
  955. }
  956. template <typename TCharType, typename TTraits>
  957. TBasicString<TCharType> to_title(const TBasicString<TCharType, TTraits>& s) {
  958. TBasicString<TCharType> ret(s);
  959. ret.to_title();
  960. return ret;
  961. }
  962. namespace std {
  963. template <>
  964. struct hash<TString> {
  965. using argument_type = TString;
  966. using result_type = size_t;
  967. inline result_type operator()(argument_type const& s) const noexcept {
  968. return NHashPrivate::ComputeStringHash(s.data(), s.size());
  969. }
  970. };
  971. }
  972. #undef Y_NOEXCEPT
  973. template <class S>
  974. inline S LegacySubstr(const S& s, size_t pos, size_t n = S::npos) {
  975. size_t len = s.length();
  976. pos = Min(pos, len);
  977. n = Min(n, len - pos);
  978. return S(s, pos, n);
  979. }
  /**
   * Calls `s.replace(pos, args...)` when `pos` does not exceed the length of `s`;
   * otherwise leaves `s` untouched.
   *
   * @param s    String to modify; may be an lvalue or an rvalue.
   * @param pos  Offset passed as the first argument of `replace`.
   * @param args Remaining arguments, perfectly forwarded to `replace`.
   * @return `s`, with its original value category preserved.
   */
  template <typename S, typename... Args>
  inline S&& LegacyReplace(S&& s, size_t pos, Args&&... args) {
      if (pos <= s.length()) {
          s.replace(pos, std::forward<Args>(args)...);
      }

      // `s` is a named (lvalue) expression; forwarding restores the value category so that the
      // rvalue instantiation also compiles on language versions before C++23.
      return std::forward<S>(s);
  }
  /**
   * Calls `s.erase(pos, args...)` when `pos` does not exceed the length of `s`;
   * otherwise leaves `s` untouched.
   *
   * @param s    String to modify; may be an lvalue or an rvalue.
   * @param pos  Offset passed as the first argument of `erase`.
   * @param args Remaining arguments, perfectly forwarded to `erase`.
   * @return `s`, with its original value category preserved.
   */
  template <typename S, typename... Args>
  inline S&& LegacyErase(S&& s, size_t pos, Args&&... args) {
      if (pos <= s.length()) {
          s.erase(pos, std::forward<Args>(args)...);
      }

      // `s` is a named (lvalue) expression; forwarding restores the value category so that the
      // rvalue instantiation also compiles on language versions before C++23.
      return std::forward<S>(s);
  }
  /// Maps a null pointer to an empty C string; any other pointer is returned unchanged.
  inline const char* LegacyStr(const char* s) noexcept {
      if (s == nullptr) {
          return "";
      }
      return s;
  }
  997. // interop
  998. template <class TCharType, class TTraits>
  999. auto& MutRef(TBasicString<TCharType, TTraits>& s) {
  1000. return s.MutRef();
  1001. }
  1002. template <class TCharType, class TTraits>
  1003. const auto& ConstRef(const TBasicString<TCharType, TTraits>& s) noexcept {
  1004. return s.ConstRef();
  1005. }
  /// Interop helper: for a `std::basic_string` the mutable reference is the string itself.
  template <class TCharType, class TCharTraits, class TAllocator>
  std::basic_string<TCharType, TCharTraits, TAllocator>& MutRef(std::basic_string<TCharType, TCharTraits, TAllocator>& s) noexcept {
      return s;
  }
  /// Interop helper: for a `std::basic_string` the constant reference is the string itself.
  template <class TCharType, class TCharTraits, class TAllocator>
  const std::basic_string<TCharType, TCharTraits, TAllocator>& ConstRef(const std::basic_string<TCharType, TCharTraits, TAllocator>& s) noexcept {
      return s;
  }
  1014. template <class TCharType, class TTraits>
  1015. void ResizeUninitialized(TBasicString<TCharType, TTraits>& s, size_t len) {
  1016. s.ReserveAndResize(len);
  1017. }