strbuf.h 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565
  1. #pragma once
  2. #include "fwd.h"
  3. #include "iterator.h"
  4. #include "strbase.h"
  5. #include "utility.h"
  6. #include "typetraits.h"
  7. #include <util/system/compiler.h>
  8. #include <string_view>
  9. using namespace std::string_view_literals;
  10. template <typename TCharType, typename TTraits>
  11. class TBasicStringBuf: public std::basic_string_view<TCharType>,
  12. public TStringBase<TBasicStringBuf<TCharType, TTraits>, TCharType, TTraits> {
  13. private:
  14. using TdSelf = TBasicStringBuf;
  15. using TBase = TStringBase<TdSelf, TCharType, TTraits>;
  16. using TStringView = std::basic_string_view<TCharType>;
  17. public:
  18. using char_type = TCharType; // TODO: DROP
  19. using traits_type = TTraits;
  20. // Resolving some ambiguity between TStringBase and std::basic_string_view
  21. // for typenames
  22. using typename TStringView::const_iterator;
  23. using typename TStringView::const_reference;
  24. using typename TStringView::const_reverse_iterator;
  25. using typename TStringView::iterator;
  26. using typename TStringView::reference;
  27. using typename TStringView::reverse_iterator;
  28. using typename TStringView::size_type;
  29. using typename TStringView::value_type;
  30. // for constants
  31. using TStringView::npos;
  32. // for methods and operators
  33. using TStringView::begin;
  34. using TStringView::cbegin;
  35. using TStringView::cend;
  36. using TStringView::crbegin;
  37. using TStringView::crend;
  38. using TStringView::end;
  39. using TStringView::rbegin;
  40. using TStringView::rend;
  41. using TStringView::data;
  42. using TStringView::empty;
  43. using TStringView::size;
  44. using TStringView::operator[];
  45. /*
  46. * WARN:
  47. * TBase::at silently return 0 in case of range error,
  48. * while std::string_view throws std::out_of_range.
  49. */
  50. using TBase::at;
  51. using TStringView::back;
  52. using TStringView::front;
  53. using TStringView::find;
  54. /*
  55. * WARN:
  56. * TBase::*find* methods take into account TCharTraits,
  57. * while TTStringView::*find* would use default std::char_traits.
  58. */
  59. using TBase::find_first_not_of;
  60. using TBase::find_first_of;
  61. using TBase::find_last_not_of;
  62. using TBase::find_last_of;
  63. using TBase::rfind;
  64. using TStringView::copy;
  65. /*
  66. * WARN:
  67. * TBase::compare takes into account TCharTraits,
  68. * thus making it possible to implement case-insensitive string buffers,
  69. * if it is using TStringBase::compare
  70. */
  71. using TBase::compare;
  72. /*
  73. * WARN:
  74. * TBase::substr properly checks boundary cases and clamps them with maximum valid values,
  75. * while TStringView::substr throws std::out_of_range error.
  76. */
  77. using TBase::substr;
  78. /*
  79. * WARN:
  80. * Constructing std::string_view(nullptr, non_zero_size) ctor
  81. * results in undefined behavior according to the standard.
  82. * In libc++ this UB results in runtime assertion, though it is better
  83. * to generate compilation error instead.
  84. */
  85. constexpr inline TBasicStringBuf(std::nullptr_t begin, size_t size) = delete;
  86. // TODO: Uncomment.
  87. // constexpr TBasicStringBuf(std::nullptr_t) = delete;
  88. constexpr inline TBasicStringBuf(const TCharType* data Y_LIFETIME_BOUND, size_t size) noexcept
  89. : TStringView(data, size)
  90. {
  91. }
  92. constexpr TBasicStringBuf(const TCharType* data Y_LIFETIME_BOUND) noexcept
  93. /*
  94. * WARN: TBase::StrLen properly handles nullptr,
  95. * while std::string_view (using std::char_traits) will abort in such case
  96. */
  97. : TStringView(data, TBase::StrLen(data))
  98. {
  99. }
  100. constexpr inline TBasicStringBuf(const TCharType* beg Y_LIFETIME_BOUND, const TCharType* end Y_LIFETIME_BOUND) noexcept
  101. #if __cplusplus >= 202002L && __cpp_lib_string_view >= 201803L && !defined(_LIBCPP_HAS_NO_CONCEPTS)
  102. : TStringView(beg, end)
  103. #else
  104. : TStringView(beg, NonNegativeDistance(beg, end))
  105. #endif
  106. {
  107. }
  108. template <typename D, typename T>
  109. inline TBasicStringBuf(const TStringBase<D, TCharType, T>& str) noexcept
  110. : TStringView(str.data(), str.size())
  111. {
  112. }
  113. template <typename T>
  114. inline TBasicStringBuf(const TBasicString<TCharType, T>& str Y_STRING_LIFETIME_BOUND) noexcept
  115. : TStringView(str.data(), str.size())
  116. {
  117. }
  118. template <typename T, typename A>
  119. inline TBasicStringBuf(const std::basic_string<TCharType, T, A>& str Y_LIFETIME_BOUND) noexcept
  120. : TStringView(str)
  121. {
  122. }
  123. template <typename TCharTraits>
  124. constexpr TBasicStringBuf(std::basic_string_view<TCharType, TCharTraits> view Y_LIFETIME_BOUND) noexcept
  125. : TStringView(view)
  126. {
  127. }
  128. template <typename TCharTraits>
  129. constexpr TBasicStringBuf(TBasicStringBuf<TCharType, TCharTraits> sb Y_LIFETIME_BOUND) noexcept
  130. : TStringView(sb)
  131. {
  132. }
  133. constexpr inline TBasicStringBuf() noexcept {
  134. /*
  135. * WARN:
  136. * This ctor can not be defaulted due to the following feature of default initialization:
  137. * If T is a const-qualified type, it must be a class type with a user-provided default constructor.
  138. * (see https://en.cppreference.com/w/cpp/language/default_initialization).
  139. *
  140. * This means, that a class with default ctor can not be a constant member of another class with default ctor.
  141. */
  142. }
  143. inline TBasicStringBuf(const TBasicStringBuf src Y_LIFETIME_BOUND, size_t pos, size_t n) noexcept
  144. : TBasicStringBuf(src)
  145. {
  146. Skip(pos).Trunc(n);
  147. }
  148. inline TBasicStringBuf(const TBasicStringBuf src Y_LIFETIME_BOUND, size_t pos) noexcept
  149. : TBasicStringBuf(src, pos, TBase::npos)
  150. {
  151. }
  152. Y_PURE_FUNCTION inline TBasicStringBuf SubString(size_t pos, size_t n) const noexcept {
  153. pos = Min(pos, size());
  154. n = Min(n, size() - pos);
  155. return TBasicStringBuf(data() + pos, n);
  156. }
  157. public:
  158. void Clear() {
  159. *this = TdSelf();
  160. }
  161. constexpr bool IsInited() const noexcept {
  162. return data() != nullptr;
  163. }
  164. public:
  165. /**
  166. * Tries to split string in two parts using given delimiter character.
  167. * Searches for the delimiter, scanning string from the beginning.
  168. * The delimiter is excluded from the result. Both out parameters are
  169. * left unmodified if there was no delimiter character in string.
  170. *
  171. * @param[in] delim Delimiter character.
  172. * @param[out] l The first part of split result.
  173. * @param[out] r The second part of split result.
  174. * @returns Whether the split was actually performed.
  175. */
  176. inline bool TrySplit(TCharType delim, TdSelf& l, TdSelf& r) const noexcept {
  177. return TrySplitOn(TBase::find(delim), l, r);
  178. }
  179. /**
  180. * Tries to split string in two parts using given delimiter character.
  181. * Searches for the delimiter, scanning string from the end.
  182. * The delimiter is excluded from the result. Both out parameters are
  183. * left unmodified if there was no delimiter character in string.
  184. *
  185. * @param[in] delim Delimiter character.
  186. * @param[out] l The first part of split result.
  187. * @param[out] r The second part of split result.
  188. * @returns Whether the split was actually performed.
  189. */
  190. inline bool TryRSplit(TCharType delim, TdSelf& l, TdSelf& r) const noexcept {
  191. return TrySplitOn(TBase::rfind(delim), l, r);
  192. }
  193. /**
  194. * Tries to split string in two parts using given delimiter sequence.
  195. * Searches for the delimiter, scanning string from the beginning.
  196. * The delimiter sequence is excluded from the result. Both out parameters
  197. * are left unmodified if there was no delimiter character in string.
  198. *
  199. * @param[in] delim Delimiter sequence.
  200. * @param[out] l The first part of split result.
  201. * @param[out] r The second part of split result.
  202. * @returns Whether the split was actually performed.
  203. */
  204. inline bool TrySplit(TdSelf delim, TdSelf& l, TdSelf& r) const noexcept {
  205. return TrySplitOn(TBase::find(delim), l, r, delim.size());
  206. }
  207. /**
  208. * Tries to split string in two parts using given delimiter sequence.
  209. * Searches for the delimiter, scanning string from the end.
  210. * The delimiter sequence is excluded from the result. Both out parameters
  211. * are left unmodified if there was no delimiter character in string.
  212. *
  213. * @param[in] delim Delimiter sequence.
  214. * @param[out] l The first part of split result.
  215. * @param[out] r The second part of split result.
  216. * @returns Whether the split was actually performed.
  217. */
  218. inline bool TryRSplit(TdSelf delim, TdSelf& l, TdSelf& r) const noexcept {
  219. return TrySplitOn(TBase::rfind(delim), l, r, delim.size());
  220. }
  221. inline void Split(TCharType delim, TdSelf& l, TdSelf& r) const noexcept {
  222. SplitTemplate(delim, l, r);
  223. }
  224. inline void RSplit(TCharType delim, TdSelf& l, TdSelf& r) const noexcept {
  225. RSplitTemplate(delim, l, r);
  226. }
  227. inline void Split(TdSelf delim, TdSelf& l, TdSelf& r) const noexcept {
  228. SplitTemplate(delim, l, r);
  229. }
  230. inline void RSplit(TdSelf delim, TdSelf& l, TdSelf& r) const noexcept {
  231. RSplitTemplate(delim, l, r);
  232. }
  233. private:
  234. // splits on a delimiter at a given position; delimiter is excluded
  235. void DoSplitOn(size_t pos, TdSelf& l, TdSelf& r, size_t len) const noexcept {
  236. Y_ASSERT(pos != TBase::npos);
  237. // make a copy in case one of l/r is really *this
  238. const TdSelf tok = SubStr(pos + len);
  239. l = Head(pos);
  240. r = tok;
  241. }
  242. public:
  243. // In all methods below with @pos parameter, @pos is supposed to be
  244. // a result of string find()/rfind()/find_first() or other similiar functions,
  245. // returning either position within string length [0..size()) or npos.
  246. // For all other @pos values (out of string index range) the behaviour isn't well defined
  247. // For example, for TStringBuf s("abc"):
  248. // s.TrySplitOn(s.find('z'), ...) is false, but s.TrySplitOn(100500, ...) is true.
  249. bool TrySplitOn(size_t pos, TdSelf& l, TdSelf& r, size_t len = 1) const noexcept {
  250. if (TBase::npos == pos) {
  251. return false;
  252. }
  253. DoSplitOn(pos, l, r, len);
  254. return true;
  255. }
  256. void SplitOn(size_t pos, TdSelf& l, TdSelf& r, size_t len = 1) const noexcept {
  257. if (!TrySplitOn(pos, l, r, len)) {
  258. l = *this;
  259. r = TdSelf();
  260. }
  261. }
  262. bool TrySplitAt(size_t pos, TdSelf& l, TdSelf& r) const noexcept {
  263. return TrySplitOn(pos, l, r, 0);
  264. }
  265. void SplitAt(size_t pos, TdSelf& l, TdSelf& r) const noexcept {
  266. SplitOn(pos, l, r, 0);
  267. }
  268. /*
  269. // Not implemented intentionally, use TrySplitOn() instead
  270. void RSplitOn(size_t pos, TdSelf& l, TdSelf& r) const noexcept;
  271. void RSplitAt(size_t pos, TdSelf& l, TdSelf& r) const noexcept;
  272. */
  273. public:
  274. Y_PURE_FUNCTION inline TdSelf After(TCharType c) const noexcept {
  275. TdSelf l, r;
  276. return TrySplit(c, l, r) ? r : *this;
  277. }
  278. Y_PURE_FUNCTION inline TdSelf Before(TCharType c) const noexcept {
  279. TdSelf l, r;
  280. return TrySplit(c, l, r) ? l : *this;
  281. }
  282. Y_PURE_FUNCTION inline TdSelf RAfter(TCharType c) const noexcept {
  283. TdSelf l, r;
  284. return TryRSplit(c, l, r) ? r : *this;
  285. }
  286. Y_PURE_FUNCTION inline TdSelf RBefore(TCharType c) const noexcept {
  287. TdSelf l, r;
  288. return TryRSplit(c, l, r) ? l : *this;
  289. }
  290. public:
  291. inline bool AfterPrefix(const TdSelf& prefix, TdSelf& result) const noexcept {
  292. if (this->StartsWith(prefix)) {
  293. result = Tail(prefix.size());
  294. return true;
  295. }
  296. return false;
  297. }
  298. inline bool BeforeSuffix(const TdSelf& suffix, TdSelf& result) const noexcept {
  299. if (this->EndsWith(suffix)) {
  300. result = Head(size() - suffix.size());
  301. return true;
  302. }
  303. return false;
  304. }
  305. // returns true if string started with `prefix`, false otherwise
  306. inline bool SkipPrefix(const TdSelf& prefix) noexcept {
  307. return AfterPrefix(prefix, *this);
  308. }
  309. // returns true if string ended with `suffix`, false otherwise
  310. inline bool ChopSuffix(const TdSelf& suffix) noexcept {
  311. return BeforeSuffix(suffix, *this);
  312. }
  313. public:
  314. // returns tail, including pos
  315. TdSelf SplitOffAt(size_t pos) {
  316. const TdSelf tok = SubStr(pos);
  317. Trunc(pos);
  318. return tok;
  319. }
  320. // returns head, tail includes pos
  321. TdSelf NextTokAt(size_t pos) {
  322. const TdSelf tok = Head(pos);
  323. Skip(pos);
  324. return tok;
  325. }
  326. TdSelf SplitOffOn(size_t pos) {
  327. TdSelf tok;
  328. SplitOn(pos, *this, tok);
  329. return tok;
  330. }
  331. TdSelf NextTokOn(size_t pos) {
  332. TdSelf tok;
  333. SplitOn(pos, tok, *this);
  334. return tok;
  335. }
  336. /*
  337. // See comment on RSplitOn() above
  338. TdSelf RSplitOffOn(size_t pos);
  339. TdSelf RNextTokOn(size_t pos);
  340. */
  341. public:
  342. TdSelf SplitOff(TCharType delim) {
  343. TdSelf tok;
  344. Split(delim, *this, tok);
  345. return tok;
  346. }
  347. TdSelf RSplitOff(TCharType delim) {
  348. TdSelf tok;
  349. RSplit(delim, tok, *this);
  350. return tok;
  351. }
  352. bool NextTok(TCharType delim, TdSelf& tok) {
  353. return NextTokTemplate(delim, tok);
  354. }
  355. bool NextTok(TdSelf delim, TdSelf& tok) {
  356. return NextTokTemplate(delim, tok);
  357. }
  358. bool RNextTok(TCharType delim, TdSelf& tok) {
  359. return RNextTokTemplate(delim, tok);
  360. }
  361. bool RNextTok(TdSelf delim, TdSelf& tok) {
  362. return RNextTokTemplate(delim, tok);
  363. }
  364. bool ReadLine(TdSelf& tok) {
  365. if (NextTok('\n', tok)) {
  366. while (!tok.empty() && tok.back() == '\r') {
  367. tok.remove_suffix(1);
  368. }
  369. return true;
  370. }
  371. return false;
  372. }
  373. TdSelf NextTok(TCharType delim) {
  374. return NextTokTemplate(delim);
  375. }
  376. TdSelf RNextTok(TCharType delim) {
  377. return RNextTokTemplate(delim);
  378. }
  379. TdSelf NextTok(TdSelf delim) {
  380. return NextTokTemplate(delim);
  381. }
  382. TdSelf RNextTok(TdSelf delim) {
  383. return RNextTokTemplate(delim);
  384. }
  385. public: // string subsequences
  386. /// Cut last @c shift characters (or less if length is less than @c shift)
  387. inline TdSelf& Chop(size_t shift) noexcept {
  388. this->remove_suffix(std::min(shift, size()));
  389. return *this;
  390. }
  391. /// Cut first @c shift characters (or less if length is less than @c shift)
  392. inline TdSelf& Skip(size_t shift) noexcept {
  393. this->remove_prefix(std::min(shift, size()));
  394. return *this;
  395. }
  396. /// Sets the start pointer to a position relative to the end
  397. inline TdSelf& RSeek(size_t tailSize) noexcept {
  398. if (size() > tailSize) {
  399. // WARN: removing TStringView:: will lead to an infinite recursion
  400. *this = TStringView::substr(size() - tailSize, tailSize);
  401. }
  402. return *this;
  403. }
  404. // coverity[exn_spec_violation]
  405. inline TdSelf& Trunc(size_t targetSize) noexcept {
  406. // Coverity false positive issue
  407. // exn_spec_violation: An exception of type "std::out_of_range" is thrown but the exception specification "noexcept" doesn't allow it to be thrown. This will result in a call to terminate().
  408. // fun_call_w_exception: Called function TStringView::substr throws an exception of type "std::out_of_range".
  409. // Suppress this issue because we pass argument pos=0 and string_view can't throw std::out_of_range.
  410. *this = TStringView::substr(0, targetSize); // WARN: removing TStringView:: will lead to an infinite recursion
  411. return *this;
  412. }
  413. Y_PURE_FUNCTION inline TdSelf SubStr(size_t beg) const noexcept {
  414. return TdSelf(*this).Skip(beg);
  415. }
  416. Y_PURE_FUNCTION inline TdSelf SubStr(size_t beg, size_t len) const noexcept {
  417. return SubStr(beg).Trunc(len);
  418. }
  419. Y_PURE_FUNCTION inline TdSelf Head(size_t pos) const noexcept {
  420. return TdSelf(*this).Trunc(pos);
  421. }
  422. Y_PURE_FUNCTION inline TdSelf Tail(size_t pos) const noexcept {
  423. return SubStr(pos);
  424. }
  425. Y_PURE_FUNCTION inline TdSelf Last(size_t len) const noexcept {
  426. return TdSelf(*this).RSeek(len);
  427. }
  428. private:
  429. template <typename TDelimiterType>
  430. TdSelf NextTokTemplate(TDelimiterType delim) {
  431. TdSelf tok;
  432. Split(delim, tok, *this);
  433. return tok;
  434. }
  435. template <typename TDelimiterType>
  436. TdSelf RNextTokTemplate(TDelimiterType delim) {
  437. TdSelf tok;
  438. RSplit(delim, *this, tok);
  439. return tok;
  440. }
  441. template <typename TDelimiterType>
  442. bool NextTokTemplate(TDelimiterType delim, TdSelf& tok) {
  443. if (!empty()) {
  444. tok = NextTokTemplate(delim);
  445. return true;
  446. }
  447. return false;
  448. }
  449. template <typename TDelimiterType>
  450. bool RNextTokTemplate(TDelimiterType delim, TdSelf& tok) {
  451. if (!empty()) {
  452. tok = RNextTokTemplate(delim);
  453. return true;
  454. }
  455. return false;
  456. }
  457. template <typename TDelimiterType>
  458. inline void SplitTemplate(TDelimiterType delim, TdSelf& l, TdSelf& r) const noexcept {
  459. if (!TrySplit(delim, l, r)) {
  460. l = *this;
  461. r = TdSelf();
  462. }
  463. }
  464. template <typename TDelimiterType>
  465. inline void RSplitTemplate(TDelimiterType delim, TdSelf& l, TdSelf& r) const noexcept {
  466. if (!TryRSplit(delim, l, r)) {
  467. r = *this;
  468. l = TdSelf();
  469. }
  470. }
  471. };
  472. std::ostream& operator<<(std::ostream& os, TStringBuf buf);
  473. constexpr TStringBuf operator""_sb(const char* str, size_t len) {
  474. return TStringBuf{str, len};
  475. }