// strbuf.h

#pragma once

#include "fwd.h"
#include "strbase.h"
#include "utility.h"
#include "typetraits.h"

#include <algorithm>
#include <cstddef>
#include <iosfwd>
#include <string>
#include <string_view>

using namespace std::string_view_literals;
  8. template <typename TCharType, typename TTraits>
  9. class TBasicStringBuf: public std::basic_string_view<TCharType>,
  10. public TStringBase<TBasicStringBuf<TCharType, TTraits>, TCharType, TTraits> {
  11. private:
  12. using TdSelf = TBasicStringBuf;
  13. using TBase = TStringBase<TdSelf, TCharType, TTraits>;
  14. using TStringView = std::basic_string_view<TCharType>;
  15. public:
  16. using char_type = TCharType; // TODO: DROP
  17. using traits_type = TTraits;
  18. //Resolving some ambiguity between TStringBase and std::basic_string_view
  19. //for typenames
  20. using typename TStringView::const_iterator;
  21. using typename TStringView::const_reference;
  22. using typename TStringView::const_reverse_iterator;
  23. using typename TStringView::iterator;
  24. using typename TStringView::reference;
  25. using typename TStringView::reverse_iterator;
  26. using typename TStringView::size_type;
  27. using typename TStringView::value_type;
  28. //for constants
  29. using TStringView::npos;
  30. //for methods and operators
  31. using TStringView::begin;
  32. using TStringView::cbegin;
  33. using TStringView::cend;
  34. using TStringView::crbegin;
  35. using TStringView::crend;
  36. using TStringView::end;
  37. using TStringView::rbegin;
  38. using TStringView::rend;
  39. using TStringView::data;
  40. using TStringView::empty;
  41. using TStringView::size;
  42. using TStringView::operator[];
  43. /*
  44. * WARN:
  45. * TBase::at silently return 0 in case of range error,
  46. * while std::string_view throws std::out_of_range.
  47. */
  48. using TBase::at;
  49. using TStringView::back;
  50. using TStringView::front;
  51. using TStringView::find;
  52. /*
  53. * WARN:
  54. * TBase::*find* methods take into account TCharTraits,
  55. * while TTStringView::*find* would use default std::char_traits.
  56. */
  57. using TBase::find_first_not_of;
  58. using TBase::find_first_of;
  59. using TBase::find_last_not_of;
  60. using TBase::find_last_of;
  61. using TBase::rfind;
  62. using TStringView::copy;
  63. /*
  64. * WARN:
  65. * TBase::compare takes into account TCharTraits,
  66. * thus making it possible to implement case-insensitive string buffers,
  67. * if it is using TStringBase::compare
  68. */
  69. using TBase::compare;
  70. /*
  71. * WARN:
  72. * TBase::substr properly checks boundary cases and clamps them with maximum valid values,
  73. * while TStringView::substr throws std::out_of_range error.
  74. */
  75. using TBase::substr;
  76. /*
  77. * WARN:
  78. * Constructing std::string_view(nullptr, non_zero_size) ctor
  79. * results in undefined behavior according to the standard.
  80. * In libc++ this UB results in runtime assertion, though it is better
  81. * to generate compilation error instead.
  82. */
  83. constexpr inline TBasicStringBuf(std::nullptr_t begin, size_t size) = delete;
  84. constexpr inline TBasicStringBuf(const TCharType* data, size_t size) noexcept
  85. : TStringView(data, size)
  86. {
  87. }
  88. constexpr TBasicStringBuf(const TCharType* data) noexcept
  89. /*
  90. * WARN: TBase::StrLen properly handles nullptr,
  91. * while std::string_view (using std::char_traits) will abort in such case
  92. */
  93. : TStringView(data, TBase::StrLen(data))
  94. {
  95. }
  96. constexpr inline TBasicStringBuf(const TCharType* beg, const TCharType* end) noexcept
  97. : TStringView(beg, end - beg)
  98. {
  99. }
  100. template <typename D, typename T>
  101. inline TBasicStringBuf(const TStringBase<D, TCharType, T>& str) noexcept
  102. : TStringView(str.data(), str.size())
  103. {
  104. }
  105. template <typename T, typename A>
  106. inline TBasicStringBuf(const std::basic_string<TCharType, T, A>& str) noexcept
  107. : TStringView(str)
  108. {
  109. }
  110. template <typename TCharTraits>
  111. constexpr TBasicStringBuf(std::basic_string_view<TCharType, TCharTraits> view) noexcept
  112. : TStringView(view)
  113. {
  114. }
  115. constexpr inline TBasicStringBuf() noexcept {
  116. /*
  117. * WARN:
  118. * This ctor can not be defaulted due to the following feature of default initialization:
  119. * If T is a const-qualified type, it must be a class type with a user-provided default constructor.
  120. * (see https://en.cppreference.com/w/cpp/language/default_initialization).
  121. *
  122. * This means, that a class with default ctor can not be a constant member of another class with default ctor.
  123. */
  124. }
  125. inline TBasicStringBuf(const TBasicStringBuf& src, size_t pos, size_t n) noexcept
  126. : TBasicStringBuf(src)
  127. {
  128. Skip(pos).Trunc(n);
  129. }
  130. inline TBasicStringBuf(const TBasicStringBuf& src, size_t pos) noexcept
  131. : TBasicStringBuf(src, pos, TBase::npos)
  132. {
  133. }
  134. Y_PURE_FUNCTION inline TBasicStringBuf SubString(size_t pos, size_t n) const noexcept {
  135. pos = Min(pos, size());
  136. n = Min(n, size() - pos);
  137. return TBasicStringBuf(data() + pos, n);
  138. }
  139. public:
  140. void Clear() {
  141. *this = TdSelf();
  142. }
  143. constexpr bool IsInited() const noexcept {
  144. return data() != nullptr;
  145. }
  146. public:
  147. /**
  148. * Tries to split string in two parts using given delimiter character.
  149. * Searches for the delimiter, scanning string from the beginning.
  150. * The delimiter is excluded from the result. Both out parameters are
  151. * left unmodified if there was no delimiter character in string.
  152. *
  153. * @param[in] delim Delimiter character.
  154. * @param[out] l The first part of split result.
  155. * @param[out] r The second part of split result.
  156. * @returns Whether the split was actually performed.
  157. */
  158. inline bool TrySplit(TCharType delim, TdSelf& l, TdSelf& r) const noexcept {
  159. return TrySplitOn(TBase::find(delim), l, r);
  160. }
  161. /**
  162. * Tries to split string in two parts using given delimiter character.
  163. * Searches for the delimiter, scanning string from the end.
  164. * The delimiter is excluded from the result. Both out parameters are
  165. * left unmodified if there was no delimiter character in string.
  166. *
  167. * @param[in] delim Delimiter character.
  168. * @param[out] l The first part of split result.
  169. * @param[out] r The second part of split result.
  170. * @returns Whether the split was actually performed.
  171. */
  172. inline bool TryRSplit(TCharType delim, TdSelf& l, TdSelf& r) const noexcept {
  173. return TrySplitOn(TBase::rfind(delim), l, r);
  174. }
  175. /**
  176. * Tries to split string in two parts using given delimiter sequence.
  177. * Searches for the delimiter, scanning string from the beginning.
  178. * The delimiter sequence is excluded from the result. Both out parameters
  179. * are left unmodified if there was no delimiter character in string.
  180. *
  181. * @param[in] delim Delimiter sequence.
  182. * @param[out] l The first part of split result.
  183. * @param[out] r The second part of split result.
  184. * @returns Whether the split was actually performed.
  185. */
  186. inline bool TrySplit(TdSelf delim, TdSelf& l, TdSelf& r) const noexcept {
  187. return TrySplitOn(TBase::find(delim), l, r, delim.size());
  188. }
  189. /**
  190. * Tries to split string in two parts using given delimiter sequence.
  191. * Searches for the delimiter, scanning string from the end.
  192. * The delimiter sequence is excluded from the result. Both out parameters
  193. * are left unmodified if there was no delimiter character in string.
  194. *
  195. * @param[in] delim Delimiter sequence.
  196. * @param[out] l The first part of split result.
  197. * @param[out] r The second part of split result.
  198. * @returns Whether the split was actually performed.
  199. */
  200. inline bool TryRSplit(TdSelf delim, TdSelf& l, TdSelf& r) const noexcept {
  201. return TrySplitOn(TBase::rfind(delim), l, r, delim.size());
  202. }
  203. inline void Split(TCharType delim, TdSelf& l, TdSelf& r) const noexcept {
  204. SplitTemplate(delim, l, r);
  205. }
  206. inline void RSplit(TCharType delim, TdSelf& l, TdSelf& r) const noexcept {
  207. RSplitTemplate(delim, l, r);
  208. }
  209. inline void Split(TdSelf delim, TdSelf& l, TdSelf& r) const noexcept {
  210. SplitTemplate(delim, l, r);
  211. }
  212. inline void RSplit(TdSelf delim, TdSelf& l, TdSelf& r) const noexcept {
  213. RSplitTemplate(delim, l, r);
  214. }
  215. private:
  216. // splits on a delimiter at a given position; delimiter is excluded
  217. void DoSplitOn(size_t pos, TdSelf& l, TdSelf& r, size_t len) const noexcept {
  218. Y_ASSERT(pos != TBase::npos);
  219. // make a copy in case one of l/r is really *this
  220. const TdSelf tok = SubStr(pos + len);
  221. l = Head(pos);
  222. r = tok;
  223. }
  224. public:
  225. // In all methods below with @pos parameter, @pos is supposed to be
  226. // a result of string find()/rfind()/find_first() or other similiar functions,
  227. // returning either position within string length [0..size()) or npos.
  228. // For all other @pos values (out of string index range) the behaviour isn't well defined
  229. // For example, for TStringBuf s("abc"):
  230. // s.TrySplitOn(s.find('z'), ...) is false, but s.TrySplitOn(100500, ...) is true.
  231. bool TrySplitOn(size_t pos, TdSelf& l, TdSelf& r, size_t len = 1) const noexcept {
  232. if (TBase::npos == pos)
  233. return false;
  234. DoSplitOn(pos, l, r, len);
  235. return true;
  236. }
  237. void SplitOn(size_t pos, TdSelf& l, TdSelf& r, size_t len = 1) const noexcept {
  238. if (!TrySplitOn(pos, l, r, len)) {
  239. l = *this;
  240. r = TdSelf();
  241. }
  242. }
  243. bool TrySplitAt(size_t pos, TdSelf& l, TdSelf& r) const noexcept {
  244. return TrySplitOn(pos, l, r, 0);
  245. }
  246. void SplitAt(size_t pos, TdSelf& l, TdSelf& r) const noexcept {
  247. SplitOn(pos, l, r, 0);
  248. }
  249. /*
  250. // Not implemented intentionally, use TrySplitOn() instead
  251. void RSplitOn(size_t pos, TdSelf& l, TdSelf& r) const noexcept;
  252. void RSplitAt(size_t pos, TdSelf& l, TdSelf& r) const noexcept;
  253. */
  254. public:
  255. Y_PURE_FUNCTION inline TdSelf After(TCharType c) const noexcept {
  256. TdSelf l, r;
  257. return TrySplit(c, l, r) ? r : *this;
  258. }
  259. Y_PURE_FUNCTION inline TdSelf Before(TCharType c) const noexcept {
  260. TdSelf l, r;
  261. return TrySplit(c, l, r) ? l : *this;
  262. }
  263. Y_PURE_FUNCTION inline TdSelf RAfter(TCharType c) const noexcept {
  264. TdSelf l, r;
  265. return TryRSplit(c, l, r) ? r : *this;
  266. }
  267. Y_PURE_FUNCTION inline TdSelf RBefore(TCharType c) const noexcept {
  268. TdSelf l, r;
  269. return TryRSplit(c, l, r) ? l : *this;
  270. }
  271. public:
  272. inline bool AfterPrefix(const TdSelf& prefix, TdSelf& result) const noexcept {
  273. if (this->StartsWith(prefix)) {
  274. result = Tail(prefix.size());
  275. return true;
  276. }
  277. return false;
  278. }
  279. inline bool BeforeSuffix(const TdSelf& suffix, TdSelf& result) const noexcept {
  280. if (this->EndsWith(suffix)) {
  281. result = Head(size() - suffix.size());
  282. return true;
  283. }
  284. return false;
  285. }
  286. // returns true if string started with `prefix`, false otherwise
  287. inline bool SkipPrefix(const TdSelf& prefix) noexcept {
  288. return AfterPrefix(prefix, *this);
  289. }
  290. // returns true if string ended with `suffix`, false otherwise
  291. inline bool ChopSuffix(const TdSelf& suffix) noexcept {
  292. return BeforeSuffix(suffix, *this);
  293. }
  294. public:
  295. // returns tail, including pos
  296. TdSelf SplitOffAt(size_t pos) {
  297. const TdSelf tok = SubStr(pos);
  298. Trunc(pos);
  299. return tok;
  300. }
  301. // returns head, tail includes pos
  302. TdSelf NextTokAt(size_t pos) {
  303. const TdSelf tok = Head(pos);
  304. Skip(pos);
  305. return tok;
  306. }
  307. TdSelf SplitOffOn(size_t pos) {
  308. TdSelf tok;
  309. SplitOn(pos, *this, tok);
  310. return tok;
  311. }
  312. TdSelf NextTokOn(size_t pos) {
  313. TdSelf tok;
  314. SplitOn(pos, tok, *this);
  315. return tok;
  316. }
  317. /*
  318. // See comment on RSplitOn() above
  319. TdSelf RSplitOffOn(size_t pos);
  320. TdSelf RNextTokOn(size_t pos);
  321. */
  322. public:
  323. TdSelf SplitOff(TCharType delim) {
  324. TdSelf tok;
  325. Split(delim, *this, tok);
  326. return tok;
  327. }
  328. TdSelf RSplitOff(TCharType delim) {
  329. TdSelf tok;
  330. RSplit(delim, tok, *this);
  331. return tok;
  332. }
  333. bool NextTok(TCharType delim, TdSelf& tok) {
  334. return NextTokTemplate(delim, tok);
  335. }
  336. bool NextTok(TdSelf delim, TdSelf& tok) {
  337. return NextTokTemplate(delim, tok);
  338. }
  339. bool RNextTok(TCharType delim, TdSelf& tok) {
  340. return RNextTokTemplate(delim, tok);
  341. }
  342. bool RNextTok(TdSelf delim, TdSelf& tok) {
  343. return RNextTokTemplate(delim, tok);
  344. }
  345. bool ReadLine(TdSelf& tok) {
  346. if (NextTok('\n', tok)) {
  347. while (!tok.empty() && tok.back() == '\r') {
  348. tok.remove_suffix(1);
  349. }
  350. return true;
  351. }
  352. return false;
  353. }
  354. TdSelf NextTok(TCharType delim) {
  355. return NextTokTemplate(delim);
  356. }
  357. TdSelf RNextTok(TCharType delim) {
  358. return RNextTokTemplate(delim);
  359. }
  360. TdSelf NextTok(TdSelf delim) {
  361. return NextTokTemplate(delim);
  362. }
  363. TdSelf RNextTok(TdSelf delim) {
  364. return RNextTokTemplate(delim);
  365. }
  366. public: // string subsequences
  367. /// Cut last @c shift characters (or less if length is less than @c shift)
  368. inline TdSelf& Chop(size_t shift) noexcept {
  369. this->remove_suffix(std::min(shift, size()));
  370. return *this;
  371. }
  372. /// Cut first @c shift characters (or less if length is less than @c shift)
  373. inline TdSelf& Skip(size_t shift) noexcept {
  374. this->remove_prefix(std::min(shift, size()));
  375. return *this;
  376. }
  377. /// Sets the start pointer to a position relative to the end
  378. inline TdSelf& RSeek(size_t tailSize) noexcept {
  379. if (size() > tailSize) {
  380. //WARN: removing TStringView:: will lead to an infinite recursion
  381. *this = TStringView::substr(size() - tailSize, tailSize);
  382. }
  383. return *this;
  384. }
  385. // coverity[exn_spec_violation]
  386. inline TdSelf& Trunc(size_t targetSize) noexcept {
  387. // Coverity false positive issue
  388. // exn_spec_violation: An exception of type "std::out_of_range" is thrown but the exception specification "noexcept" doesn't allow it to be thrown. This will result in a call to terminate().
  389. // fun_call_w_exception: Called function TStringView::substr throws an exception of type "std::out_of_range".
  390. // Suppress this issue because we pass argument pos=0 and string_view can't throw std::out_of_range.
  391. *this = TStringView::substr(0, targetSize); //WARN: removing TStringView:: will lead to an infinite recursion
  392. return *this;
  393. }
  394. Y_PURE_FUNCTION inline TdSelf SubStr(size_t beg) const noexcept {
  395. return TdSelf(*this).Skip(beg);
  396. }
  397. Y_PURE_FUNCTION inline TdSelf SubStr(size_t beg, size_t len) const noexcept {
  398. return SubStr(beg).Trunc(len);
  399. }
  400. Y_PURE_FUNCTION inline TdSelf Head(size_t pos) const noexcept {
  401. return TdSelf(*this).Trunc(pos);
  402. }
  403. Y_PURE_FUNCTION inline TdSelf Tail(size_t pos) const noexcept {
  404. return SubStr(pos);
  405. }
  406. Y_PURE_FUNCTION inline TdSelf Last(size_t len) const noexcept {
  407. return TdSelf(*this).RSeek(len);
  408. }
  409. private:
  410. template <typename TDelimiterType>
  411. TdSelf NextTokTemplate(TDelimiterType delim) {
  412. TdSelf tok;
  413. Split(delim, tok, *this);
  414. return tok;
  415. }
  416. template <typename TDelimiterType>
  417. TdSelf RNextTokTemplate(TDelimiterType delim) {
  418. TdSelf tok;
  419. RSplit(delim, *this, tok);
  420. return tok;
  421. }
  422. template <typename TDelimiterType>
  423. bool NextTokTemplate(TDelimiterType delim, TdSelf& tok) {
  424. if (!empty()) {
  425. tok = NextTokTemplate(delim);
  426. return true;
  427. }
  428. return false;
  429. }
  430. template <typename TDelimiterType>
  431. bool RNextTokTemplate(TDelimiterType delim, TdSelf& tok) {
  432. if (!empty()) {
  433. tok = RNextTokTemplate(delim);
  434. return true;
  435. }
  436. return false;
  437. }
  438. template <typename TDelimiterType>
  439. inline void SplitTemplate(TDelimiterType delim, TdSelf& l, TdSelf& r) const noexcept {
  440. if (!TrySplit(delim, l, r)) {
  441. l = *this;
  442. r = TdSelf();
  443. }
  444. }
  445. template <typename TDelimiterType>
  446. inline void RSplitTemplate(TDelimiterType delim, TdSelf& l, TdSelf& r) const noexcept {
  447. if (!TryRSplit(delim, l, r)) {
  448. r = *this;
  449. l = TdSelf();
  450. }
  451. }
  452. };
  453. std::ostream& operator<<(std::ostream& os, TStringBuf buf);
  454. constexpr TStringBuf operator""_sb(const char* str, size_t len) {
  455. return TStringBuf{str, len};
  456. }