split.h 29 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071
  1. #pragma once
  2. #include "strspn.h"
  3. #include "cast.h"
  4. #include <util/generic/algorithm.h>
  5. #include <util/generic/fwd.h>
  6. #include <util/generic/iterator.h>
  7. #include <util/generic/iterator_range.h>
  8. #include <util/generic/store_policy.h>
  9. #include <util/generic/strbuf.h>
  10. #include <util/generic/string.h>
  11. #include <util/generic/typetraits.h>
  12. #include <util/generic/vector.h>
  13. #include <util/generic/ylimits.h>
  14. #include <util/system/compat.h>
  15. #include <util/system/defaults.h>
  16. #include <utility>
  17. #include <stlfwd>
// NOTE: See StringSplitter below for a more convenient string-splitting interface.
  19. namespace NStringSplitPrivate {
  20. template <class T, class I, class = void>
  21. struct TIsConsumer: std::false_type {};
  22. template <class T, class I>
  23. struct TIsConsumer<
  24. T, I,
  25. TVoidT<decltype(std::declval<T>().Consume(
  26. std::declval<I>(), std::declval<I>(), std::declval<I>()))>>
  27. : std::true_type {};
  28. template <class T, class I>
  29. constexpr bool TIsConsumerV = TIsConsumer<T, I>::value;
  30. template <class T>
  31. T* Find(T* str, std::common_type_t<T> ch) {
  32. for (; *str; ++str) {
  33. if (*str == ch) {
  34. return str;
  35. }
  36. }
  37. return nullptr;
  38. }
  39. }
  40. template <class I, class TDelim, class TConsumer>
  41. std::enable_if_t<::NStringSplitPrivate::TIsConsumerV<TConsumer, I>>
  42. SplitString(I b, I e, const TDelim& d, TConsumer&& c) {
  43. I l, i;
  44. do {
  45. l = b;
  46. i = d.Find(b, e);
  47. } while (c.Consume(l, i, b) && (b != i));
  48. }
  49. template <class I, class TDelim, class TConsumer>
  50. std::enable_if_t<::NStringSplitPrivate::TIsConsumerV<TConsumer, I>>
  51. SplitString(I b, const TDelim& d, TConsumer&& c) {
  52. I l, i;
  53. do {
  54. l = b;
  55. i = d.Find(b);
  56. } while (c.Consume(l, i, b) && (b != i));
  57. }
  58. template <class I1, class I2>
  59. static inline I1* FastStrChr(I1* str, I2 f) noexcept {
  60. I1* ret = NStringSplitPrivate::Find(str, f);
  61. if (!ret) {
  62. ret = str + std::char_traits<I1>::length(str);
  63. }
  64. return ret;
  65. }
  66. template <class I>
  67. static inline I* FastStrStr(I* str, I* f, size_t l) noexcept {
  68. std::basic_string_view<I> strView(str);
  69. const auto ret = strView.find(*f);
  70. if (ret != std::string::npos) {
  71. std::basic_string_view<I> fView(f, l);
  72. strView = strView.substr(ret);
  73. for (; strView.size() >= l; strView = strView.substr(1)) {
  74. if (strView.substr(0, l) == fView) {
  75. break;
  76. }
  77. }
  78. return strView.size() >= l ? strView.data() : strView.data() + strView.size();
  79. } else {
  80. return strView.data() + strView.size();
  81. }
  82. }
  83. template <class Char>
  84. struct TStringDelimiter {
  85. inline TStringDelimiter(Char* delim) noexcept
  86. : Delim(delim)
  87. , Len(std::char_traits<Char>::length(delim))
  88. {
  89. }
  90. inline TStringDelimiter(Char* delim, size_t len) noexcept
  91. : Delim(delim)
  92. , Len(len)
  93. {
  94. }
  95. inline Char* Find(Char*& b, Char* e) const noexcept {
  96. const auto ret = std::basic_string_view<Char>(b, e - b).find(Delim, 0, Len);
  97. if (ret != std::string::npos) {
  98. const auto result = b + ret;
  99. b = result + Len;
  100. return result;
  101. }
  102. return (b = e);
  103. }
  104. inline Char* Find(Char*& b) const noexcept {
  105. Char* ret = FastStrStr(b, Delim, Len);
  106. b = *ret ? ret + Len : ret;
  107. return ret;
  108. }
  109. Char* Delim;
  110. const size_t Len;
  111. };
  112. template <class Char>
  113. struct TCharDelimiter {
  114. inline TCharDelimiter(Char ch) noexcept
  115. : Ch(ch)
  116. {
  117. }
  118. inline Char* Find(Char*& b, Char* e) const noexcept {
  119. const auto ret = std::basic_string_view<Char>(b, e - b).find(Ch);
  120. if (ret != std::string::npos) {
  121. const auto result = b + ret;
  122. b = result + 1;
  123. return result;
  124. }
  125. return (b = e);
  126. }
  127. inline Char* Find(Char*& b) const noexcept {
  128. Char* ret = FastStrChr(b, Ch);
  129. if (*ret) {
  130. b = ret + 1;
  131. } else {
  132. b = ret;
  133. }
  134. return ret;
  135. }
  136. Char Ch;
  137. };
  138. template <class Iterator, class Condition>
  139. struct TFuncDelimiter {
  140. public:
  141. template <class... Args>
  142. TFuncDelimiter(Args&&... args)
  143. : Fn(std::forward<Args>(args)...)
  144. {
  145. }
  146. inline Iterator Find(Iterator& b, Iterator e) const noexcept {
  147. if ((b = std::find_if(b, e, Fn)) != e) {
  148. return b++;
  149. }
  150. return b;
  151. }
  152. private:
  153. Condition Fn;
  154. };
  155. template <class Char>
  156. struct TFindFirstOf {
  157. inline TFindFirstOf(Char* set)
  158. : Set(set)
  159. {
  160. }
  161. inline Char* FindFirstOf(Char* b, Char* e) const noexcept {
  162. Char* ret = b;
  163. for (; ret != e; ++ret) {
  164. if (NStringSplitPrivate::Find(Set, *ret))
  165. break;
  166. }
  167. return ret;
  168. }
  169. inline Char* FindFirstOf(Char* b) const noexcept {
  170. const std::basic_string_view<Char> bView(b);
  171. const auto ret = bView.find_first_of(Set);
  172. return ret != std::string::npos ? b + ret : b + bView.size();
  173. }
  174. Char* Set;
  175. };
  176. template <>
  177. struct TFindFirstOf<const char>: public TCompactStrSpn {
  178. inline TFindFirstOf(const char* set, const char* e)
  179. : TCompactStrSpn(set, e)
  180. {
  181. }
  182. inline TFindFirstOf(const char* set)
  183. : TCompactStrSpn(set)
  184. {
  185. }
  186. };
  187. template <class Char>
  188. struct TSetDelimiter: private TFindFirstOf<const Char> {
  189. using TFindFirstOf<const Char>::TFindFirstOf;
  190. inline Char* Find(Char*& b, Char* e) const noexcept {
  191. Char* ret = const_cast<Char*>(this->FindFirstOf(b, e));
  192. if (ret != e) {
  193. b = ret + 1;
  194. return ret;
  195. }
  196. return (b = e);
  197. }
  198. inline Char* Find(Char*& b) const noexcept {
  199. Char* ret = const_cast<Char*>(this->FindFirstOf(b));
  200. if (*ret) {
  201. b = ret + 1;
  202. return ret;
  203. }
  204. return (b = ret);
  205. }
  206. };
  207. namespace NSplitTargetHasPushBack {
  208. Y_HAS_MEMBER(push_back, PushBack);
  209. }
  210. template <class T, class = void>
  211. struct TConsumerBackInserter;
  212. template <class T>
  213. struct TConsumerBackInserter<T, std::enable_if_t<NSplitTargetHasPushBack::TClassHasPushBack<T>::value>> {
  214. static void DoInsert(T* C, const typename T::value_type& i) {
  215. C->push_back(i);
  216. }
  217. };
  218. template <class T>
  219. struct TConsumerBackInserter<T, std::enable_if_t<!NSplitTargetHasPushBack::TClassHasPushBack<T>::value>> {
  220. static void DoInsert(T* C, const typename T::value_type& i) {
  221. C->insert(C->end(), i);
  222. }
  223. };
  224. template <class T>
  225. struct TContainerConsumer {
  226. inline TContainerConsumer(T* c) noexcept
  227. : C(c)
  228. {
  229. }
  230. template <class I>
  231. inline bool Consume(I* b, I* d, I* /*e*/) {
  232. TConsumerBackInserter<T>::DoInsert(C, typename T::value_type(b, d));
  233. return true;
  234. }
  235. T* C;
  236. };
  237. template <class T>
  238. struct TContainerConvertingConsumer {
  239. inline TContainerConvertingConsumer(T* c) noexcept
  240. : C(c)
  241. {
  242. }
  243. template <class I>
  244. inline bool Consume(I* b, I* d, I* /*e*/) {
  245. TConsumerBackInserter<T>::DoInsert(C, FromString<typename T::value_type>(TStringBuf(b, d)));
  246. return true;
  247. }
  248. T* C;
  249. };
  250. template <class S, class I>
  251. struct TLimitingConsumer {
  252. inline TLimitingConsumer(size_t cnt, S* slave) noexcept
  253. : Cnt(cnt ? cnt - 1 : Max<size_t>())
  254. , Slave(slave)
  255. , Last(nullptr)
  256. {
  257. }
  258. inline bool Consume(I* b, I* d, I* e) {
  259. if (!Cnt) {
  260. Last = b;
  261. return false;
  262. }
  263. --Cnt;
  264. return Slave->Consume(b, d, e);
  265. }
  266. size_t Cnt;
  267. S* Slave;
  268. I* Last;
  269. };
  270. template <class S>
  271. struct TSkipEmptyTokens {
  272. inline TSkipEmptyTokens(S* slave) noexcept
  273. : Slave(slave)
  274. {
  275. }
  276. template <class I>
  277. inline bool Consume(I* b, I* d, I* e) {
  278. if (b != d) {
  279. return Slave->Consume(b, d, e);
  280. }
  281. return true;
  282. }
  283. S* Slave;
  284. };
  285. template <class S>
  286. struct TKeepDelimiters {
  287. inline TKeepDelimiters(S* slave) noexcept
  288. : Slave(slave)
  289. {
  290. }
  291. template <class I>
  292. inline bool Consume(I* b, I* d, I* e) {
  293. if (Slave->Consume(b, d, d)) {
  294. if (d != e) {
  295. return Slave->Consume(d, e, e);
  296. }
  297. return true;
  298. }
  299. return false;
  300. }
  301. S* Slave;
  302. };
  303. template <class T>
  304. struct TSimplePusher {
  305. inline bool Consume(char* b, char* d, char*) {
  306. *d = 0;
  307. C->push_back(b);
  308. return true;
  309. }
  310. T* C;
  311. };
  312. template <class T>
  313. static inline void Split(char* buf, char ch, T* res) {
  314. res->resize(0);
  315. if (*buf == 0)
  316. return;
  317. TCharDelimiter<char> delim(ch);
  318. TSimplePusher<T> pusher = {res};
  319. SplitString(buf, delim, pusher);
  320. }
  321. /// Split string into res vector. Res vector is cleared before split.
/// Good old slow split function.
/// Field delimiter is any number of symbols specified in delim (no empty strings in res vector)
  324. /// @return number of elements created
  325. size_t Split(const char* in, const char* delim, TVector<TString>& res);
  326. size_t Split(const TString& in, const TString& delim, TVector<TString>& res);
  327. /// Old split reimplemented for TStringBuf using the new code
  328. /// Note that delim can be constructed from char* automatically (it is not cheap though)
  329. inline size_t Split(const TStringBuf s, const TSetDelimiter<const char>& delim, TVector<TStringBuf>& res) {
  330. res.clear();
  331. TContainerConsumer<TVector<TStringBuf>> res1(&res);
  332. TSkipEmptyTokens<TContainerConsumer<TVector<TStringBuf>>> consumer(&res1);
  333. SplitString(s.data(), s.data() + s.size(), delim, consumer);
  334. return res.size();
  335. }
  336. template <class P, class D>
  337. void GetNext(TStringBuf& s, D delim, P& param) {
  338. TStringBuf next = s.NextTok(delim);
  339. Y_ENSURE(next.IsInited(), TStringBuf("Split: number of fields less than number of Split output arguments"));
  340. param = FromString<P>(next);
  341. }
  342. template <class P, class D>
  343. void GetNext(TStringBuf& s, D delim, TMaybe<P>& param) {
  344. TStringBuf next = s.NextTok(delim);
  345. if (next.IsInited()) {
  346. param = FromString<P>(next);
  347. } else {
  348. param.Clear();
  349. }
  350. }
  351. // example:
  352. // Split(TStringBuf("Sherlock,2014,36.6"), ',', name, year, temperature);
  353. template <class D, class P1, class P2>
  354. void Split(TStringBuf s, D delim, P1& p1, P2& p2) {
  355. GetNext(s, delim, p1);
  356. GetNext(s, delim, p2);
  357. Y_ENSURE(!s.IsInited(), TStringBuf("Split: number of fields more than number of Split output arguments"));
  358. }
  359. template <class D, class P1, class P2, class... Other>
  360. void Split(TStringBuf s, D delim, P1& p1, P2& p2, Other&... other) {
  361. GetNext(s, delim, p1);
  362. Split(s, delim, p2, other...);
  363. }
  364. /**
  365. * \fn auto StringSplitter(...)
  366. *
  367. * Creates a string splitter object. The only use for it is to call one of its
  368. * `Split*` methods, and then do something with the resulting proxy range.
  369. *
  370. * Some examples:
  371. * \code
  372. * TVector<TStringBuf> values = StringSplitter("1\t2\t3").Split('\t');
  373. *
  374. * for(TStringBuf part: StringSplitter("1::2::::3").SplitByString("::").SkipEmpty()) {
  375. * Cerr << part;
  376. * }
  377. *
  378. * TVector<TString> firstTwoValues = StringSplitter("1\t2\t3").Split('\t').Take(2);
  379. * \endcode
  380. *
  381. * Use `Collect` or `AddTo` to store split results into an existing container:
  382. * \code
  383. * TVector<TStringBuf> values = {"0"};
  384. * StringSplitter("1\t2\t3").Split('\t').AddTo(&values);
  385. * \endcode
  386. * Note that `Collect` clears target container, while `AddTo` just inserts values.
  387. * You can use these methods with any container that has `emplace` / `emplace_back`.
  388. *
  389. * Use `ParseInto` to also perform string conversions before inserting values
  390. * into target container:
  391. * \code
  392. * TSet<int> values;
  393. * StringSplitter("1\t2\t3").Split('\t').ParseInto(&values);
  394. * \endcode
  395. */
  396. namespace NStringSplitPrivate {
// Member detectors. The code below consults them as THasPushBack<T>,
// THasInsert<T> and THasData<T>.
Y_HAS_MEMBER(push_back, PushBack);
Y_HAS_MEMBER(insert, Insert);
Y_HAS_MEMBER(data, Data);
  400. /**
  401. * This one is needed here so that `std::string_view -> std::string_view`
  402. * conversion works.
  403. */
  404. template <class Src, class Dst>
  405. inline void DoFromString(const Src& src, Dst* dst) {
  406. *dst = ::FromString<Dst>(src);
  407. }
  408. template <class T>
  409. inline void DoFromString(const T& src, T* dst) noexcept {
  410. *dst = src;
  411. }
  412. template <class T>
  413. inline void DoFromString(const T& src, decltype(std::ignore)* dst) noexcept {
  414. *dst = src;
  415. }
  416. template <class Src, class Dst>
  417. inline Y_WARN_UNUSED_RESULT bool TryDoFromString(const Src& src, Dst* dst) noexcept {
  418. return ::TryFromString(src, *dst);
  419. }
  420. template <class T>
  421. inline Y_WARN_UNUSED_RESULT bool TryDoFromString(const T& src, T* dst) noexcept {
  422. *dst = src;
  423. return true;
  424. }
  425. template <class T>
  426. inline Y_WARN_UNUSED_RESULT bool TryDoFromString(const T& src, decltype(std::ignore)* dst) noexcept {
  427. *dst = src;
  428. return true;
  429. }
  430. /**
  431. * Consumer that places provided elements into a container. Not using
  432. * `emplace(iterator)` for efficiency.
  433. */
  434. template <class Container>
  435. struct TContainerConsumer {
  436. using value_type = typename Container::value_type;
  437. TContainerConsumer(Container* c)
  438. : C_(c)
  439. {
  440. }
  441. // TODO: return bool (continue)
  442. template <class StringBuf>
  443. void operator()(StringBuf e) const {
  444. this->operator()(C_, e);
  445. }
  446. private:
  447. template <class OtherContainer, class StringBuf>
  448. auto operator()(OtherContainer* c, StringBuf e) const -> decltype(c->emplace_back()) {
  449. return c->emplace_back(value_type(e));
  450. }
  451. template <class OtherContainer, class StringBuf>
  452. auto operator()(OtherContainer* c, StringBuf e) const -> decltype(c->emplace()) {
  453. return c->emplace(value_type(e));
  454. }
  455. Container* C_;
  456. };
  457. /**
  458. * Consumer that converts provided elements via `FromString` and places them
  459. * into a container.
  460. */
  461. template <class Container>
  462. struct TContainerConvertingConsumer {
  463. using value_type = typename Container::value_type;
  464. TContainerConvertingConsumer(Container* c)
  465. : C_(c)
  466. {
  467. }
  468. template <class StringBuf>
  469. void operator()(StringBuf e) const {
  470. this->operator()(C_, e);
  471. }
  472. private:
  473. template <class OtherContainer, class StringBuf>
  474. auto operator()(OtherContainer* c, StringBuf e) const -> decltype(c->emplace_back()) {
  475. value_type v;
  476. DoFromString(e, &v);
  477. return c->emplace_back(std::move(v));
  478. }
  479. template <class OtherContainer, class StringBuf>
  480. auto operator()(OtherContainer* c, StringBuf e) const -> decltype(c->emplace()) {
  481. value_type v;
  482. DoFromString(e, &v);
  483. return c->emplace(std::move(v));
  484. }
  485. Container* C_;
  486. };
  487. template <class String>
  488. struct TStringBufOfImpl {
  489. using type = std::conditional_t<
  490. THasData<String>::value,
  491. TBasicStringBuf<typename String::value_type>,
  492. TIteratorRange<typename String::const_iterator>>;
  493. };
  494. template <class Char, class Traits, class Allocator>
  495. struct TStringBufOfImpl<std::basic_string<Char, Traits, Allocator>> {
  496. using type = std::basic_string_view<Char, Traits>;
  497. };
  498. template <class Char, class Traits>
  499. struct TStringBufOfImpl<std::basic_string_view<Char, Traits>> {
  500. using type = std::basic_string_view<Char, Traits>;
  501. };
  502. /**
  503. * Metafunction that returns a string buffer for the given type. This is to
  504. * make sure that splitting `std::string` returns `std::string_view`.
  505. */
  506. template <class String>
  507. using TStringBufOf = typename TStringBufOfImpl<String>::type;
  508. template <class StringBuf, class Iterator>
  509. StringBuf DoMakeStringBuf(Iterator b, Iterator e, StringBuf*) {
  510. return StringBuf(b, e);
  511. }
  512. template <class Char, class Traits, class Iterator>
  513. std::basic_string_view<Char, Traits> DoMakeStringBuf(Iterator b, Iterator e, std::basic_string_view<Char, Traits>*) {
  514. return std::basic_string_view<Char, Traits>(b, e - b);
  515. }
  516. template <class StringBuf, class Iterator>
  517. StringBuf MakeStringBuf(Iterator b, Iterator e) {
  518. return DoMakeStringBuf(b, e, static_cast<StringBuf*>(nullptr));
  519. }
  520. template <class String>
  521. struct TIteratorOfImpl {
  522. using type = std::conditional_t<
  523. THasData<String>::value,
  524. const typename String::value_type*,
  525. typename String::const_iterator>;
  526. };
  527. template <class String>
  528. using TIteratorOf = typename TIteratorOfImpl<String>::type;
  529. template <class String>
  530. class TStringSplitter;
  531. template <class String>
  532. struct TIterState: public TStringBufOf<String> {
  533. public:
  534. using TStringBufType = TStringBufOf<String>;
  535. using TIterator = TIteratorOf<String>;
  536. friend class TStringSplitter<String>;
  537. TIterState(const String& string) noexcept
  538. : TStringBufType()
  539. , DelimiterEnd_(std::begin(string))
  540. , OriginEnd_(std::end(string))
  541. {
  542. }
  543. template <
  544. typename Other,
  545. typename = std::enable_if_t<
  546. std::is_convertible<Other, TStringBufType>::value>>
  547. bool operator==(const Other& toCompare) const {
  548. return TStringBufType(*this) == TStringBufType(toCompare);
  549. }
  550. TIterator TokenStart() const noexcept {
  551. return this->begin();
  552. }
  553. TIterator TokenDelim() const noexcept {
  554. return this->end();
  555. }
  556. TStringBufType Token() const noexcept {
  557. return *this;
  558. }
  559. TStringBufType Delim() const noexcept {
  560. return MakeStringBuf<TStringBufType>(TokenDelim(), DelimiterEnd_);
  561. }
  562. private:
  563. void UpdateParentBuf(TIterator tokenStart, TIterator tokenDelim) noexcept {
  564. *static_cast<TStringBufType*>(this) = MakeStringBuf<TStringBufType>(tokenStart, tokenDelim);
  565. }
  566. bool DelimiterIsEmpty() const noexcept {
  567. return TokenDelim() == DelimiterEnd_;
  568. }
  569. private:
  570. TIterator DelimiterEnd_;
  571. const TIterator OriginEnd_;
  572. };
  573. template <class Base>
  574. class TSplitRange: public Base, public TInputRangeAdaptor<TSplitRange<Base>> {
  575. using TStringBufType = decltype(std::declval<Base>().Next()->Token());
  576. public:
  577. template <typename... Args>
  578. inline TSplitRange(Args&&... args)
  579. : Base(std::forward<Args>(args)...)
  580. {
  581. }
  582. template <class Consumer, std::enable_if_t<std::is_same<decltype(std::declval<Consumer>()(std::declval<TStringBufType>())), void>::value, int>* = nullptr>
  583. inline void Consume(Consumer&& f) {
  584. for (auto&& it : *this) {
  585. f(it.Token());
  586. }
  587. }
  588. template <class Consumer, std::enable_if_t<std::is_same<decltype(std::declval<Consumer>()(std::declval<TStringBufType>())), bool>::value, int>* = nullptr>
  589. inline bool Consume(Consumer&& f) {
  590. for (auto&& it : *this) {
  591. if (!f(it.Token())) {
  592. return false;
  593. }
  594. }
  595. return true;
  596. }
  597. template <class Container, class = std::enable_if_t<THasInsert<Container>::value || THasPushBack<Container>::value>>
  598. operator Container() {
  599. Container result;
  600. AddTo(&result);
  601. return result;
  602. }
  603. template <class S>
  604. inline TVector<S> ToList() {
  605. TVector<S> result;
  606. for (auto&& it : *this) {
  607. result.push_back(S(it.Token()));
  608. }
  609. return result;
  610. }
  611. template <class Container>
  612. inline void Collect(Container* c) {
  613. Y_ASSERT(c);
  614. c->clear();
  615. AddTo(c);
  616. }
  617. template <class Container>
  618. inline void AddTo(Container* c) {
  619. Y_ASSERT(c);
  620. TContainerConsumer<Container> consumer(c);
  621. Consume(consumer);
  622. }
  623. template <class Container>
  624. inline void ParseInto(Container* c) {
  625. Y_ASSERT(c);
  626. TContainerConvertingConsumer<Container> consumer(c);
  627. Consume(consumer);
  628. }
  629. // TODO: this is actually TryParseInto
  630. /**
  631. * Same as `CollectInto`, just doesn't throw.
  632. *
  633. * \param[out] args Output arguments.
  634. * \returns Whether parsing was successful.
  635. */
  636. template <typename... Args>
  637. inline bool TryCollectInto(Args*... args) noexcept {
  638. size_t successfullyFilled = 0;
  639. auto it = this->begin();
  640. //FIXME: actually, some kind of TryApplyToMany is needed in order to stop iteration upon first failure
  641. ApplyToMany([&](auto&& arg) {
  642. if (it != this->end()) {
  643. if (TryDoFromString(it->Token(), arg)) {
  644. ++successfullyFilled;
  645. }
  646. ++it;
  647. }
  648. }, args...);
  649. return successfullyFilled == sizeof...(args) && it == this->end();
  650. }
  651. // TODO: this is actually ParseInto
  652. /**
  653. * Splits and parses everything that's in this splitter into `args`.
  654. *
  655. * Example usage:
  656. * \code
  657. * int l, r;
  658. * StringSplitter("100*200").Split('*').CollectInto(&l, &r);
  659. * \endcode
  660. *
  661. * \param[out] args Output arguments.
  662. * \throws If not all items were parsed, or
  663. * if there were too many items in the split.
  664. */
  665. template <typename... Args>
  666. inline void CollectInto(Args*... args) {
  667. Y_ENSURE(TryCollectInto<Args...>(args...));
  668. }
  669. inline size_t Count() {
  670. size_t cnt = 0;
  671. for (auto&& it : *this) {
  672. Y_UNUSED(it);
  673. ++cnt;
  674. }
  675. return cnt;
  676. }
  677. };
/**
 * Entry point of the splitting interface: stores the string and offers
 * `Split*` methods, each returning a token range (`TIt<...>`) built from the
 * nested helper layers below.
 */
template <class String>
class TStringSplitter {
    using TStringType = String;
    using TChar = typename TStringType::value_type;
    using TIteratorState = TIterState<TStringType>;
    using TStringBufType = typename TIteratorState::TStringBufType;
    using TIterator = typename TIteratorState::TIterator;

    /**
     * Base class for all split ranges that actually does the splitting.
     * Holds its own copy of the string, the iteration state built over that
     * copy, and the delimiter storage policy.
     */
    template <class DelimStorage>
    struct TSplitRangeBase {
        // Members are initialized in declaration order, so State_ is built
        // from the already-initialized String_.
        template <class OtherString, class... Args>
        inline TSplitRangeBase(OtherString&& s, Args&&... args)
            : String_(std::forward<OtherString>(s))
            , State_(String_)
            , Delimiter_(std::forward<Args>(args)...)
        {
        }

        /**
         * Advances to the next token.
         * \returns the updated state, or nullptr once the previous token was
         *          not followed by a delimiter.
         */
        inline TIteratorState* Next() {
            if (State_.DelimiterIsEmpty()) {
                return nullptr;
            }
            // The next token begins where the previous delimiter ended.
            const auto tokenBegin = State_.DelimiterEnd_;
            // Find() receives DelimiterEnd_ itself and returns the token end.
            const auto tokenEnd = Delimiter_.Ptr()->Find(State_.DelimiterEnd_, State_.OriginEnd_);
            State_.UpdateParentBuf(tokenBegin, tokenEnd);
            return &State_;
        }

    private:
        TStringType String_;
        TIteratorState State_;
        DelimStorage Delimiter_;
    };

    /**
     * Range layer that skips every token rejected by `Filter::Accept`.
     */
    template <class Base, class Filter>
    struct TFilterRange: public Base {
        template <class... Args>
        inline TFilterRange(const Base& base, Args&&... args)
            : Base(base)
            , Filter_(std::forward<Args>(args)...)
        {
        }

        /// Pulls tokens from the base until one is accepted or the base is exhausted.
        inline TIteratorState* Next() {
            TIteratorState* ret;
            do {
                ret = Base::Next();
            } while (ret && !Filter_.Accept(ret));
            return ret;
        }

        Filter Filter_;
    };

    /// Filter that accepts only non-empty tokens.
    struct TNonEmptyFilter {
        template <class TToken>
        inline bool Accept(const TToken* token) noexcept {
            return !token->empty();
        }
    };

    template <class TIter>
    struct TStopIteration;

    /**
     * Mixin that adds filtering methods (`SkipEmpty`) to a range.
     */
    template <class Base>
    struct TFilters: public Base {
        // Resulting range type: the filtered range wrapped again in the
        // filter/stop mixins so that further calls can be chained.
        template <class TFilter>
        using TIt = TSplitRange<TStopIteration<TFilters<TFilterRange<Base, TFilter>>>>;

        template <typename... Args>
        inline TFilters(Args&&... args)
            : Base(std::forward<Args>(args)...)
        {
        }

        /// Returns a range over a copy of this one that omits empty tokens.
        inline TIt<TNonEmptyFilter> SkipEmpty() const {
            return {*this};
        }
    };

    /**
     * Range layer that ends the iteration as soon as `Stopper::Stop` returns true.
     */
    template <class Base, class Stopper>
    struct TStopRange: public Base {
        template <typename... Args>
        inline TStopRange(const Base& base, Args&&... args)
            : Base(base)
            , Stopper_(std::forward<Args>(args)...)
        {
        }

        inline TIteratorState* Next() {
            TIteratorState* ret = Base::Next();
            if (!ret || Stopper_.Stop(ret)) {
                return nullptr;
            }
            return ret;
        }

        Stopper Stopper_;
    };

    /// Stopper that lets the first `Count` tokens through and stops afterwards.
    struct TTake {
        TTake() = default;

        TTake(size_t count)
            : Count(count)
        {
        }

        template <class TToken>
        inline bool Stop(TToken*) noexcept {
            if (Count > 0) {
                --Count;
                return false;
            } else {
                return true;
            }
        }

        size_t Count = 0;
    };

    /**
     * Stopper that yields at most `Count` tokens, the last of which is
     * extended to the end of the source string.
     */
    struct TLimit {
        TLimit() = default;

        TLimit(size_t count)
            : Count(count)
        {
            Y_ASSERT(Count > 0);
        }

        template <class TToken>
        inline bool Stop(TToken* token) noexcept {
            if (Count > 1) {
                --Count;
                return false;
            } else if (Count == 1) {
                // Last allowed token: make it cover everything up to the end
                // of the source; the delimiter end now equals the token end.
                token->DelimiterEnd_ = token->OriginEnd_;
                token->UpdateParentBuf(token->TokenStart(), token->DelimiterEnd_);
                return false;
            }
            // Only reached with Count == 0 (the default-constructed value).
            return true;
        }

        size_t Count = 0;
    };

    /**
     * Mixin that adds early-termination methods (`Take`, `Limit`) to a range.
     */
    template <class Base>
    struct TStopIteration: public Base {
        template <class TStopper>
        using TIt = TSplitRange<TStopIteration<TFilters<TStopRange<Base, TStopper>>>>;

        template <typename... Args>
        inline TStopIteration(Args&&... args)
            : Base(std::forward<Args>(args)...)
        {
        }

        /// Range over a copy of this one that is cut off by a `TTake(count)` stopper.
        inline TIt<TTake> Take(size_t count) {
            return {*this, count};
        }

        /// Range over a copy of this one that is cut off by a `TLimit(count)` stopper.
        inline TIt<TLimit> Limit(size_t count) {
            return {*this, count};
        }
    };

    // Full range type for a given delimiter storage policy.
    template <class TPolicy>
    using TIt = TSplitRange<TStopIteration<TFilters<TSplitRangeBase<TPolicy>>>>;

public:
    template <class OtherString>
    explicit TStringSplitter(OtherString&& s)
        : String_(std::forward<OtherString>(s))
    {
    }

    // Does not own TDelim: only a pointer to `d` is stored in the returned range.
    template <class TDelim>
    inline TIt<TPtrPolicy<const TDelim>> Split(const TDelim& d) const noexcept {
        return {String_, &d};
    }

    /// Splits by a single character.
    inline TIt<TEmbedPolicy<TCharDelimiter<const TChar>>> Split(TChar ch) const noexcept {
        return {String_, ch};
    }

    /// Splits by any character of the null-terminated `set`.
    inline TIt<TSimpleRefPolicy<TSetDelimiter<const TChar>>> SplitBySet(const TChar* set) const noexcept {
        return {String_, set};
    }

    /// Splits by the character sequence `str`.
    inline TIt<TEmbedPolicy<TStringDelimiter<const TChar>>> SplitByString(const TStringBufType& str) const noexcept {
        return {String_, str.data(), str.size()};
    }

    /// Splits at every element for which `f` returns true.
    template <class TFunc>
    inline TIt<TEmbedPolicy<TFuncDelimiter<TIterator, TFunc>>> SplitByFunc(TFunc f) const noexcept {
        return {String_, f};
    }

private:
    TStringType String_;
};
  849. template <class String>
  850. auto MakeStringSplitter(String&& s) {
  851. return TStringSplitter<std::remove_reference_t<String>>(std::forward<String>(s));
  852. }
  853. }
  854. template <class Iterator>
  855. auto StringSplitter(Iterator begin, Iterator end) {
  856. return ::NStringSplitPrivate::MakeStringSplitter(TIteratorRange<Iterator>(begin, end));
  857. }
  858. template <class Char>
  859. auto StringSplitter(const Char* begin, const Char* end) {
  860. return ::NStringSplitPrivate::MakeStringSplitter(TBasicStringBuf<Char>(begin, end));
  861. }
  862. template <class Char>
  863. auto StringSplitter(const Char* begin, size_t len) {
  864. return ::NStringSplitPrivate::MakeStringSplitter(TBasicStringBuf<Char>(begin, len));
  865. }
  866. template <class Char>
  867. auto StringSplitter(const Char* str) {
  868. return ::NStringSplitPrivate::MakeStringSplitter(TBasicStringBuf<Char>(str));
  869. }
  870. template <class String, std::enable_if_t<!std::is_pointer<std::remove_reference_t<String>>::value, int> = 0>
  871. auto StringSplitter(String& s) {
  872. return ::NStringSplitPrivate::MakeStringSplitter(::NStringSplitPrivate::TStringBufOf<String>(s.data(), s.size()));
  873. }
  874. template <class String, std::enable_if_t<!std::is_pointer<std::remove_reference_t<String>>::value, int> = 0>
  875. auto StringSplitter(String&& s) {
  876. return ::NStringSplitPrivate::MakeStringSplitter(std::move(s));
  877. }