split.h 31 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105
  1. #pragma once
  2. #include "strspn.h"
  3. #include "cast.h"
  4. #include <util/generic/algorithm.h>
  5. #include <util/generic/fwd.h>
  6. #include <util/generic/iterator.h>
  7. #include <util/generic/iterator_range.h>
  8. #include <util/generic/store_policy.h>
  9. #include <util/generic/strbuf.h>
  10. #include <util/generic/string.h>
  11. #include <util/generic/typetraits.h>
  12. #include <util/generic/vector.h>
  13. #include <util/generic/ylimits.h>
  14. #include <util/system/compat.h>
  15. #include <util/system/defaults.h>
  16. #include <utility>
  17. #include <stlfwd>
// NOTE: See StringSplitter below for a more convenient string-splitting interface.
  19. namespace NStringSplitPrivate {
  20. template <class T, class I, class = void>
  21. struct TIsConsumer: std::false_type {};
  22. template <class T, class I>
  23. struct TIsConsumer<
  24. T, I,
  25. TVoidT<decltype(std::declval<T>().Consume(
  26. std::declval<I>(), std::declval<I>(), std::declval<I>()))>>
  27. : std::true_type {};
  28. template <class T, class I>
  29. constexpr bool TIsConsumerV = TIsConsumer<T, I>::value;
  30. template <class T>
  31. T* Find(T* str, std::common_type_t<T> ch) {
  32. for (; *str; ++str) {
  33. if (*str == ch) {
  34. return str;
  35. }
  36. }
  37. return nullptr;
  38. }
  39. } // namespace NStringSplitPrivate
  40. template <class I, class TDelim, class TConsumer>
  41. std::enable_if_t<::NStringSplitPrivate::TIsConsumerV<TConsumer, I>>
  42. SplitString(I b, I e, const TDelim& d, TConsumer&& c) {
  43. I l, i;
  44. do {
  45. l = b;
  46. i = d.Find(b, e);
  47. } while (c.Consume(l, i, b) && (b != i));
  48. }
  49. template <class I, class TDelim, class TConsumer>
  50. std::enable_if_t<::NStringSplitPrivate::TIsConsumerV<TConsumer, I>>
  51. SplitString(I b, const TDelim& d, TConsumer&& c) {
  52. I l, i;
  53. do {
  54. l = b;
  55. i = d.Find(b);
  56. } while (c.Consume(l, i, b) && (b != i));
  57. }
  58. template <class I1, class I2>
  59. static inline I1* FastStrChr(I1* str, I2 f) noexcept {
  60. I1* ret = NStringSplitPrivate::Find(str, f);
  61. if (!ret) {
  62. ret = str + std::char_traits<I1>::length(str);
  63. }
  64. return ret;
  65. }
  66. template <class I>
  67. static inline I* FastStrStr(I* str, I* f, size_t l) noexcept {
  68. std::basic_string_view<I> strView(str);
  69. const auto ret = strView.find(*f);
  70. if (ret != std::string::npos) {
  71. std::basic_string_view<I> fView(f, l);
  72. strView = strView.substr(ret);
  73. for (; strView.size() >= l; strView = strView.substr(1)) {
  74. if (strView.substr(0, l) == fView) {
  75. break;
  76. }
  77. }
  78. return strView.size() >= l ? strView.data() : strView.data() + strView.size();
  79. } else {
  80. return strView.data() + strView.size();
  81. }
  82. }
  83. template <class Char>
  84. struct TStringDelimiter {
  85. inline TStringDelimiter(Char* delim) noexcept
  86. : Delim(delim)
  87. , Len(std::char_traits<Char>::length(delim))
  88. {
  89. }
  90. inline TStringDelimiter(Char* delim, size_t len) noexcept
  91. : Delim(delim)
  92. , Len(len)
  93. {
  94. }
  95. inline Char* Find(Char*& b, Char* e) const noexcept {
  96. const auto ret = std::basic_string_view<Char>(b, e - b).find(Delim, 0, Len);
  97. if (ret != std::string::npos) {
  98. const auto result = b + ret;
  99. b = result + Len;
  100. return result;
  101. }
  102. return (b = e);
  103. }
  104. inline Char* Find(Char*& b) const noexcept {
  105. Char* ret = FastStrStr(b, Delim, Len);
  106. b = *ret ? ret + Len : ret;
  107. return ret;
  108. }
  109. Char* Delim;
  110. const size_t Len;
  111. };
  112. template <class Char>
  113. struct TCharDelimiter {
  114. inline TCharDelimiter(Char ch) noexcept
  115. : Ch(ch)
  116. {
  117. }
  118. inline Char* Find(Char*& b, Char* e) const noexcept {
  119. const auto ret = std::basic_string_view<Char>(b, e - b).find(Ch);
  120. if (ret != std::string::npos) {
  121. const auto result = b + ret;
  122. b = result + 1;
  123. return result;
  124. }
  125. return (b = e);
  126. }
  127. inline Char* Find(Char*& b) const noexcept {
  128. Char* ret = FastStrChr(b, Ch);
  129. if (*ret) {
  130. b = ret + 1;
  131. } else {
  132. b = ret;
  133. }
  134. return ret;
  135. }
  136. Char Ch;
  137. };
  138. template <class Iterator, class Condition>
  139. struct TFuncDelimiter {
  140. public:
  141. template <class... Args>
  142. TFuncDelimiter(Args&&... args)
  143. : Fn(std::forward<Args>(args)...)
  144. {
  145. }
  146. inline Iterator Find(Iterator& b, Iterator e) const noexcept {
  147. if ((b = std::find_if(b, e, Fn)) != e) {
  148. return b++;
  149. }
  150. return b;
  151. }
  152. private:
  153. Condition Fn;
  154. };
  155. template <class Char>
  156. struct TFindFirstOf {
  157. inline TFindFirstOf(Char* set)
  158. : Set(set)
  159. {
  160. }
  161. inline Char* FindFirstOf(Char* b, Char* e) const noexcept {
  162. Char* ret = b;
  163. for (; ret != e; ++ret) {
  164. if (NStringSplitPrivate::Find(Set, *ret)) {
  165. break;
  166. }
  167. }
  168. return ret;
  169. }
  170. inline Char* FindFirstOf(Char* b) const noexcept {
  171. const std::basic_string_view<Char> bView(b);
  172. const auto ret = bView.find_first_of(Set);
  173. return ret != std::string::npos ? b + ret : b + bView.size();
  174. }
  175. Char* Set;
  176. };
  177. template <>
  178. struct TFindFirstOf<const char>: public TCompactStrSpn {
  179. inline TFindFirstOf(const char* set, const char* e)
  180. : TCompactStrSpn(set, e)
  181. {
  182. }
  183. inline TFindFirstOf(const char* set)
  184. : TCompactStrSpn(set)
  185. {
  186. }
  187. };
  188. template <class Char>
  189. struct TSetDelimiter: private TFindFirstOf<const Char> {
  190. using TFindFirstOf<const Char>::TFindFirstOf;
  191. inline Char* Find(Char*& b, Char* e) const noexcept {
  192. Char* ret = const_cast<Char*>(this->FindFirstOf(b, e));
  193. if (ret != e) {
  194. b = ret + 1;
  195. return ret;
  196. }
  197. return (b = e);
  198. }
  199. inline Char* Find(Char*& b) const noexcept {
  200. Char* ret = const_cast<Char*>(this->FindFirstOf(b));
  201. if (*ret) {
  202. b = ret + 1;
  203. return ret;
  204. }
  205. return (b = ret);
  206. }
  207. };
  208. namespace NSplitTargetHasPushBack {
  209. Y_HAS_MEMBER(push_back, PushBack);
  210. } // namespace NSplitTargetHasPushBack
  211. template <class T, class = void>
  212. struct TConsumerBackInserter;
  213. template <class T>
  214. struct TConsumerBackInserter<T, std::enable_if_t<NSplitTargetHasPushBack::TClassHasPushBack<T>::value>> {
  215. static void DoInsert(T* C, const typename T::value_type& i) {
  216. C->push_back(i);
  217. }
  218. };
  219. template <class T>
  220. struct TConsumerBackInserter<T, std::enable_if_t<!NSplitTargetHasPushBack::TClassHasPushBack<T>::value>> {
  221. static void DoInsert(T* C, const typename T::value_type& i) {
  222. C->insert(C->end(), i);
  223. }
  224. };
  225. template <class T>
  226. struct TContainerConsumer {
  227. inline TContainerConsumer(T* c) noexcept
  228. : C(c)
  229. {
  230. }
  231. template <class I>
  232. inline bool Consume(I* b, I* d, I* /*e*/) {
  233. TConsumerBackInserter<T>::DoInsert(C, typename T::value_type(b, d));
  234. return true;
  235. }
  236. T* C;
  237. };
  238. template <class T>
  239. struct TContainerConvertingConsumer {
  240. inline TContainerConvertingConsumer(T* c) noexcept
  241. : C(c)
  242. {
  243. }
  244. template <class I>
  245. inline bool Consume(I* b, I* d, I* /*e*/) {
  246. TConsumerBackInserter<T>::DoInsert(C, FromString<typename T::value_type>(TStringBuf(b, d)));
  247. return true;
  248. }
  249. T* C;
  250. };
  251. template <class S, class I>
  252. struct TLimitingConsumer {
  253. inline TLimitingConsumer(size_t cnt, S* slave) noexcept
  254. : Cnt(cnt ? cnt - 1 : Max<size_t>())
  255. , Slave(slave)
  256. , Last(nullptr)
  257. {
  258. }
  259. inline bool Consume(I* b, I* d, I* e) {
  260. if (!Cnt) {
  261. Last = b;
  262. return false;
  263. }
  264. --Cnt;
  265. return Slave->Consume(b, d, e);
  266. }
  267. size_t Cnt;
  268. S* Slave;
  269. I* Last;
  270. };
  271. template <class S>
  272. struct TSkipEmptyTokens {
  273. inline TSkipEmptyTokens(S* slave) noexcept
  274. : Slave(slave)
  275. {
  276. }
  277. template <class I>
  278. inline bool Consume(I* b, I* d, I* e) {
  279. if (b != d) {
  280. return Slave->Consume(b, d, e);
  281. }
  282. return true;
  283. }
  284. S* Slave;
  285. };
  286. template <class S>
  287. struct TKeepDelimiters {
  288. inline TKeepDelimiters(S* slave) noexcept
  289. : Slave(slave)
  290. {
  291. }
  292. template <class I>
  293. inline bool Consume(I* b, I* d, I* e) {
  294. if (Slave->Consume(b, d, d)) {
  295. if (d != e) {
  296. return Slave->Consume(d, e, e);
  297. }
  298. return true;
  299. }
  300. return false;
  301. }
  302. S* Slave;
  303. };
  304. template <class T>
  305. struct TSimplePusher {
  306. inline bool Consume(char* b, char* d, char*) {
  307. *d = 0;
  308. C->push_back(b);
  309. return true;
  310. }
  311. T* C;
  312. };
  313. template <class T>
  314. static inline void Split(char* buf, char ch, T* res) {
  315. res->resize(0);
  316. if (*buf == 0) {
  317. return;
  318. }
  319. TCharDelimiter<char> delim(ch);
  320. TSimplePusher<T> pusher = {res};
  321. SplitString(buf, delim, pusher);
  322. }
/// Splits the string into the `res` vector. `res` is cleared before the split.
/// The good old slow split function.
/// The field delimiter is any number of symbols specified in `delim` (no empty strings are put into `res`).
/// @return number of elements created
size_t Split(const char* in, const char* delim, TVector<TString>& res);
size_t Split(const TString& in, const TString& delim, TVector<TString>& res);
  329. /// Old split reimplemented for TStringBuf using the new code
  330. /// Note that delim can be constructed from char* automatically (it is not cheap though)
  331. inline size_t Split(const TStringBuf s, const TSetDelimiter<const char>& delim, TVector<TStringBuf>& res) {
  332. res.clear();
  333. TContainerConsumer<TVector<TStringBuf>> res1(&res);
  334. TSkipEmptyTokens<TContainerConsumer<TVector<TStringBuf>>> consumer(&res1);
  335. SplitString(s.data(), s.data() + s.size(), delim, consumer);
  336. return res.size();
  337. }
  338. template <class P, class D>
  339. void GetNext(TStringBuf& s, D delim, P& param) {
  340. TStringBuf next = s.NextTok(delim);
  341. Y_ENSURE(next.IsInited(), TStringBuf("Split: number of fields less than number of Split output arguments"));
  342. param = FromString<P>(next);
  343. }
  344. template <class P, class D>
  345. void GetNext(TStringBuf& s, D delim, TMaybe<P>& param) {
  346. TStringBuf next = s.NextTok(delim);
  347. if (next.IsInited()) {
  348. param = FromString<P>(next);
  349. } else {
  350. param.Clear();
  351. }
  352. }
  353. // example:
  354. // Split(TStringBuf("Sherlock,2014,36.6"), ',', name, year, temperature);
  355. template <class D, class P1, class P2>
  356. void Split(TStringBuf s, D delim, P1& p1, P2& p2) {
  357. GetNext(s, delim, p1);
  358. GetNext(s, delim, p2);
  359. Y_ENSURE(!s.IsInited(), TStringBuf("Split: number of fields more than number of Split output arguments"));
  360. }
  361. template <class D, class P1, class P2, class... Other>
  362. void Split(TStringBuf s, D delim, P1& p1, P2& p2, Other&... other) {
  363. GetNext(s, delim, p1);
  364. Split(s, delim, p2, other...);
  365. }
  366. /**
  367. * \fn auto StringSplitter(...)
  368. *
  369. * Creates a string splitter object. The only use for it is to call one of its
  370. * `Split*` methods, and then do something with the resulting proxy range.
  371. *
  372. * Some examples:
  373. * \code
  374. * TVector<TStringBuf> values = StringSplitter("1\t2\t3").Split('\t');
  375. *
  376. * for(TStringBuf part: StringSplitter("1::2::::3").SplitByString("::").SkipEmpty()) {
  377. * Cerr << part;
  378. * }
  379. *
  380. * TVector<TString> firstTwoValues = StringSplitter("1\t2\t3").Split('\t').Take(2);
  381. * \endcode
  382. *
  383. * Use `Collect` or `AddTo` to store split results into an existing container:
  384. * \code
  385. * TVector<TStringBuf> values = {"0"};
  386. * StringSplitter("1\t2\t3").Split('\t').AddTo(&values);
  387. * \endcode
  388. * Note that `Collect` clears target container, while `AddTo` just inserts values.
  389. * You can use these methods with any container that has `emplace` / `emplace_back`.
  390. *
  391. * Use `ParseInto` to also perform string conversions before inserting values
  392. * into target container:
  393. * \code
  394. * TSet<int> values;
  395. * StringSplitter("1\t2\t3").Split('\t').ParseInto(&values);
  396. * \endcode
  397. */
  398. namespace NStringSplitPrivate {
// Member-detection traits; used further down in this namespace as
// THasPushBack, THasInsert and THasData.
Y_HAS_MEMBER(push_back, PushBack);
Y_HAS_MEMBER(insert, Insert);
Y_HAS_MEMBER(data, Data);
  402. /**
  403. * This one is needed here so that `std::string_view -> std::string_view`
  404. * conversion works.
  405. */
  406. template <class Src, class Dst>
  407. inline void DoFromString(const Src& src, Dst* dst) {
  408. *dst = ::FromString<Dst>(src);
  409. }
  410. template <class T>
  411. inline void DoFromString(const T& src, T* dst) noexcept {
  412. *dst = src;
  413. }
  414. template <class T>
  415. inline void DoFromString(const T& src, decltype(std::ignore)* dst) noexcept {
  416. *dst = src;
  417. }
  418. template <class Src, class Dst>
  419. inline Y_WARN_UNUSED_RESULT bool TryDoFromString(const Src& src, Dst* dst) noexcept {
  420. return ::TryFromString(src, *dst);
  421. }
  422. template <class T>
  423. inline Y_WARN_UNUSED_RESULT bool TryDoFromString(const T& src, T* dst) noexcept {
  424. *dst = src;
  425. return true;
  426. }
  427. template <class T>
  428. inline Y_WARN_UNUSED_RESULT bool TryDoFromString(const T& src, decltype(std::ignore)* dst) noexcept {
  429. *dst = src;
  430. return true;
  431. }
  432. /**
  433. * Consumer that places provided elements into a container. Not using
  434. * `emplace(iterator)` for efficiency.
  435. */
  436. template <class Container>
  437. struct TContainerConsumer {
  438. using value_type = typename Container::value_type;
  439. TContainerConsumer(Container* c)
  440. : C_(c)
  441. {
  442. }
  443. // TODO: return bool (continue)
  444. template <class StringBuf>
  445. void operator()(StringBuf e) const {
  446. this->operator()(C_, e);
  447. }
  448. private:
  449. template <class OtherContainer, class StringBuf>
  450. auto operator()(OtherContainer* c, StringBuf e) const -> decltype(c->emplace_back()) {
  451. return c->emplace_back(value_type(e));
  452. }
  453. template <class OtherContainer, class StringBuf>
  454. auto operator()(OtherContainer* c, StringBuf e) const -> decltype(c->emplace()) {
  455. return c->emplace(value_type(e));
  456. }
  457. Container* C_;
  458. };
  459. /**
  460. * Consumer that converts provided elements via `FromString` and places them
  461. * into a container.
  462. */
  463. template <class Container>
  464. struct TContainerConvertingConsumer {
  465. using value_type = typename Container::value_type;
  466. TContainerConvertingConsumer(Container* c)
  467. : C_(c)
  468. {
  469. }
  470. template <class StringBuf>
  471. void operator()(StringBuf e) const {
  472. this->operator()(C_, e);
  473. }
  474. private:
  475. template <class OtherContainer, class StringBuf>
  476. auto operator()(OtherContainer* c, StringBuf e) const -> decltype(c->emplace_back()) {
  477. value_type v;
  478. DoFromString(e, &v);
  479. return c->emplace_back(std::move(v));
  480. }
  481. template <class OtherContainer, class StringBuf>
  482. auto operator()(OtherContainer* c, StringBuf e) const -> decltype(c->emplace()) {
  483. value_type v;
  484. DoFromString(e, &v);
  485. return c->emplace(std::move(v));
  486. }
  487. Container* C_;
  488. };
  489. template <class String>
  490. struct TStringBufOfImpl {
  491. using type = std::conditional_t<
  492. THasData<String>::value,
  493. TBasicStringBuf<typename String::value_type>,
  494. TIteratorRange<typename String::const_iterator>>;
  495. };
  496. template <class Char, class Traits, class Allocator>
  497. struct TStringBufOfImpl<std::basic_string<Char, Traits, Allocator>> {
  498. using type = std::basic_string_view<Char, Traits>;
  499. };
  500. template <class Char, class Traits>
  501. struct TStringBufOfImpl<std::basic_string_view<Char, Traits>> {
  502. using type = std::basic_string_view<Char, Traits>;
  503. };
  504. /**
  505. * Metafunction that returns a string buffer for the given type. This is to
  506. * make sure that splitting `std::string` returns `std::string_view`.
  507. */
  508. template <class String>
  509. using TStringBufOf = typename TStringBufOfImpl<String>::type;
  510. template <class StringBuf, class Iterator>
  511. StringBuf DoMakeStringBuf(Iterator b, Iterator e, StringBuf*) {
  512. return StringBuf(b, e);
  513. }
  514. template <class Char, class Traits, class Iterator>
  515. std::basic_string_view<Char, Traits> DoMakeStringBuf(Iterator b, Iterator e, std::basic_string_view<Char, Traits>*) {
  516. return std::basic_string_view<Char, Traits>(b, e - b);
  517. }
  518. template <class StringBuf, class Iterator>
  519. StringBuf MakeStringBuf(Iterator b, Iterator e) {
  520. return DoMakeStringBuf(b, e, static_cast<StringBuf*>(nullptr));
  521. }
  522. template <class String>
  523. struct TIteratorOfImpl {
  524. using type = std::conditional_t<
  525. THasData<String>::value,
  526. const typename String::value_type*,
  527. typename String::const_iterator>;
  528. };
  529. template <class String>
  530. using TIteratorOf = typename TIteratorOfImpl<String>::type;
  531. template <class String>
  532. class TStringSplitter;
  533. template <class String>
  534. struct TIterState: public TStringBufOf<String> {
  535. public:
  536. using TStringBufType = TStringBufOf<String>;
  537. using TIterator = TIteratorOf<String>;
  538. friend class TStringSplitter<String>;
  539. template <typename S = String, std::enable_if_t<THasData<S>::value, int> = 0>
  540. TIterState(const String& string) noexcept
  541. : TStringBufType()
  542. , DelimiterEnd_(string.data())
  543. , OriginEnd_(string.data() + string.size())
  544. {
  545. }
  546. template <typename S = String, std::enable_if_t<!THasData<S>::value, int> = 0>
  547. TIterState(const String& string) noexcept
  548. : TStringBufType()
  549. , DelimiterEnd_(std::begin(string))
  550. , OriginEnd_(std::end(string))
  551. {
  552. }
  553. template <
  554. typename Other,
  555. typename = std::enable_if_t<
  556. std::is_convertible<Other, TStringBufType>::value>>
  557. bool operator==(const Other& toCompare) const {
  558. return TStringBufType(*this) == TStringBufType(toCompare);
  559. }
  560. TIterator TokenStart() const noexcept {
  561. return this->begin();
  562. }
  563. TIterator TokenDelim() const noexcept {
  564. return this->end();
  565. }
  566. TStringBufType Token() const noexcept {
  567. return *this;
  568. }
  569. TStringBufType Delim() const noexcept {
  570. return MakeStringBuf<TStringBufType>(TokenDelim(), DelimiterEnd_);
  571. }
  572. private:
  573. void UpdateParentBuf(TIterator tokenStart, TIterator tokenDelim) noexcept {
  574. *static_cast<TStringBufType*>(this) = MakeStringBuf<TStringBufType>(tokenStart, tokenDelim);
  575. }
  576. bool DelimiterIsEmpty() const noexcept {
  577. return TokenDelim() == DelimiterEnd_;
  578. }
  579. void MarkExhausted() noexcept {
  580. UpdateParentBuf(OriginEnd_, OriginEnd_);
  581. DelimiterEnd_ = OriginEnd_;
  582. }
  583. private:
  584. TIterator DelimiterEnd_;
  585. const TIterator OriginEnd_;
  586. };
  587. template <class Base>
  588. class TSplitRange: public Base, public TInputRangeAdaptor<TSplitRange<Base>> {
  589. using TStringBufType = decltype(std::declval<Base>().Next()->Token());
  590. public:
  591. template <typename... Args>
  592. inline TSplitRange(Args&&... args)
  593. : Base(std::forward<Args>(args)...)
  594. {
  595. }
  596. template <class Consumer, std::enable_if_t<std::is_same<decltype(std::declval<Consumer>()(std::declval<TStringBufType>())), void>::value, int>* = nullptr>
  597. inline void Consume(Consumer&& f) {
  598. for (auto&& it : *this) {
  599. f(it.Token());
  600. }
  601. }
  602. template <class Consumer, std::enable_if_t<std::is_same<decltype(std::declval<Consumer>()(std::declval<TStringBufType>())), bool>::value, int>* = nullptr>
  603. inline bool Consume(Consumer&& f) {
  604. for (auto&& it : *this) {
  605. if (!f(it.Token())) {
  606. return false;
  607. }
  608. }
  609. return true;
  610. }
  611. template <class Container, class = std::enable_if_t<THasInsert<Container>::value || THasPushBack<Container>::value>>
  612. operator Container() {
  613. Container result;
  614. AddTo(&result);
  615. return result;
  616. }
  617. template <class S>
  618. inline TVector<S> ToList() {
  619. TVector<S> result;
  620. for (auto&& it : *this) {
  621. result.push_back(S(it.Token()));
  622. }
  623. return result;
  624. }
  625. template <class Container>
  626. inline void Collect(Container* c) {
  627. Y_ASSERT(c);
  628. c->clear();
  629. AddTo(c);
  630. }
  631. template <class Container>
  632. inline void AddTo(Container* c) {
  633. Y_ASSERT(c);
  634. TContainerConsumer<Container> consumer(c);
  635. Consume(consumer);
  636. }
  637. template <class Container>
  638. inline void ParseInto(Container* c) {
  639. Y_ASSERT(c);
  640. TContainerConvertingConsumer<Container> consumer(c);
  641. Consume(consumer);
  642. }
  643. // TODO: this is actually TryParseInto
  644. /**
  645. * Same as `CollectInto`, just doesn't throw.
  646. *
  647. * \param[out] args Output arguments.
  648. * \returns Whether parsing was successful.
  649. */
  650. template <typename... Args>
  651. inline bool TryCollectInto(Args*... args) noexcept {
  652. size_t successfullyFilled = 0;
  653. auto it = this->begin();
  654. // FIXME: actually, some kind of TryApplyToMany is needed in order to stop iteration upon first failure
  655. ApplyToMany([&](auto&& arg) {
  656. if (it != this->end()) {
  657. if (TryDoFromString(it->Token(), arg)) {
  658. ++successfullyFilled;
  659. }
  660. ++it;
  661. }
  662. }, args...);
  663. return successfullyFilled == sizeof...(args) && it == this->end();
  664. }
  665. // TODO: this is actually ParseInto
  666. /**
  667. * Splits and parses everything that's in this splitter into `args`.
  668. *
  669. * Example usage:
  670. * \code
  671. * int l, r;
  672. * StringSplitter("100*200").Split('*').CollectInto(&l, &r);
  673. * \endcode
  674. *
  675. * \param[out] args Output arguments.
  676. * \throws If not all items were parsed, or
  677. * if there were too many items in the split.
  678. */
  679. template <typename... Args>
  680. inline void CollectInto(Args*... args) {
  681. Y_ENSURE(TryCollectInto<Args...>(args...));
  682. }
  683. inline size_t Count() {
  684. size_t cnt = 0;
  685. for (auto&& it : *this) {
  686. Y_UNUSED(it);
  687. ++cnt;
  688. }
  689. return cnt;
  690. }
  691. };
  692. template <class String>
  693. class TStringSplitter {
  694. using TStringType = String;
  695. using TChar = typename TStringType::value_type;
  696. using TIteratorState = TIterState<TStringType>;
  697. using TStringBufType = typename TIteratorState::TStringBufType;
  698. using TIterator = typename TIteratorState::TIterator;
  699. /**
  700. * Base class for all split ranges that actually does the splitting.
  701. */
  702. template <class DelimStorage>
  703. struct TSplitRangeBase {
  704. template <class OtherString, class... Args>
  705. inline TSplitRangeBase(OtherString&& s, Args&&... args)
  706. : String_(std::forward<OtherString>(s))
  707. , State_(String_)
  708. , Delimiter_(std::forward<Args>(args)...)
  709. {
  710. }
  711. TSplitRangeBase(const TSplitRangeBase& other)
  712. : String_(other.String_)
  713. , State_(String_)
  714. , Delimiter_(other.Delimiter_)
  715. {
  716. }
  717. TSplitRangeBase(TSplitRangeBase&& other)
  718. : String_(std::move(other.String_))
  719. , State_(String_)
  720. , Delimiter_(std::move(other.Delimiter_))
  721. {
  722. other.State_.MarkExhausted();
  723. }
  724. TSplitRangeBase& operator=(const TSplitRangeBase& other) = delete;
  725. TSplitRangeBase& operator=(TSplitRangeBase&& other) = delete;
  726. inline TIteratorState* Next() {
  727. if (State_.DelimiterIsEmpty()) {
  728. return nullptr;
  729. }
  730. const auto tokenBegin = State_.DelimiterEnd_;
  731. const auto tokenEnd = Delimiter_.Ptr()->Find(State_.DelimiterEnd_, State_.OriginEnd_);
  732. State_.UpdateParentBuf(tokenBegin, tokenEnd);
  733. return &State_;
  734. }
  735. private:
  736. TStringType String_;
  737. TIteratorState State_;
  738. DelimStorage Delimiter_;
  739. };
  740. template <class Base, class Filter>
  741. struct TFilterRange: public Base {
  742. template <class... Args>
  743. inline TFilterRange(const Base& base, Args&&... args)
  744. : Base(base)
  745. , Filter_(std::forward<Args>(args)...)
  746. {
  747. }
  748. inline TIteratorState* Next() {
  749. TIteratorState* ret;
  750. do {
  751. ret = Base::Next();
  752. } while (ret && !Filter_.Accept(ret));
  753. return ret;
  754. }
  755. Filter Filter_;
  756. };
  757. struct TNonEmptyFilter {
  758. template <class TToken>
  759. inline bool Accept(const TToken* token) noexcept {
  760. return !token->empty();
  761. }
  762. };
  763. template <class TIter>
  764. struct TStopIteration;
  765. template <class Base>
  766. struct TFilters: public Base {
  767. template <class TFilter>
  768. using TIt = TSplitRange<TStopIteration<TFilters<TFilterRange<Base, TFilter>>>>;
  769. template <typename... Args>
  770. inline TFilters(Args&&... args)
  771. : Base(std::forward<Args>(args)...)
  772. {
  773. }
  774. inline TIt<TNonEmptyFilter> SkipEmpty() const {
  775. return {*this};
  776. }
  777. };
  778. template <class Base, class Stopper>
  779. struct TStopRange: public Base {
  780. template <typename... Args>
  781. inline TStopRange(const Base& base, Args&&... args)
  782. : Base(base)
  783. , Stopper_(std::forward<Args>(args)...)
  784. {
  785. }
  786. inline TIteratorState* Next() {
  787. TIteratorState* ret = Base::Next();
  788. if (!ret || Stopper_.Stop(ret)) {
  789. return nullptr;
  790. }
  791. return ret;
  792. }
  793. Stopper Stopper_;
  794. };
  795. struct TTake {
  796. TTake() = default;
  797. TTake(size_t count)
  798. : Count(count)
  799. {
  800. }
  801. template <class TToken>
  802. inline bool Stop(TToken*) noexcept {
  803. if (Count > 0) {
  804. --Count;
  805. return false;
  806. } else {
  807. return true;
  808. }
  809. }
  810. size_t Count = 0;
  811. };
  812. struct TLimit {
  813. TLimit() = default;
  814. TLimit(size_t count)
  815. : Count(count)
  816. {
  817. Y_ASSERT(Count > 0);
  818. }
  819. template <class TToken>
  820. inline bool Stop(TToken* token) noexcept {
  821. if (Count > 1) {
  822. --Count;
  823. return false;
  824. } else if (Count == 1) {
  825. token->DelimiterEnd_ = token->OriginEnd_;
  826. token->UpdateParentBuf(token->TokenStart(), token->DelimiterEnd_);
  827. return false;
  828. }
  829. return true;
  830. }
  831. size_t Count = 0;
  832. };
  833. template <class Base>
  834. struct TStopIteration: public Base {
  835. template <class TStopper>
  836. using TIt = TSplitRange<TStopIteration<TFilters<TStopRange<Base, TStopper>>>>;
  837. template <typename... Args>
  838. inline TStopIteration(Args&&... args)
  839. : Base(std::forward<Args>(args)...)
  840. {
  841. }
  842. inline TIt<TTake> Take(size_t count) {
  843. return {*this, count};
  844. }
  845. inline TIt<TLimit> Limit(size_t count) {
  846. return {*this, count};
  847. }
  848. };
  849. template <class TPolicy>
  850. using TIt = TSplitRange<TStopIteration<TFilters<TSplitRangeBase<TPolicy>>>>;
  851. public:
  852. template <class OtherString>
  853. explicit TStringSplitter(OtherString&& s)
  854. : String_(std::forward<OtherString>(s))
  855. {
  856. }
  857. // does not own TDelim
  858. template <class TDelim>
  859. inline TIt<TPtrPolicy<const TDelim>> Split(const TDelim& d) const noexcept {
  860. return {String_, &d};
  861. }
  862. inline TIt<TEmbedPolicy<TCharDelimiter<const TChar>>> Split(TChar ch) const noexcept {
  863. return {String_, ch};
  864. }
  865. inline TIt<TSimpleRefPolicy<TSetDelimiter<const TChar>>> SplitBySet(const TChar* set) const noexcept {
  866. return {String_, set};
  867. }
  868. inline TIt<TEmbedPolicy<TStringDelimiter<const TChar>>> SplitByString(const TStringBufType& str) const noexcept {
  869. return {String_, str.data(), str.size()};
  870. }
  871. template <class TFunc>
  872. inline TIt<TEmbedPolicy<TFuncDelimiter<TIterator, TFunc>>> SplitByFunc(TFunc f) const noexcept {
  873. return {String_, f};
  874. }
  875. private:
  876. TStringType String_;
  877. };
  878. template <class String>
  879. auto MakeStringSplitter(String&& s) {
  880. return TStringSplitter<std::remove_reference_t<String>>(std::forward<String>(s));
  881. }
  882. } // namespace NStringSplitPrivate
  883. template <class Iterator>
  884. auto StringSplitter(Iterator begin, Iterator end) {
  885. return ::NStringSplitPrivate::MakeStringSplitter(TIteratorRange<Iterator>(begin, end));
  886. }
  887. template <class Char>
  888. auto StringSplitter(const Char* begin, const Char* end) {
  889. return ::NStringSplitPrivate::MakeStringSplitter(TBasicStringBuf<Char>(begin, end));
  890. }
  891. template <class Char>
  892. auto StringSplitter(const Char* begin, size_t len) {
  893. return ::NStringSplitPrivate::MakeStringSplitter(TBasicStringBuf<Char>(begin, len));
  894. }
  895. template <class Char>
  896. auto StringSplitter(const Char* str) {
  897. return ::NStringSplitPrivate::MakeStringSplitter(TBasicStringBuf<Char>(str));
  898. }
  899. template <class String, std::enable_if_t<!std::is_pointer<std::remove_reference_t<String>>::value, int> = 0>
  900. auto StringSplitter(String& s) {
  901. return ::NStringSplitPrivate::MakeStringSplitter(::NStringSplitPrivate::TStringBufOf<String>(s.data(), s.size()));
  902. }
/// Non-pointer overload taking a forwarding reference: `s` is passed on to
/// MakeStringSplitter via std::move.
template <class String, std::enable_if_t<!std::is_pointer<std::remove_reference_t<String>>::value, int> = 0>
auto StringSplitter(String&& s) {
    return ::NStringSplitPrivate::MakeStringSplitter(std::move(s));
}