split.h 30 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103
  1. #pragma once
#include "strspn.h"
#include "cast.h"

#include <util/generic/algorithm.h>
#include <util/generic/fwd.h>
#include <util/generic/iterator.h>
#include <util/generic/iterator_range.h>
#include <util/generic/store_policy.h>
#include <util/generic/strbuf.h>
#include <util/generic/string.h>
#include <util/generic/typetraits.h>
#include <util/generic/vector.h>
#include <util/generic/ylimits.h>
#include <util/system/compat.h>
#include <util/system/defaults.h>

#include <stlfwd>
#include <type_traits>
#include <utility>
  18. // NOTE: Check StringSplitter below to get more convenient split string interface.
  19. namespace NStringSplitPrivate {
  20. template <class T, class I, class = void>
  21. struct TIsConsumer: std::false_type {};
  22. template <class T, class I>
  23. struct TIsConsumer<
  24. T, I,
  25. TVoidT<decltype(std::declval<T>().Consume(
  26. std::declval<I>(), std::declval<I>(), std::declval<I>()))>>
  27. : std::true_type {};
  28. template <class T, class I>
  29. constexpr bool TIsConsumerV = TIsConsumer<T, I>::value;
  30. template <class T>
  31. T* Find(T* str, std::common_type_t<T> ch) {
  32. for (; *str; ++str) {
  33. if (*str == ch) {
  34. return str;
  35. }
  36. }
  37. return nullptr;
  38. }
  39. } // namespace NStringSplitPrivate
  40. template <class I, class TDelim, class TConsumer>
  41. std::enable_if_t<::NStringSplitPrivate::TIsConsumerV<TConsumer, I>>
  42. SplitString(I b, I e, const TDelim& d, TConsumer&& c) {
  43. I l, i;
  44. do {
  45. l = b;
  46. i = d.Find(b, e);
  47. } while (c.Consume(l, i, b) && (b != i));
  48. }
  49. template <class I, class TDelim, class TConsumer>
  50. std::enable_if_t<::NStringSplitPrivate::TIsConsumerV<TConsumer, I>>
  51. SplitString(I b, const TDelim& d, TConsumer&& c) {
  52. I l, i;
  53. do {
  54. l = b;
  55. i = d.Find(b);
  56. } while (c.Consume(l, i, b) && (b != i));
  57. }
  58. template <class I1, class I2>
  59. static inline I1* FastStrChr(I1* str, I2 f) noexcept {
  60. I1* ret = NStringSplitPrivate::Find(str, f);
  61. if (!ret) {
  62. ret = str + std::char_traits<I1>::length(str);
  63. }
  64. return ret;
  65. }
  66. template <class I>
  67. static inline I* FastStrStr(I* str, I* f, size_t l) noexcept {
  68. std::basic_string_view<I> strView(str);
  69. const auto ret = strView.find(*f);
  70. if (ret != std::string::npos) {
  71. std::basic_string_view<I> fView(f, l);
  72. strView = strView.substr(ret);
  73. for (; strView.size() >= l; strView = strView.substr(1)) {
  74. if (strView.substr(0, l) == fView) {
  75. break;
  76. }
  77. }
  78. return strView.size() >= l ? strView.data() : strView.data() + strView.size();
  79. } else {
  80. return strView.data() + strView.size();
  81. }
  82. }
  83. template <class Char>
  84. struct TStringDelimiter {
  85. inline TStringDelimiter(Char* delim) noexcept
  86. : Delim(delim)
  87. , Len(std::char_traits<Char>::length(delim))
  88. {
  89. }
  90. inline TStringDelimiter(Char* delim, size_t len) noexcept
  91. : Delim(delim)
  92. , Len(len)
  93. {
  94. }
  95. inline Char* Find(Char*& b, Char* e) const noexcept {
  96. const auto ret = std::basic_string_view<Char>(b, e - b).find(Delim, 0, Len);
  97. if (ret != std::string::npos) {
  98. const auto result = b + ret;
  99. b = result + Len;
  100. return result;
  101. }
  102. return (b = e);
  103. }
  104. inline Char* Find(Char*& b) const noexcept {
  105. Char* ret = FastStrStr(b, Delim, Len);
  106. b = *ret ? ret + Len : ret;
  107. return ret;
  108. }
  109. Char* Delim;
  110. const size_t Len;
  111. };
  112. template <class Char>
  113. struct TCharDelimiter {
  114. inline TCharDelimiter(Char ch) noexcept
  115. : Ch(ch)
  116. {
  117. }
  118. inline Char* Find(Char*& b, Char* e) const noexcept {
  119. const auto ret = std::basic_string_view<Char>(b, e - b).find(Ch);
  120. if (ret != std::string::npos) {
  121. const auto result = b + ret;
  122. b = result + 1;
  123. return result;
  124. }
  125. return (b = e);
  126. }
  127. inline Char* Find(Char*& b) const noexcept {
  128. Char* ret = FastStrChr(b, Ch);
  129. if (*ret) {
  130. b = ret + 1;
  131. } else {
  132. b = ret;
  133. }
  134. return ret;
  135. }
  136. Char Ch;
  137. };
  138. template <class Iterator, class Condition>
  139. struct TFuncDelimiter {
  140. public:
  141. template <class... Args>
  142. TFuncDelimiter(Args&&... args)
  143. : Fn(std::forward<Args>(args)...)
  144. {
  145. }
  146. inline Iterator Find(Iterator& b, Iterator e) const noexcept {
  147. if ((b = std::find_if(b, e, Fn)) != e) {
  148. return b++;
  149. }
  150. return b;
  151. }
  152. private:
  153. Condition Fn;
  154. };
  155. template <class Char>
  156. struct TFindFirstOf {
  157. inline TFindFirstOf(Char* set)
  158. : Set(set)
  159. {
  160. }
  161. inline Char* FindFirstOf(Char* b, Char* e) const noexcept {
  162. Char* ret = b;
  163. for (; ret != e; ++ret) {
  164. if (NStringSplitPrivate::Find(Set, *ret))
  165. break;
  166. }
  167. return ret;
  168. }
  169. inline Char* FindFirstOf(Char* b) const noexcept {
  170. const std::basic_string_view<Char> bView(b);
  171. const auto ret = bView.find_first_of(Set);
  172. return ret != std::string::npos ? b + ret : b + bView.size();
  173. }
  174. Char* Set;
  175. };
  176. template <>
  177. struct TFindFirstOf<const char>: public TCompactStrSpn {
  178. inline TFindFirstOf(const char* set, const char* e)
  179. : TCompactStrSpn(set, e)
  180. {
  181. }
  182. inline TFindFirstOf(const char* set)
  183. : TCompactStrSpn(set)
  184. {
  185. }
  186. };
  187. template <class Char>
  188. struct TSetDelimiter: private TFindFirstOf<const Char> {
  189. using TFindFirstOf<const Char>::TFindFirstOf;
  190. inline Char* Find(Char*& b, Char* e) const noexcept {
  191. Char* ret = const_cast<Char*>(this->FindFirstOf(b, e));
  192. if (ret != e) {
  193. b = ret + 1;
  194. return ret;
  195. }
  196. return (b = e);
  197. }
  198. inline Char* Find(Char*& b) const noexcept {
  199. Char* ret = const_cast<Char*>(this->FindFirstOf(b));
  200. if (*ret) {
  201. b = ret + 1;
  202. return ret;
  203. }
  204. return (b = ret);
  205. }
  206. };
  207. namespace NSplitTargetHasPushBack {
  208. Y_HAS_MEMBER(push_back, PushBack);
  209. } // namespace NSplitTargetHasPushBack
  210. template <class T, class = void>
  211. struct TConsumerBackInserter;
  212. template <class T>
  213. struct TConsumerBackInserter<T, std::enable_if_t<NSplitTargetHasPushBack::TClassHasPushBack<T>::value>> {
  214. static void DoInsert(T* C, const typename T::value_type& i) {
  215. C->push_back(i);
  216. }
  217. };
  218. template <class T>
  219. struct TConsumerBackInserter<T, std::enable_if_t<!NSplitTargetHasPushBack::TClassHasPushBack<T>::value>> {
  220. static void DoInsert(T* C, const typename T::value_type& i) {
  221. C->insert(C->end(), i);
  222. }
  223. };
  224. template <class T>
  225. struct TContainerConsumer {
  226. inline TContainerConsumer(T* c) noexcept
  227. : C(c)
  228. {
  229. }
  230. template <class I>
  231. inline bool Consume(I* b, I* d, I* /*e*/) {
  232. TConsumerBackInserter<T>::DoInsert(C, typename T::value_type(b, d));
  233. return true;
  234. }
  235. T* C;
  236. };
  237. template <class T>
  238. struct TContainerConvertingConsumer {
  239. inline TContainerConvertingConsumer(T* c) noexcept
  240. : C(c)
  241. {
  242. }
  243. template <class I>
  244. inline bool Consume(I* b, I* d, I* /*e*/) {
  245. TConsumerBackInserter<T>::DoInsert(C, FromString<typename T::value_type>(TStringBuf(b, d)));
  246. return true;
  247. }
  248. T* C;
  249. };
  250. template <class S, class I>
  251. struct TLimitingConsumer {
  252. inline TLimitingConsumer(size_t cnt, S* slave) noexcept
  253. : Cnt(cnt ? cnt - 1 : Max<size_t>())
  254. , Slave(slave)
  255. , Last(nullptr)
  256. {
  257. }
  258. inline bool Consume(I* b, I* d, I* e) {
  259. if (!Cnt) {
  260. Last = b;
  261. return false;
  262. }
  263. --Cnt;
  264. return Slave->Consume(b, d, e);
  265. }
  266. size_t Cnt;
  267. S* Slave;
  268. I* Last;
  269. };
  270. template <class S>
  271. struct TSkipEmptyTokens {
  272. inline TSkipEmptyTokens(S* slave) noexcept
  273. : Slave(slave)
  274. {
  275. }
  276. template <class I>
  277. inline bool Consume(I* b, I* d, I* e) {
  278. if (b != d) {
  279. return Slave->Consume(b, d, e);
  280. }
  281. return true;
  282. }
  283. S* Slave;
  284. };
  285. template <class S>
  286. struct TKeepDelimiters {
  287. inline TKeepDelimiters(S* slave) noexcept
  288. : Slave(slave)
  289. {
  290. }
  291. template <class I>
  292. inline bool Consume(I* b, I* d, I* e) {
  293. if (Slave->Consume(b, d, d)) {
  294. if (d != e) {
  295. return Slave->Consume(d, e, e);
  296. }
  297. return true;
  298. }
  299. return false;
  300. }
  301. S* Slave;
  302. };
  303. template <class T>
  304. struct TSimplePusher {
  305. inline bool Consume(char* b, char* d, char*) {
  306. *d = 0;
  307. C->push_back(b);
  308. return true;
  309. }
  310. T* C;
  311. };
  312. template <class T>
  313. static inline void Split(char* buf, char ch, T* res) {
  314. res->resize(0);
  315. if (*buf == 0)
  316. return;
  317. TCharDelimiter<char> delim(ch);
  318. TSimplePusher<T> pusher = {res};
  319. SplitString(buf, delim, pusher);
  320. }
  321. /// Split string into res vector. Res vector is cleared before split.
  322. /// Good old slow split function.
  323. /// Field delimiter is any number of symbols specified in delim (no empty strings in res vector)
  324. /// @return number of elements created
  325. size_t Split(const char* in, const char* delim, TVector<TString>& res);
  326. size_t Split(const TString& in, const TString& delim, TVector<TString>& res);
  327. /// Old split reimplemented for TStringBuf using the new code
  328. /// Note that delim can be constructed from char* automatically (it is not cheap though)
  329. inline size_t Split(const TStringBuf s, const TSetDelimiter<const char>& delim, TVector<TStringBuf>& res) {
  330. res.clear();
  331. TContainerConsumer<TVector<TStringBuf>> res1(&res);
  332. TSkipEmptyTokens<TContainerConsumer<TVector<TStringBuf>>> consumer(&res1);
  333. SplitString(s.data(), s.data() + s.size(), delim, consumer);
  334. return res.size();
  335. }
  336. template <class P, class D>
  337. void GetNext(TStringBuf& s, D delim, P& param) {
  338. TStringBuf next = s.NextTok(delim);
  339. Y_ENSURE(next.IsInited(), TStringBuf("Split: number of fields less than number of Split output arguments"));
  340. param = FromString<P>(next);
  341. }
  342. template <class P, class D>
  343. void GetNext(TStringBuf& s, D delim, TMaybe<P>& param) {
  344. TStringBuf next = s.NextTok(delim);
  345. if (next.IsInited()) {
  346. param = FromString<P>(next);
  347. } else {
  348. param.Clear();
  349. }
  350. }
  351. // example:
  352. // Split(TStringBuf("Sherlock,2014,36.6"), ',', name, year, temperature);
  353. template <class D, class P1, class P2>
  354. void Split(TStringBuf s, D delim, P1& p1, P2& p2) {
  355. GetNext(s, delim, p1);
  356. GetNext(s, delim, p2);
  357. Y_ENSURE(!s.IsInited(), TStringBuf("Split: number of fields more than number of Split output arguments"));
  358. }
  359. template <class D, class P1, class P2, class... Other>
  360. void Split(TStringBuf s, D delim, P1& p1, P2& p2, Other&... other) {
  361. GetNext(s, delim, p1);
  362. Split(s, delim, p2, other...);
  363. }
  364. /**
  365. * \fn auto StringSplitter(...)
  366. *
  367. * Creates a string splitter object. The only use for it is to call one of its
  368. * `Split*` methods, and then do something with the resulting proxy range.
  369. *
  370. * Some examples:
  371. * \code
  372. * TVector<TStringBuf> values = StringSplitter("1\t2\t3").Split('\t');
  373. *
  374. * for(TStringBuf part: StringSplitter("1::2::::3").SplitByString("::").SkipEmpty()) {
  375. * Cerr << part;
  376. * }
  377. *
  378. * TVector<TString> firstTwoValues = StringSplitter("1\t2\t3").Split('\t').Take(2);
  379. * \endcode
  380. *
  381. * Use `Collect` or `AddTo` to store split results into an existing container:
  382. * \code
  383. * TVector<TStringBuf> values = {"0"};
  384. * StringSplitter("1\t2\t3").Split('\t').AddTo(&values);
  385. * \endcode
  386. * Note that `Collect` clears target container, while `AddTo` just inserts values.
  387. * You can use these methods with any container that has `emplace` / `emplace_back`.
  388. *
  389. * Use `ParseInto` to also perform string conversions before inserting values
  390. * into target container:
  391. * \code
  392. * TSet<int> values;
  393. * StringSplitter("1\t2\t3").Split('\t').ParseInto(&values);
  394. * \endcode
  395. */
  396. namespace NStringSplitPrivate {
  // Member-detection traits produced by the project's Y_HAS_MEMBER macro.
  // Presumably these define THasPushBack, THasInsert and THasData, which the
  // code below in this namespace uses (confirm against the macro definition).
  397. Y_HAS_MEMBER(push_back, PushBack);
  398. Y_HAS_MEMBER(insert, Insert);
  399. Y_HAS_MEMBER(data, Data);
  400. /**
  401. * This one is needed here so that `std::string_view -> std::string_view`
  402. * conversion works.
  403. */
  404. template <class Src, class Dst>
  405. inline void DoFromString(const Src& src, Dst* dst) {
  406. *dst = ::FromString<Dst>(src);
  407. }
  408. template <class T>
  409. inline void DoFromString(const T& src, T* dst) noexcept {
  410. *dst = src;
  411. }
  412. template <class T>
  413. inline void DoFromString(const T& src, decltype(std::ignore)* dst) noexcept {
  414. *dst = src;
  415. }
  416. template <class Src, class Dst>
  417. inline Y_WARN_UNUSED_RESULT bool TryDoFromString(const Src& src, Dst* dst) noexcept {
  418. return ::TryFromString(src, *dst);
  419. }
  420. template <class T>
  421. inline Y_WARN_UNUSED_RESULT bool TryDoFromString(const T& src, T* dst) noexcept {
  422. *dst = src;
  423. return true;
  424. }
  425. template <class T>
  426. inline Y_WARN_UNUSED_RESULT bool TryDoFromString(const T& src, decltype(std::ignore)* dst) noexcept {
  427. *dst = src;
  428. return true;
  429. }
  430. /**
  431. * Consumer that places provided elements into a container. Not using
  432. * `emplace(iterator)` for efficiency.
  433. */
  434. template <class Container>
  435. struct TContainerConsumer {
  436. using value_type = typename Container::value_type;
  437. TContainerConsumer(Container* c)
  438. : C_(c)
  439. {
  440. }
  441. // TODO: return bool (continue)
  442. template <class StringBuf>
  443. void operator()(StringBuf e) const {
  444. this->operator()(C_, e);
  445. }
  446. private:
  447. template <class OtherContainer, class StringBuf>
  448. auto operator()(OtherContainer* c, StringBuf e) const -> decltype(c->emplace_back()) {
  449. return c->emplace_back(value_type(e));
  450. }
  451. template <class OtherContainer, class StringBuf>
  452. auto operator()(OtherContainer* c, StringBuf e) const -> decltype(c->emplace()) {
  453. return c->emplace(value_type(e));
  454. }
  455. Container* C_;
  456. };
  457. /**
  458. * Consumer that converts provided elements via `FromString` and places them
  459. * into a container.
  460. */
  461. template <class Container>
  462. struct TContainerConvertingConsumer {
  463. using value_type = typename Container::value_type;
  464. TContainerConvertingConsumer(Container* c)
  465. : C_(c)
  466. {
  467. }
  468. template <class StringBuf>
  469. void operator()(StringBuf e) const {
  470. this->operator()(C_, e);
  471. }
  472. private:
  473. template <class OtherContainer, class StringBuf>
  474. auto operator()(OtherContainer* c, StringBuf e) const -> decltype(c->emplace_back()) {
  475. value_type v;
  476. DoFromString(e, &v);
  477. return c->emplace_back(std::move(v));
  478. }
  479. template <class OtherContainer, class StringBuf>
  480. auto operator()(OtherContainer* c, StringBuf e) const -> decltype(c->emplace()) {
  481. value_type v;
  482. DoFromString(e, &v);
  483. return c->emplace(std::move(v));
  484. }
  485. Container* C_;
  486. };
  487. template <class String>
  488. struct TStringBufOfImpl {
  489. using type = std::conditional_t<
  490. THasData<String>::value,
  491. TBasicStringBuf<typename String::value_type>,
  492. TIteratorRange<typename String::const_iterator>>;
  493. };
  494. template <class Char, class Traits, class Allocator>
  495. struct TStringBufOfImpl<std::basic_string<Char, Traits, Allocator>> {
  496. using type = std::basic_string_view<Char, Traits>;
  497. };
  498. template <class Char, class Traits>
  499. struct TStringBufOfImpl<std::basic_string_view<Char, Traits>> {
  500. using type = std::basic_string_view<Char, Traits>;
  501. };
  502. /**
  503. * Metafunction that returns a string buffer for the given type. This is to
  504. * make sure that splitting `std::string` returns `std::string_view`.
  505. */
  506. template <class String>
  507. using TStringBufOf = typename TStringBufOfImpl<String>::type;
  508. template <class StringBuf, class Iterator>
  509. StringBuf DoMakeStringBuf(Iterator b, Iterator e, StringBuf*) {
  510. return StringBuf(b, e);
  511. }
  512. template <class Char, class Traits, class Iterator>
  513. std::basic_string_view<Char, Traits> DoMakeStringBuf(Iterator b, Iterator e, std::basic_string_view<Char, Traits>*) {
  514. return std::basic_string_view<Char, Traits>(b, e - b);
  515. }
  516. template <class StringBuf, class Iterator>
  517. StringBuf MakeStringBuf(Iterator b, Iterator e) {
  518. return DoMakeStringBuf(b, e, static_cast<StringBuf*>(nullptr));
  519. }
  520. template <class String>
  521. struct TIteratorOfImpl {
  522. using type = std::conditional_t<
  523. THasData<String>::value,
  524. const typename String::value_type*,
  525. typename String::const_iterator>;
  526. };
  527. template <class String>
  528. using TIteratorOf = typename TIteratorOfImpl<String>::type;
  529. template <class String>
  530. class TStringSplitter;
  531. template <class String>
  532. struct TIterState: public TStringBufOf<String> {
  533. public:
  534. using TStringBufType = TStringBufOf<String>;
  535. using TIterator = TIteratorOf<String>;
  536. friend class TStringSplitter<String>;
  537. template <typename S = String, std::enable_if_t<THasData<S>::value, int> = 0>
  538. TIterState(const String& string) noexcept
  539. : TStringBufType()
  540. , DelimiterEnd_(string.data())
  541. , OriginEnd_(string.data() + string.size())
  542. {
  543. }
  544. template <typename S = String, std::enable_if_t<!THasData<S>::value, int> = 0>
  545. TIterState(const String& string) noexcept
  546. : TStringBufType()
  547. , DelimiterEnd_(std::begin(string))
  548. , OriginEnd_(std::end(string))
  549. {
  550. }
  551. template <
  552. typename Other,
  553. typename = std::enable_if_t<
  554. std::is_convertible<Other, TStringBufType>::value>>
  555. bool operator==(const Other& toCompare) const {
  556. return TStringBufType(*this) == TStringBufType(toCompare);
  557. }
  558. TIterator TokenStart() const noexcept {
  559. return this->begin();
  560. }
  561. TIterator TokenDelim() const noexcept {
  562. return this->end();
  563. }
  564. TStringBufType Token() const noexcept {
  565. return *this;
  566. }
  567. TStringBufType Delim() const noexcept {
  568. return MakeStringBuf<TStringBufType>(TokenDelim(), DelimiterEnd_);
  569. }
  570. private:
  571. void UpdateParentBuf(TIterator tokenStart, TIterator tokenDelim) noexcept {
  572. *static_cast<TStringBufType*>(this) = MakeStringBuf<TStringBufType>(tokenStart, tokenDelim);
  573. }
  574. bool DelimiterIsEmpty() const noexcept {
  575. return TokenDelim() == DelimiterEnd_;
  576. }
  577. void MarkExhausted() noexcept {
  578. UpdateParentBuf(OriginEnd_, OriginEnd_);
  579. DelimiterEnd_ = OriginEnd_;
  580. }
  581. private:
  582. TIterator DelimiterEnd_;
  583. const TIterator OriginEnd_;
  584. };
  585. template <class Base>
  586. class TSplitRange: public Base, public TInputRangeAdaptor<TSplitRange<Base>> {
  587. using TStringBufType = decltype(std::declval<Base>().Next()->Token());
  588. public:
  589. template <typename... Args>
  590. inline TSplitRange(Args&&... args)
  591. : Base(std::forward<Args>(args)...)
  592. {
  593. }
  594. template <class Consumer, std::enable_if_t<std::is_same<decltype(std::declval<Consumer>()(std::declval<TStringBufType>())), void>::value, int>* = nullptr>
  595. inline void Consume(Consumer&& f) {
  596. for (auto&& it : *this) {
  597. f(it.Token());
  598. }
  599. }
  600. template <class Consumer, std::enable_if_t<std::is_same<decltype(std::declval<Consumer>()(std::declval<TStringBufType>())), bool>::value, int>* = nullptr>
  601. inline bool Consume(Consumer&& f) {
  602. for (auto&& it : *this) {
  603. if (!f(it.Token())) {
  604. return false;
  605. }
  606. }
  607. return true;
  608. }
  609. template <class Container, class = std::enable_if_t<THasInsert<Container>::value || THasPushBack<Container>::value>>
  610. operator Container() {
  611. Container result;
  612. AddTo(&result);
  613. return result;
  614. }
  615. template <class S>
  616. inline TVector<S> ToList() {
  617. TVector<S> result;
  618. for (auto&& it : *this) {
  619. result.push_back(S(it.Token()));
  620. }
  621. return result;
  622. }
  623. template <class Container>
  624. inline void Collect(Container* c) {
  625. Y_ASSERT(c);
  626. c->clear();
  627. AddTo(c);
  628. }
  629. template <class Container>
  630. inline void AddTo(Container* c) {
  631. Y_ASSERT(c);
  632. TContainerConsumer<Container> consumer(c);
  633. Consume(consumer);
  634. }
  635. template <class Container>
  636. inline void ParseInto(Container* c) {
  637. Y_ASSERT(c);
  638. TContainerConvertingConsumer<Container> consumer(c);
  639. Consume(consumer);
  640. }
  641. // TODO: this is actually TryParseInto
  642. /**
  643. * Same as `CollectInto`, just doesn't throw.
  644. *
  645. * \param[out] args Output arguments.
  646. * \returns Whether parsing was successful.
  647. */
  648. template <typename... Args>
  649. inline bool TryCollectInto(Args*... args) noexcept {
  650. size_t successfullyFilled = 0;
  651. auto it = this->begin();
  652. // FIXME: actually, some kind of TryApplyToMany is needed in order to stop iteration upon first failure
  653. ApplyToMany([&](auto&& arg) {
  654. if (it != this->end()) {
  655. if (TryDoFromString(it->Token(), arg)) {
  656. ++successfullyFilled;
  657. }
  658. ++it;
  659. }
  660. }, args...);
  661. return successfullyFilled == sizeof...(args) && it == this->end();
  662. }
  663. // TODO: this is actually ParseInto
  664. /**
  665. * Splits and parses everything that's in this splitter into `args`.
  666. *
  667. * Example usage:
  668. * \code
  669. * int l, r;
  670. * StringSplitter("100*200").Split('*').CollectInto(&l, &r);
  671. * \endcode
  672. *
  673. * \param[out] args Output arguments.
  674. * \throws If not all items were parsed, or
  675. * if there were too many items in the split.
  676. */
  677. template <typename... Args>
  678. inline void CollectInto(Args*... args) {
  679. Y_ENSURE(TryCollectInto<Args...>(args...));
  680. }
  681. inline size_t Count() {
  682. size_t cnt = 0;
  683. for (auto&& it : *this) {
  684. Y_UNUSED(it);
  685. ++cnt;
  686. }
  687. return cnt;
  688. }
  689. };
  690. template <class String>
  691. class TStringSplitter {
  692. using TStringType = String;
  693. using TChar = typename TStringType::value_type;
  694. using TIteratorState = TIterState<TStringType>;
  695. using TStringBufType = typename TIteratorState::TStringBufType;
  696. using TIterator = typename TIteratorState::TIterator;
  697. /**
  698. * Base class for all split ranges that actually does the splitting.
  699. */
  700. template <class DelimStorage>
  701. struct TSplitRangeBase {
  702. template <class OtherString, class... Args>
  703. inline TSplitRangeBase(OtherString&& s, Args&&... args)
  704. : String_(std::forward<OtherString>(s))
  705. , State_(String_)
  706. , Delimiter_(std::forward<Args>(args)...)
  707. {
  708. }
  709. TSplitRangeBase(const TSplitRangeBase& other)
  710. : String_(other.String_)
  711. , State_(String_)
  712. , Delimiter_(other.Delimiter_)
  713. {
  714. }
  715. TSplitRangeBase(TSplitRangeBase&& other)
  716. : String_(std::move(other.String_))
  717. , State_(String_)
  718. , Delimiter_(std::move(other.Delimiter_))
  719. {
  720. other.State_.MarkExhausted();
  721. }
  722. TSplitRangeBase& operator=(const TSplitRangeBase& other) = delete;
  723. TSplitRangeBase& operator=(TSplitRangeBase&& other) = delete;
  724. inline TIteratorState* Next() {
  725. if (State_.DelimiterIsEmpty()) {
  726. return nullptr;
  727. }
  728. const auto tokenBegin = State_.DelimiterEnd_;
  729. const auto tokenEnd = Delimiter_.Ptr()->Find(State_.DelimiterEnd_, State_.OriginEnd_);
  730. State_.UpdateParentBuf(tokenBegin, tokenEnd);
  731. return &State_;
  732. }
  733. private:
  734. TStringType String_;
  735. TIteratorState State_;
  736. DelimStorage Delimiter_;
  737. };
  738. template <class Base, class Filter>
  739. struct TFilterRange: public Base {
  740. template <class... Args>
  741. inline TFilterRange(const Base& base, Args&&... args)
  742. : Base(base)
  743. , Filter_(std::forward<Args>(args)...)
  744. {
  745. }
  746. inline TIteratorState* Next() {
  747. TIteratorState* ret;
  748. do {
  749. ret = Base::Next();
  750. } while (ret && !Filter_.Accept(ret));
  751. return ret;
  752. }
  753. Filter Filter_;
  754. };
  755. struct TNonEmptyFilter {
  756. template <class TToken>
  757. inline bool Accept(const TToken* token) noexcept {
  758. return !token->empty();
  759. }
  760. };
  761. template <class TIter>
  762. struct TStopIteration;
  763. template <class Base>
  764. struct TFilters: public Base {
  765. template <class TFilter>
  766. using TIt = TSplitRange<TStopIteration<TFilters<TFilterRange<Base, TFilter>>>>;
  767. template <typename... Args>
  768. inline TFilters(Args&&... args)
  769. : Base(std::forward<Args>(args)...)
  770. {
  771. }
  772. inline TIt<TNonEmptyFilter> SkipEmpty() const {
  773. return {*this};
  774. }
  775. };
  776. template <class Base, class Stopper>
  777. struct TStopRange: public Base {
  778. template <typename... Args>
  779. inline TStopRange(const Base& base, Args&&... args)
  780. : Base(base)
  781. , Stopper_(std::forward<Args>(args)...)
  782. {
  783. }
  784. inline TIteratorState* Next() {
  785. TIteratorState* ret = Base::Next();
  786. if (!ret || Stopper_.Stop(ret)) {
  787. return nullptr;
  788. }
  789. return ret;
  790. }
  791. Stopper Stopper_;
  792. };
  793. struct TTake {
  794. TTake() = default;
  795. TTake(size_t count)
  796. : Count(count)
  797. {
  798. }
  799. template <class TToken>
  800. inline bool Stop(TToken*) noexcept {
  801. if (Count > 0) {
  802. --Count;
  803. return false;
  804. } else {
  805. return true;
  806. }
  807. }
  808. size_t Count = 0;
  809. };
  810. struct TLimit {
  811. TLimit() = default;
  812. TLimit(size_t count)
  813. : Count(count)
  814. {
  815. Y_ASSERT(Count > 0);
  816. }
  817. template <class TToken>
  818. inline bool Stop(TToken* token) noexcept {
  819. if (Count > 1) {
  820. --Count;
  821. return false;
  822. } else if (Count == 1) {
  823. token->DelimiterEnd_ = token->OriginEnd_;
  824. token->UpdateParentBuf(token->TokenStart(), token->DelimiterEnd_);
  825. return false;
  826. }
  827. return true;
  828. }
  829. size_t Count = 0;
  830. };
  831. template <class Base>
  832. struct TStopIteration: public Base {
  833. template <class TStopper>
  834. using TIt = TSplitRange<TStopIteration<TFilters<TStopRange<Base, TStopper>>>>;
  835. template <typename... Args>
  836. inline TStopIteration(Args&&... args)
  837. : Base(std::forward<Args>(args)...)
  838. {
  839. }
  840. inline TIt<TTake> Take(size_t count) {
  841. return {*this, count};
  842. }
  843. inline TIt<TLimit> Limit(size_t count) {
  844. return {*this, count};
  845. }
  846. };
  847. template <class TPolicy>
  848. using TIt = TSplitRange<TStopIteration<TFilters<TSplitRangeBase<TPolicy>>>>;
  849. public:
  850. template <class OtherString>
  851. explicit TStringSplitter(OtherString&& s)
  852. : String_(std::forward<OtherString>(s))
  853. {
  854. }
  855. // does not own TDelim
  856. template <class TDelim>
  857. inline TIt<TPtrPolicy<const TDelim>> Split(const TDelim& d) const noexcept {
  858. return {String_, &d};
  859. }
  860. inline TIt<TEmbedPolicy<TCharDelimiter<const TChar>>> Split(TChar ch) const noexcept {
  861. return {String_, ch};
  862. }
  863. inline TIt<TSimpleRefPolicy<TSetDelimiter<const TChar>>> SplitBySet(const TChar* set) const noexcept {
  864. return {String_, set};
  865. }
  866. inline TIt<TEmbedPolicy<TStringDelimiter<const TChar>>> SplitByString(const TStringBufType& str) const noexcept {
  867. return {String_, str.data(), str.size()};
  868. }
  869. template <class TFunc>
  870. inline TIt<TEmbedPolicy<TFuncDelimiter<TIterator, TFunc>>> SplitByFunc(TFunc f) const noexcept {
  871. return {String_, f};
  872. }
  873. private:
  874. TStringType String_;
  875. };
  876. template <class String>
  877. auto MakeStringSplitter(String&& s) {
  878. return TStringSplitter<std::remove_reference_t<String>>(std::forward<String>(s));
  879. }
  880. } // namespace NStringSplitPrivate
  881. template <class Iterator>
  882. auto StringSplitter(Iterator begin, Iterator end) {
  883. return ::NStringSplitPrivate::MakeStringSplitter(TIteratorRange<Iterator>(begin, end));
  884. }
  885. template <class Char>
  886. auto StringSplitter(const Char* begin, const Char* end) {
  887. return ::NStringSplitPrivate::MakeStringSplitter(TBasicStringBuf<Char>(begin, end));
  888. }
  889. template <class Char>
  890. auto StringSplitter(const Char* begin, size_t len) {
  891. return ::NStringSplitPrivate::MakeStringSplitter(TBasicStringBuf<Char>(begin, len));
  892. }
  893. template <class Char>
  894. auto StringSplitter(const Char* str) {
  895. return ::NStringSplitPrivate::MakeStringSplitter(TBasicStringBuf<Char>(str));
  896. }
  897. template <class String, std::enable_if_t<!std::is_pointer<std::remove_reference_t<String>>::value, int> = 0>
  898. auto StringSplitter(String& s) {
  899. return ::NStringSplitPrivate::MakeStringSplitter(::NStringSplitPrivate::TStringBufOf<String>(s.data(), s.size()));
  900. }
  901. template <class String, std::enable_if_t<!std::is_pointer<std::remove_reference_t<String>>::value, int> = 0>
  902. auto StringSplitter(String&& s) {
  903. return ::NStringSplitPrivate::MakeStringSplitter(std::move(s));
  904. }