// split_ut.cpp: unit tests for SplitString(), Split() and StringSplitter().
  1. #include "split.h"
  2. #include <library/cpp/testing/unittest/registar.h>
  3. #include <util/stream/output.h>
  4. #include <util/charset/wide.h>
  5. #include <util/datetime/cputimer.h>
  6. #include <util/generic/maybe.h>
  7. #include <string>
  8. #include <string_view>
  // In-place tab splitter. Every '\t' in pszBuf is overwritten with '\0' and
  // *pRes receives a pointer to the start of each field (the first field is
  // always reported, even for an empty buffer). Previous contents of *pRes
  // are discarded.
  template <typename T>
  static inline void OldSplit(char* pszBuf, T* pRes) {
      pRes->resize(0);
      pRes->push_back(pszBuf);
      char* cursor = pszBuf;
      while (*cursor) {
          char* const current = cursor++;
          if (*current != '\t') {
              continue;
          }
          // Terminate the field that just ended; the next one starts right after.
          *current = 0;
          pRes->push_back(cursor);
      }
  }
  20. template <class T1, class T2>
  21. inline void Cmp(const T1& t1, const T2& t2) {
  22. try {
  23. UNIT_ASSERT_EQUAL(t1.size(), t2.size());
  24. } catch (...) {
  25. Print(t1);
  26. Cerr << "---------------" << Endl;
  27. Print(t2);
  28. throw;
  29. }
  30. auto i = t1.begin();
  31. auto j = t2.begin();
  32. for (; i != t1.end() && j != t2.end(); ++i, ++j) {
  33. try {
  34. UNIT_ASSERT_EQUAL(*i, *j);
  35. } catch (...) {
  36. Cerr << "(" << *i << ")->(" << *j << ")" << Endl;
  37. throw;
  38. }
  39. }
  40. }
  41. template <class T>
  42. inline void Print(const T& t) {
  43. for (typename T::const_iterator i = t.begin(); i != t.end(); ++i) {
  44. Cerr << *i << Endl;
  45. }
  46. }
  // Splits the zero-terminated string `str` with `delim`, collecting tokens
  // through TConsumer<TResult>, and checks the outcome against `good`.
  template <template <typename> class TConsumer, typename TResult, typename I, typename TDelimiter>
  void TestDelimiterOnString(TResult& good, I* str, const TDelimiter& delim) {
      TResult test;
      TConsumer<TResult> sink(&test);

      SplitString(str, delim, sink);

      Cmp(good, test); // prints diagnostics on mismatch
      UNIT_ASSERT_EQUAL(good, test);
  }
  // Same check as TestDelimiterOnString, but the input is the explicit
  // character range [b, e) instead of a zero-terminated string.
  template <template <typename> class TConsumer, typename TResult, typename I, typename TDelimiter>
  void TestDelimiterOnRange(TResult& good, I* b, I* e, const TDelimiter& delim) {
      TResult test;
      TConsumer<TResult> sink(&test);

      SplitString(b, e, delim, sink);

      Cmp(good, test); // prints diagnostics on mismatch
      UNIT_ASSERT_EQUAL(good, test);
  }
  63. template <typename TConsumer, typename TResult, typename I>
  64. void TestConsumerOnString(TResult& good, I* str, I* d) {
  65. TResult test;
  66. TContainerConsumer<TResult> consumer(&test);
  67. TConsumer tested(&consumer);
  68. TCharDelimiter<const I> delim(*d);
  69. SplitString(str, delim, tested);
  70. Cmp(good, test);
  71. UNIT_ASSERT_EQUAL(good, test);
  72. }
  73. template <typename TConsumer, typename TResult, typename I>
  74. void TestConsumerOnRange(TResult& good, I* b, I* e, I* d) {
  75. TResult test;
  76. TContainerConsumer<TResult> consumer(&test);
  77. TConsumer tested(&consumer);
  78. TCharDelimiter<const I> delim(*d);
  79. SplitString(b, e, delim, tested);
  80. Cmp(good, test);
  81. UNIT_ASSERT_EQUAL(good, test);
  82. }
  83. using TStrokaConsumer = TContainerConsumer<TVector<TString>>;
  84. void TestLimitingConsumerOnString(TVector<TString>& good, const char* str, const char* d, size_t n, const char* last) {
  85. TVector<TString> test;
  86. TStrokaConsumer consumer(&test);
  87. TLimitingConsumer<TStrokaConsumer, const char> limits(n, &consumer);
  88. TCharDelimiter<const char> delim(*d);
  89. SplitString(str, delim, limits);
  90. Cmp(good, test);
  91. UNIT_ASSERT_EQUAL(good, test);
  92. UNIT_ASSERT_EQUAL(TString(limits.Last), TString(last)); // Quite unobvious behaviour. Why the last token is not added to slave consumer?
  93. }
  94. void TestLimitingConsumerOnRange(TVector<TString>& good, const char* b, const char* e, const char* d, size_t n, const char* last) {
  95. TVector<TString> test;
  96. TStrokaConsumer consumer(&test);
  97. TLimitingConsumer<TStrokaConsumer, const char> limits(n, &consumer);
  98. TCharDelimiter<const char> delim(*d);
  99. SplitString(b, e, delim, limits);
  100. Cmp(good, test);
  101. UNIT_ASSERT_EQUAL(good, test);
  102. UNIT_ASSERT_EQUAL(TString(limits.Last), TString(last));
  103. }
  104. Y_UNIT_TEST_SUITE(SplitStringTest) {
  105. Y_UNIT_TEST(TestCharSingleDelimiter) {
  106. TString data("qw ab qwabcab");
  107. TString canonic[] = {"qw", "ab", "", "qwabcab"};
  108. TVector<TString> good(canonic, canonic + 4);
  109. TCharDelimiter<const char> delim(' ');
  110. TestDelimiterOnString<TContainerConsumer>(good, data.data(), delim);
  111. TestDelimiterOnRange<TContainerConsumer>(good, data.data(), data.end(), delim);
  112. }
  113. Y_UNIT_TEST(TestWideSingleDelimiter) {
  114. TUtf16String data(u"qw ab qwabcab");
  115. TUtf16String canonic[] = {u"qw", u"ab", TUtf16String(), u"qwabcab"};
  116. TVector<TUtf16String> good(canonic, canonic + 4);
  117. TCharDelimiter<const wchar16> delim(' ');
  118. TestDelimiterOnString<TContainerConsumer>(good, data.data(), delim);
  119. TestDelimiterOnRange<TContainerConsumer>(good, data.data(), data.end(), delim);
  120. }
  121. Y_UNIT_TEST(TestConvertToIntCharSingleDelimiter) {
  122. TString data("42 4242 -12345 0");
  123. i32 canonic[] = {42, 4242, -12345, 0};
  124. TVector<i32> good(canonic, canonic + 4);
  125. TCharDelimiter<const char> delim(' ');
  126. TestDelimiterOnString<TContainerConvertingConsumer>(good, data.data(), delim);
  127. TestDelimiterOnRange<TContainerConvertingConsumer>(good, data.data(), data.end(), delim);
  128. }
  129. Y_UNIT_TEST(TestCharSkipEmpty) {
  130. TString data("qw ab qwabcab ");
  131. TString canonic[] = {"qw", "ab", "qwabcab"};
  132. TVector<TString> good(canonic, canonic + 3);
  133. TestConsumerOnString<TSkipEmptyTokens<TStrokaConsumer>>(good, data.data(), " ");
  134. TestConsumerOnRange<TSkipEmptyTokens<TStrokaConsumer>>(good, data.data(), data.end(), " ");
  135. }
  136. Y_UNIT_TEST(TestCharKeepDelimiters) {
  137. TString data("qw ab qwabcab ");
  138. TString canonic[] = {"qw", " ", "ab", " ", "", " ", "qwabcab", " ", ""};
  139. TVector<TString> good(canonic, canonic + 9);
  140. TestConsumerOnString<TKeepDelimiters<TStrokaConsumer>>(good, data.data(), " ");
  141. TestConsumerOnRange<TKeepDelimiters<TStrokaConsumer>>(good, data.data(), data.end(), " ");
  142. }
  143. Y_UNIT_TEST(TestCharLimit) {
  144. TString data("qw ab qwabcab ");
  145. TString canonic[] = {"qw", "ab"};
  146. TVector<TString> good(canonic, canonic + 2);
  147. TestLimitingConsumerOnString(good, data.data(), " ", 3, " qwabcab ");
  148. TestLimitingConsumerOnRange(good, data.data(), data.end(), " ", 3, " qwabcab ");
  149. }
  150. Y_UNIT_TEST(TestCharStringDelimiter) {
  151. TString data("qw ab qwababcab");
  152. TString canonic[] = {"qw ", " qw", "", "c", ""};
  153. TVector<TString> good(canonic, canonic + 5);
  154. TStringDelimiter<const char> delim("ab");
  155. TestDelimiterOnString<TContainerConsumer>(good, data.data(), delim);
  156. TestDelimiterOnRange<TContainerConsumer>(good, data.data(), data.end(), delim);
  157. }
  158. Y_UNIT_TEST(TestWideStringDelimiter) {
  159. TUtf16String data(u"qw ab qwababcab");
  160. TUtf16String canonic[] = {u"qw ", u" qw", TUtf16String(), u"c", TUtf16String()};
  161. TVector<TUtf16String> good(canonic, canonic + 5);
  162. TUtf16String wideDelim(u"ab");
  163. TStringDelimiter<const wchar16> delim(wideDelim.data());
  164. TestDelimiterOnString<TContainerConsumer>(good, data.data(), delim);
  165. TestDelimiterOnRange<TContainerConsumer>(good, data.data(), data.end(), delim);
  166. }
  167. Y_UNIT_TEST(TestCharSetDelimiter) {
  168. TString data("qw ab qwababccab");
  169. TString canonic[] = {"q", " ab q", "abab", "", "ab"};
  170. TVector<TString> good(canonic, canonic + 5);
  171. TSetDelimiter<const char> delim("wc");
  172. TestDelimiterOnString<TContainerConsumer>(good, data.data(), delim);
  173. TestDelimiterOnRange<TContainerConsumer>(good, data.data(), data.end(), delim);
  174. }
  175. Y_UNIT_TEST(TestWideSetDelimiter) {
  176. TUtf16String data(u"qw ab qwababccab");
  177. TUtf16String canonic[] = {u"q", u" ab q", u"abab", TUtf16String(), u"ab"};
  178. TVector<TUtf16String> good(canonic, canonic + 5);
  179. TUtf16String wideDelim(u"wc");
  180. TSetDelimiter<const wchar16> delim(wideDelim.data());
  181. TestDelimiterOnString<TContainerConsumer>(good, data.data(), delim);
  182. }
  183. Y_UNIT_TEST(TestWideSetDelimiterRange) {
  184. TUtf16String data(u"qw ab qwababccab");
  185. TUtf16String canonic[] = {u"q", u" ab q", u"abab", TUtf16String(), u"ab"};
  186. TVector<TUtf16String> good(1);
  187. TUtf16String wideDelim(u"wc");
  188. TSetDelimiter<const wchar16> delim(wideDelim.data());
  189. TVector<TUtf16String> test;
  190. TContainerConsumer<TVector<TUtf16String>> consumer(&test);
  191. SplitString(data.data(), data.data(), delim, consumer); // Empty string is still inserted into consumer
  192. Cmp(good, test);
  193. good.assign(canonic, canonic + 4);
  194. good.push_back(TUtf16String());
  195. test.clear();
  196. SplitString(data.data(), data.end() - 2, delim, consumer);
  197. Cmp(good, test);
  198. }
  199. Y_UNIT_TEST(TestSplit) {
  200. TString data("qw ab qwababcba");
  201. TString canonic[] = {"qw ", " qw", "c"};
  202. TVector<TString> good(canonic, canonic + 3);
  203. TString delim = "ab";
  204. TVector<TString> test;
  205. Split(data, delim, test);
  206. Cmp(good, test);
  207. TVector<TStringBuf> test1;
  208. Split(data, delim.data(), test1);
  209. Cmp(good, test1);
  210. }
  211. Y_UNIT_TEST(ConvenientSplitTest) {
  212. TString data("abc 22 33.5 xyz");
  213. TString str;
  214. int num1 = 0;
  215. double num2 = 0;
  216. TStringBuf strBuf;
  217. Split(data, ' ', str, num1, num2, strBuf);
  218. UNIT_ASSERT_VALUES_EQUAL(str, "abc");
  219. UNIT_ASSERT_VALUES_EQUAL(num1, 22);
  220. UNIT_ASSERT_VALUES_EQUAL(num2, 33.5);
  221. UNIT_ASSERT_VALUES_EQUAL(strBuf, "xyz");
  222. }
  223. Y_UNIT_TEST(ConvenientSplitTestWithMaybe) {
  224. TString data("abc 42");
  225. TString str;
  226. TMaybe<double> num2 = 1;
  227. TMaybe<double> maybe = 1;
  228. Split(data, ' ', str, num2, maybe);
  229. UNIT_ASSERT_VALUES_EQUAL(str, "abc");
  230. UNIT_ASSERT_VALUES_EQUAL(*num2, 42);
  231. UNIT_ASSERT(!maybe);
  232. }
  233. Y_UNIT_TEST(ConvenientSplitTestExceptions) {
  234. TString data("abc 22 33");
  235. TString s1, s2, s3, s4;
  236. UNIT_ASSERT_EXCEPTION(Split(data, ' ', s1, s2), yexception);
  237. UNIT_ASSERT_NO_EXCEPTION(Split(data, ' ', s1, s2, s3));
  238. UNIT_ASSERT_EXCEPTION(Split(data, ' ', s1, s2, s3, s4), yexception);
  239. }
  240. Y_UNIT_TEST(ConvenientSplitTestMaybeExceptions) {
  241. TString data("abc 22 33");
  242. TString s1, s2;
  243. TMaybe<TString> m1, m2;
  244. UNIT_ASSERT_EXCEPTION(Split(data, ' ', s1, m1), yexception);
  245. UNIT_ASSERT_EXCEPTION(Split(data, ' ', m1, m2), yexception);
  246. UNIT_ASSERT_NO_EXCEPTION(Split(data, ' ', s1, s2, m1));
  247. UNIT_ASSERT_NO_EXCEPTION(Split(data, ' ', s1, s2, m1, m2));
  248. UNIT_ASSERT_EXCEPTION(Split(data, ' ', m1, m2, s1, s2), yexception);
  249. UNIT_ASSERT_NO_EXCEPTION(Split(data, ' ', s1, s2, m1, m2, m1, m1, m1, m1));
  250. UNIT_ASSERT_EXCEPTION(Split(data, ' ', s1, s2, m1, m2, m1, m1, m1, m1, s1), yexception);
  251. }
  252. } // Y_UNIT_TEST_SUITE(SplitStringTest)
  253. template <typename I, typename C>
  254. void TestStringSplitterCount(I* str, C delim, size_t good) {
  255. auto split = StringSplitter(str).Split(delim);
  256. size_t res = split.Count();
  257. UNIT_ASSERT_VALUES_EQUAL(res, good);
  258. res = split.Count();
  259. UNIT_ASSERT_VALUES_EQUAL(res, 0);
  260. }
  261. Y_UNIT_TEST_SUITE(StringSplitter) {
  262. Y_UNIT_TEST(TestSplit) {
  263. int sum = 0;
  264. for (const auto& it : StringSplitter("1,2,3").Split(',')) {
  265. sum += FromString<int>(it.Token());
  266. }
  267. UNIT_ASSERT_VALUES_EQUAL(sum, 6);
  268. }
  269. Y_UNIT_TEST(TestSplit1) {
  270. int cnt = 0;
  271. for (const auto& it : StringSplitter(" ").Split(' ')) {
  272. (void)it;
  273. ++cnt;
  274. }
  275. UNIT_ASSERT_VALUES_EQUAL(cnt, 2);
  276. }
  277. Y_UNIT_TEST(TestSplitLimited) {
  278. TVector<TString> expected = {"1", "2", "3,4,5"};
  279. TVector<TString> actual = StringSplitter("1,2,3,4,5").Split(',').Limit(3).ToList<TString>();
  280. UNIT_ASSERT_VALUES_EQUAL(expected, actual);
  281. }
  282. Y_UNIT_TEST(TestSplitLimitedWithEmptySkip) {
  283. TVector<TString> expected = {"1", "2", "3,4,5"};
  284. TVector<TString> actual = StringSplitter("1,,,2,,,,3,4,5").Split(',').SkipEmpty().Limit(3).ToList<TString>();
  285. UNIT_ASSERT_VALUES_EQUAL(expected, actual);
  286. expected = {"1", "2", ",,,3,4,5"};
  287. actual = StringSplitter("1,2,,,,3,4,5").Split(',').Limit(3).SkipEmpty().ToList<TString>();
  288. UNIT_ASSERT_VALUES_EQUAL(expected, actual);
  289. }
  290. Y_UNIT_TEST(TestSplitBySet) {
  291. int sum = 0;
  292. for (const auto& it : StringSplitter("1,2:3").SplitBySet(",:")) {
  293. sum += FromString<int>(it.Token());
  294. }
  295. UNIT_ASSERT_VALUES_EQUAL(sum, 6);
  296. }
  297. Y_UNIT_TEST(TestSplitBySetLimited) {
  298. TVector<TString> expected = {"1", "2", "3,4:5"};
  299. TVector<TString> actual = StringSplitter("1,2:3,4:5").SplitBySet(",:").Limit(3).ToList<TString>();
  300. UNIT_ASSERT_VALUES_EQUAL(expected, actual);
  301. }
  302. Y_UNIT_TEST(TestSplitBySetLimitedWithEmptySkip) {
  303. TVector<TString> expected = {"1", "2", "3,4:5"};
  304. TVector<TString> actual = StringSplitter("1,:,2::::,3,4:5").SplitBySet(",:").SkipEmpty().Limit(3).ToList<TString>();
  305. UNIT_ASSERT_VALUES_EQUAL(expected, actual);
  306. expected = {"1", ",2::::,3,4:5"};
  307. actual = StringSplitter("1,:,2::::,3,4:5").SplitBySet(",:").Limit(3).SkipEmpty().ToList<TString>();
  308. UNIT_ASSERT_VALUES_EQUAL(expected, actual);
  309. }
  310. Y_UNIT_TEST(TestSplitByString) {
  311. int sum = 0;
  312. for (const auto& it : StringSplitter("1ab2ab3").SplitByString("ab")) {
  313. sum += FromString<int>(it.Token());
  314. }
  315. UNIT_ASSERT_VALUES_EQUAL(sum, 6);
  316. }
  317. Y_UNIT_TEST(TestSplitByStringLimited) {
  318. TVector<TString> expected = {"1", "2", "3ab4ab5"};
  319. TVector<TString> actual = StringSplitter("1ab2ab3ab4ab5").SplitByString("ab").Limit(3).ToList<TString>();
  320. UNIT_ASSERT_VALUES_EQUAL(expected, actual);
  321. }
  322. Y_UNIT_TEST(TestSplitByStringLimitedWithEmptySkip) {
  323. TVector<TString> expected = {"1", "2", "3ab4ab5"};
  324. TVector<TString> actual = StringSplitter("1abab2ababababab3ab4ab5").SplitByString("ab").SkipEmpty().Limit(3).ToList<TString>();
  325. UNIT_ASSERT_VALUES_EQUAL(expected, actual);
  326. }
  327. Y_UNIT_TEST(TestSplitByFunc) {
  328. TString s = "123 456 \t\n789\n10\t 20";
  329. TVector<TString> pattern = {"123", "456", "789", "10", "20"};
  330. TVector<TString> tokens;
  331. auto f = [](char a) { return a == ' ' || a == '\t' || a == '\n'; };
  332. for (auto v : StringSplitter(s).SplitByFunc(f)) {
  333. if (v) {
  334. tokens.emplace_back(v);
  335. }
  336. }
  337. UNIT_ASSERT(tokens == pattern);
  338. }
  339. Y_UNIT_TEST(TestSplitByFuncLimited) {
  340. TVector<TString> expected = {"1", "2", "3a4b5"};
  341. auto f = [](char a) { return a == 'a' || a == 'b'; };
  342. TVector<TString> actual = StringSplitter("1a2b3a4b5").SplitByFunc(f).Limit(3).ToList<TString>();
  343. UNIT_ASSERT_VALUES_EQUAL(expected, actual);
  344. }
  345. Y_UNIT_TEST(TestSplitByFuncLimitedWithEmptySkip) {
  346. TVector<TString> expected = {"1", "2", "3a4b5"};
  347. auto f = [](char a) { return a == 'a' || a == 'b'; };
  348. TVector<TString> actual = StringSplitter("1aaba2bbababa3a4b5").SplitByFunc(f).SkipEmpty().Limit(3).Take(3).ToList<TString>();
  349. UNIT_ASSERT_VALUES_EQUAL(expected, actual);
  350. }
  351. Y_UNIT_TEST(TestSkipEmpty) {
  352. int sum = 0;
  353. for (const auto& it : StringSplitter(" 1 2 3 ").Split(' ').SkipEmpty()) {
  354. sum += FromString<int>(it.Token());
  355. }
  356. UNIT_ASSERT_VALUES_EQUAL(sum, 6);
  357. // double
  358. sum = 0;
  359. for (const auto& it : StringSplitter(" 1 2 3 ").Split(' ').SkipEmpty().SkipEmpty()) {
  360. sum += FromString<int>(it.Token());
  361. }
  362. UNIT_ASSERT_VALUES_EQUAL(sum, 6);
  363. TString ssum;
  364. for (const auto& it : StringSplitter(" 1 2 3 " + std::string(100, ' ')).Split(' ').SkipEmpty()) {
  365. ssum += FromString<TString>(it.Token());
  366. ssum += ';';
  367. }
  368. UNIT_ASSERT_VALUES_EQUAL(ssum, "1;2;3;");
  369. }
  370. Y_UNIT_TEST(TestTake) {
  371. TVector<TString> expected = {"1", "2", "3"};
  372. UNIT_ASSERT_VALUES_EQUAL(expected, StringSplitter("1 2 3 4 5 6 7 8 9 10").Split(' ').Take(3).ToList<TString>());
  373. expected = {"1", "2"};
  374. UNIT_ASSERT_VALUES_EQUAL(expected, StringSplitter(" 1 2 3 ").Split(' ').SkipEmpty().Take(2).ToList<TString>());
  375. expected = {"1", "2", "3"};
  376. UNIT_ASSERT_VALUES_EQUAL(expected, StringSplitter("1 2 3 4 5 6 7 8 9 10").Split(' ').Take(5).Take(3).ToList<TString>());
  377. UNIT_ASSERT_VALUES_EQUAL(expected, StringSplitter("1 2 3 4 5 6 7 8 9 10").Split(' ').Take(3).Take(5).ToList<TString>());
  378. expected = {"1", "2"};
  379. UNIT_ASSERT_VALUES_EQUAL(expected, StringSplitter(" 1 2 3 ").Split(' ').Take(4).SkipEmpty().ToList<TString>());
  380. expected = {"1"};
  381. UNIT_ASSERT_VALUES_EQUAL(expected, StringSplitter(" 1 2 3 ").Split(' ').Take(4).SkipEmpty().Take(1).ToList<TString>());
  382. }
  383. Y_UNIT_TEST(TestCompile) {
  384. (void)StringSplitter(TString());
  385. (void)StringSplitter(TStringBuf());
  386. (void)StringSplitter("", 0);
  387. }
  388. Y_UNIT_TEST(TestStringSplitterCountEmpty) {
  389. TCharDelimiter<const char> delim(' ');
  390. TestStringSplitterCount("", delim, 1);
  391. }
  392. Y_UNIT_TEST(TestStringSplitterCountOne) {
  393. TCharDelimiter<const char> delim(' ');
  394. TestStringSplitterCount("one", delim, 1);
  395. }
  396. Y_UNIT_TEST(TestStringSplitterCountWithOneDelimiter) {
  397. TCharDelimiter<const char> delim(' ');
  398. TestStringSplitterCount("one two", delim, 2);
  399. }
  400. Y_UNIT_TEST(TestStringSplitterCountWithTrailing) {
  401. TCharDelimiter<const char> delim(' ');
  402. TestStringSplitterCount(" one ", delim, 3);
  403. }
  404. Y_UNIT_TEST(TestStringSplitterConsume) {
  405. TVector<TString> expected = {"1", "2", "3"};
  406. TVector<TString> actual;
  407. auto func = [&actual](const TBasicStringBuf<char>& token) {
  408. actual.push_back(TString(token));
  409. };
  410. StringSplitter("1 2 3").Split(' ').Consume(func);
  411. UNIT_ASSERT_VALUES_EQUAL(expected, actual);
  412. }
  413. Y_UNIT_TEST(TestStringSplitterConsumeConditional) {
  414. TVector<TString> expected = {"1", "2"};
  415. TVector<TString> actual;
  416. auto func = [&actual](const TBasicStringBuf<char>& token) {
  417. if (token == "3") {
  418. return false;
  419. }
  420. actual.push_back(TString(token));
  421. return true;
  422. };
  423. bool completed = StringSplitter("1 2 3 4 5").Split(' ').Consume(func);
  424. UNIT_ASSERT(!completed);
  425. UNIT_ASSERT_VALUES_EQUAL(expected, actual);
  426. }
  427. Y_UNIT_TEST(TestStringSplitterToList) {
  428. TVector<TString> expected = {"1", "2", "3"};
  429. TVector<TString> actual = StringSplitter("1 2 3").Split(' ').ToList<TString>();
  430. UNIT_ASSERT_VALUES_EQUAL(expected, actual);
  431. }
  432. Y_UNIT_TEST(TestStringSplitterCollectPushBack) {
  433. TVector<TString> expected = {"1", "2", "3"};
  434. TVector<TString> actual;
  435. StringSplitter("1 2 3").Split(' ').Collect(&actual);
  436. UNIT_ASSERT_VALUES_EQUAL(expected, actual);
  437. }
  438. Y_UNIT_TEST(TestStringSplitterCollectInsert) {
  439. TSet<TString> expected = {"1", "2", "3"};
  440. TSet<TString> actual;
  441. StringSplitter("1 2 3 1 2 3").Split(' ').Collect(&actual);
  442. UNIT_ASSERT_VALUES_EQUAL(expected, actual);
  443. }
  444. Y_UNIT_TEST(TestStringSplitterCollectClears) {
  445. TVector<TString> v;
  446. StringSplitter("1 2 3").Split(' ').Collect(&v);
  447. UNIT_ASSERT_VALUES_EQUAL(v.size(), 3);
  448. StringSplitter("4 5").Split(' ').Collect(&v);
  449. UNIT_ASSERT_VALUES_EQUAL(v.size(), 2);
  450. }
  451. Y_UNIT_TEST(TestStringSplitterAddToDoesntClear) {
  452. TVector<TString> v;
  453. StringSplitter("1 2 3").Split(' ').AddTo(&v);
  454. UNIT_ASSERT_VALUES_EQUAL(v.size(), 3);
  455. StringSplitter("4 5").Split(' ').AddTo(&v);
  456. UNIT_ASSERT_VALUES_EQUAL(v.size(), 5);
  457. }
  458. Y_UNIT_TEST(TestSplitStringInto) {
  459. int a = -1;
  460. TStringBuf s;
  461. double d = -1;
  462. StringSplitter("2 substr 1.02").Split(' ').CollectInto(&a, &s, &d);
  463. UNIT_ASSERT_VALUES_EQUAL(a, 2);
  464. UNIT_ASSERT_VALUES_EQUAL(s, "substr");
  465. UNIT_ASSERT_DOUBLES_EQUAL(d, 1.02, 0.0001);
  466. UNIT_ASSERT_EXCEPTION(StringSplitter("1").Split(' ').CollectInto(&a, &a), yexception);
  467. UNIT_ASSERT_EXCEPTION(StringSplitter("1 2 3").Split(' ').CollectInto(&a, &a), yexception);
  468. }
  469. Y_UNIT_TEST(TestSplitStringWithIgnore) {
  470. TStringBuf s;
  471. StringSplitter("x y z").Split(' ').CollectInto(&std::ignore, &s, &std::ignore);
  472. UNIT_ASSERT_VALUES_EQUAL(s, "y");
  473. UNIT_ASSERT_EXCEPTION(StringSplitter("ignored != non-requred").Split(':').CollectInto(&s, &std::ignore), yexception);
  474. }
  475. Y_UNIT_TEST(TestTryCollectInto) {
  476. int a, b, c;
  477. bool parsingSucceeded;
  478. parsingSucceeded = StringSplitter("100,500,3").Split(',').TryCollectInto(&a, &b, &c);
  479. UNIT_ASSERT(parsingSucceeded);
  480. UNIT_ASSERT_VALUES_EQUAL(a, 100);
  481. UNIT_ASSERT_VALUES_EQUAL(b, 500);
  482. UNIT_ASSERT_VALUES_EQUAL(c, 3);
  483. // not enough tokens
  484. parsingSucceeded = StringSplitter("3,14").Split(',').TryCollectInto(&a, &b, &c);
  485. UNIT_ASSERT(!parsingSucceeded);
  486. // too many tokens
  487. parsingSucceeded = StringSplitter("3,14,15,92,6").Split(',').TryCollectInto(&a, &b, &c);
  488. UNIT_ASSERT(!parsingSucceeded);
  489. // where single TryFromString fails
  490. parsingSucceeded = StringSplitter("ot topota kopyt pyl po polu letit").Split(' ').TryCollectInto(&a, &b, &c);
  491. UNIT_ASSERT(!parsingSucceeded);
  492. }
  493. Y_UNIT_TEST(TestOwningSplit1) {
  494. int sum = 0;
  495. for (const auto& it : StringSplitter(TString("1,2,3")).Split(',')) {
  496. sum += FromString<int>(it.Token());
  497. }
  498. UNIT_ASSERT_VALUES_EQUAL(sum, 6);
  499. }
  500. Y_UNIT_TEST(TestOwningSplit2) {
  501. int sum = 0;
  502. TString str("1,2,3");
  503. for (const auto& it : StringSplitter(str).Split(',')) {
  504. sum += FromString<int>(it.Token());
  505. }
  506. UNIT_ASSERT_VALUES_EQUAL(sum, 6);
  507. }
  508. Y_UNIT_TEST(TestOwningSplit3) {
  509. int sum = 0;
  510. const TString str("1,2,3");
  511. for (const auto& it : StringSplitter(str).Split(',')) {
  512. sum += FromString<int>(it.Token());
  513. }
  514. UNIT_ASSERT_VALUES_EQUAL(sum, 6);
  515. }
  516. Y_UNIT_TEST(TestAssigment) {
  517. TVector<TString> expected0 = {"1", "2", "3", "4"};
  518. TVector<TString> actual0 = StringSplitter("1 2 3 4").Split(' ');
  519. UNIT_ASSERT_VALUES_EQUAL(expected0, actual0);
  520. TSet<TString> expected1 = {"11", "22", "33", "44"};
  521. TSet<TString> actual1 = StringSplitter("11 22 33 44").Split(' ');
  522. UNIT_ASSERT_VALUES_EQUAL(expected1, actual1);
  523. TSet<TString> expected2 = {"11", "aa"};
  524. auto actual2 = static_cast<TSet<TString>>(StringSplitter("11 aa 11 11 aa").Split(' '));
  525. UNIT_ASSERT_VALUES_EQUAL(expected2, actual2);
  526. TVector<TString> expected3 = {"dd", "bb"};
  527. auto actual3 = TVector<TString>(StringSplitter("dd\tbb").Split('\t'));
  528. UNIT_ASSERT_VALUES_EQUAL(expected3, actual3);
  529. }
  530. Y_UNIT_TEST(TestRangeBasedFor) {
  531. TVector<TString> actual0 = {"11", "22", "33", "44"};
  532. size_t num = 0;
  533. for (TStringBuf elem : StringSplitter("11 22 33 44").Split(' ')) {
  534. UNIT_ASSERT_VALUES_EQUAL(elem, actual0[num++]);
  535. }
  536. TVector<TString> actual1 = {"another", "one,", "and", "another", "one"};
  537. num = 0;
  538. for (TStringBuf elem : StringSplitter(TStringBuf("another one, and \n\n another one")).SplitBySet(" \n").SkipEmpty()) {
  539. UNIT_ASSERT_VALUES_EQUAL(elem, actual1[num++]);
  540. }
  541. TVector<TUtf16String> actual2 = {u"привет,", u"как", u"дела"};
  542. num = 0;
  543. for (TWtringBuf elem : StringSplitter(u"привет, как дела").Split(wchar16(' '))) {
  544. UNIT_ASSERT_VALUES_EQUAL(elem, actual2[num++]);
  545. }
  546. TVector<TString> copy(4);
  547. auto v = StringSplitter("11 22 33 44").Split(' ');
  548. Copy(v.begin(), v.end(), copy.begin());
  549. UNIT_ASSERT_VALUES_EQUAL(actual0, copy);
  550. }
  551. Y_UNIT_TEST(TestParseInto) {
  552. TVector<int> actual0 = {1, 2, 3, 4};
  553. TVector<int> answer0;
  554. StringSplitter("1 2 3 4").Split(' ').ParseInto(&answer0);
  555. UNIT_ASSERT_VALUES_EQUAL(actual0, answer0);
  556. TVector<int> actual1 = {42, 1, 2, 3, 4};
  557. TVector<int> answer1 = {42};
  558. StringSplitter("1 2 3 4").Split(' ').ParseInto(&answer1);
  559. UNIT_ASSERT_VALUES_EQUAL(actual1, answer1);
  560. answer1.clear();
  561. UNIT_ASSERT_EXCEPTION(StringSplitter("1 2 3 4").Split(' ').ParseInto(&answer1), yexception);
  562. answer1 = {42};
  563. StringSplitter(" 1 2 3 4").Split(' ').SkipEmpty().ParseInto(&answer1);
  564. UNIT_ASSERT_VALUES_EQUAL(actual1, answer1);
  565. answer1.clear();
  566. StringSplitter(" \n 1 2 \n\n\n 3 4\n ").SplitBySet(" \n").SkipEmpty().ParseInto(&answer1);
  567. UNIT_ASSERT_VALUES_EQUAL(actual0, answer1);
  568. }
  569. Y_UNIT_TEST(TestStdString) {
  570. std::vector<std::string_view> r0, r1, answer = {"lol", "zomg"};
  571. std::string s = "lol zomg";
  572. for (std::string_view ss : StringSplitter(s).Split(' ')) {
  573. r0.push_back(ss);
  574. }
  575. StringSplitter(s).Split(' ').Collect(&r1);
  576. UNIT_ASSERT_VALUES_EQUAL(r0, answer);
  577. UNIT_ASSERT_VALUES_EQUAL(r1, answer);
  578. }
  579. Y_UNIT_TEST(TestStdStringView) {
  580. std::string_view s = "aaacccbbb";
  581. std::vector<std::string_view> expected = {"aaa", "bbb"};
  582. std::vector<std::string_view> actual = StringSplitter(s).SplitByString("ccc");
  583. UNIT_ASSERT_VALUES_EQUAL(expected, actual);
  584. }
  585. Y_UNIT_TEST(TestStdSplitAfterSplit) {
  586. std::string_view input = "a*b+a*b";
  587. for (std::string_view summand : StringSplitter(input).Split('+')) {
  588. // FIXME: std::string is used to workaround MSVC ICE
  589. UNIT_ASSERT_VALUES_EQUAL(std::string(summand), "a*b");
  590. std::string_view multiplier1, multiplier2;
  591. bool splitResult = StringSplitter(summand).Split('*').TryCollectInto(&multiplier1, &multiplier2);
  592. UNIT_ASSERT(splitResult);
  593. UNIT_ASSERT_VALUES_EQUAL(std::string(multiplier1), "a");
  594. UNIT_ASSERT_VALUES_EQUAL(std::string(multiplier2), "b");
  595. }
  596. }
  597. Y_UNIT_TEST(TestStdSplitWithParsing) {
  598. std::string_view input = "1,2,3,4";
  599. TVector<ui64> numbers;
  600. const TVector<ui64> expected{1, 2, 3, 4};
  601. StringSplitter(input).Split(',').ParseInto(&numbers);
  602. UNIT_ASSERT_VALUES_EQUAL(numbers, expected);
  603. }
  604. Y_UNIT_TEST(TestArcadiaStdInterop) {
  605. TVector<TString> expected0 = {"a", "b"};
  606. TVector<TStringBuf> expected1 = {"a", "b"};
  607. std::string src1("a b");
  608. std::string_view src2("a b");
  609. TVector<TString> actual0 = StringSplitter(src1).Split(' ').SkipEmpty();
  610. TVector<TString> actual1 = StringSplitter(src2).Split(' ').SkipEmpty();
  611. TVector<TStringBuf> actual2 = StringSplitter(src1).Split(' ').SkipEmpty();
  612. TVector<TStringBuf> actual3 = StringSplitter(src2).Split(' ').SkipEmpty();
  613. UNIT_ASSERT_VALUES_EQUAL(expected0, actual0);
  614. UNIT_ASSERT_VALUES_EQUAL(expected0, actual1);
  615. UNIT_ASSERT_VALUES_EQUAL(expected1, actual2);
  616. UNIT_ASSERT_VALUES_EQUAL(expected1, actual3);
  617. }
  618. Y_UNIT_TEST(TesIterationAfterMove) {
  619. const TString src = TString::Join(
  620. "aaa",
  621. TString(250, 'c'),
  622. "bbb",
  623. "aaa",
  624. TString(250, 'c'),
  625. "bbb");
  626. auto s1 = StringSplitter(std::string(src)).SplitByString("c").SkipEmpty();
  627. {
  628. auto s2 = std::move(s1);
  629. const TVector<TString> expected2 = {"aaa", "bbbaaa", "bbb"};
  630. const auto result2 = s2.ToList<TString>();
  631. UNIT_ASSERT_VALUES_EQUAL(result2, expected2);
  632. }
  633. const auto result1 = s1.ToList<TString>();
  634. Y_UNUSED(result1); // valid but unspecified value
  635. }
  636. Y_UNIT_TEST(TestConstCString) {
  637. const char* b = "a;b";
  638. const char* e = b + 3;
  639. std::vector<TStringBuf> v;
  640. StringSplitter(b, e).Split(';').AddTo(&v);
  641. std::vector<TStringBuf> expected = {"a", "b"};
  642. UNIT_ASSERT_VALUES_EQUAL(v, expected);
  643. }
  644. Y_UNIT_TEST(TestCStringRef) {
  645. TString s = "lol";
  646. char* str = s.Detach();
  647. std::vector<TStringBuf> v = StringSplitter(str).Split('o');
  648. std::vector<TStringBuf> expected = {"l", "l"};
  649. UNIT_ASSERT_VALUES_EQUAL(v, expected);
  650. }
  651. Y_UNIT_TEST(TestSplitVector) {
  652. std::vector<char> buffer = {'a', ';', 'b'};
  653. std::vector<TStringBuf> v = StringSplitter(buffer).Split(';');
  654. std::vector<TStringBuf> expected = {"a", "b"};
  655. UNIT_ASSERT_VALUES_EQUAL(v, expected);
  656. }
  // Input iterator over a string of decimal digits that advances two characters
  // per step and dereferences to the two-digit number at the current position.
  // Dereferencing reads Ptr_[0] and Ptr_[1], so the underlying buffer must hold
  // an even number of digits between the begin and end positions.
  class TDoubleIterator {
  public:
      using iterator_category = std::input_iterator_tag;
      using value_type = int;
      using pointer = void;
      using reference = int;
      using const_reference = int;
      using difference_type = ptrdiff_t;

      TDoubleIterator() = default;

      TDoubleIterator(const char* ptr)
          : Ptr_(ptr)
      {
      }

      // Pre-increment returns a reference to *this, as iterator requirements
      // expect, instead of a detached copy.
      TDoubleIterator& operator++() {
          Ptr_ += 2;
          return *this;
      }

      // Post-increment returns the position before the step.
      TDoubleIterator operator++(int) {
          TDoubleIterator tmp = *this;
          ++*this;
          return tmp;
      }

      friend bool operator==(TDoubleIterator l, TDoubleIterator r) {
          return l.Ptr_ == r.Ptr_;
      }

      friend bool operator!=(TDoubleIterator l, TDoubleIterator r) {
          return l.Ptr_ != r.Ptr_;
      }

      // Decodes the two ASCII digits at the current position.
      int operator*() const {
          return (*Ptr_ - '0') * 10 + *(Ptr_ + 1) - '0';
      }

  private:
      const char* Ptr_ = nullptr;
  };
  691. Y_UNIT_TEST(TestInputIterator) {
  692. const char* beg = "1213002233000011";
  693. const char* end = beg + strlen(beg);
  694. std::vector<std::vector<int>> expected = {{12, 13}, {22, 33}, {}, {11}};
  695. int i = 0;
  696. for (TIteratorRange<TDoubleIterator> part : StringSplitter(TDoubleIterator(beg), TDoubleIterator(end)).SplitByFunc([](int value) { return value == 0; })) {
  697. UNIT_ASSERT(std::equal(part.begin(), part.end(), expected[i].begin(), expected[i].end()));
  698. i++;
  699. }
  700. UNIT_ASSERT_VALUES_EQUAL(i, expected.size());
  701. }
  702. } // Y_UNIT_TEST_SUITE(StringSplitter)