strip_ut.cpp 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194
#include "strip.h"

#include <library/cpp/testing/unittest/registar.h>

#include <util/charset/wide.h>

#include <clocale>
  4. Y_UNIT_TEST_SUITE(TStripStringTest) {
  5. struct TStripTest {
  6. TStringBuf Str;
  7. TStringBuf StripLeftRes;
  8. TStringBuf StripRightRes;
  9. TStringBuf StripRes;
  10. };
  11. static constexpr TStripTest StripTests[] = {
  12. {" 012 ", "012 ", " 012", "012"},
  13. {" 012", "012", " 012", "012"},
  14. {"012\t\t", "012\t\t", "012", "012"},
  15. {"\t012\t", "012\t", "\t012", "012"},
  16. {"012", "012", "012", "012"},
  17. {"012\r\n", "012\r\n", "012", "012"},
  18. {"\n012\r", "012\r", "\n012", "012"},
  19. {"\n \t\r", "", "", ""},
  20. {"", "", "", ""},
  21. {"abc", "abc", "abc", "abc"},
  22. {" abc ", "abc ", " abc", "abc"},
  23. {"a c", "a c", "a c", "a c"},
  24. {" long string to avoid SSO \n", "long string to avoid SSO \n", " long string to avoid SSO", "long string to avoid SSO"},
  25. {" набор не-ascii букв ", "набор не-ascii букв ", " набор не-ascii букв", "набор не-ascii букв"},
  26. // Russian "х" ends with \x85, whis is a space character in some encodings.
  27. {"последней буквой идет х ", "последней буквой идет х ", "последней буквой идет х", "последней буквой идет х"},
  28. };
  29. Y_UNIT_TEST(TestStrip) {
  30. for (const auto& test : StripTests) {
  31. TString inputStr(test.Str);
  32. TString s;
  33. Strip(inputStr, s);
  34. UNIT_ASSERT_VALUES_EQUAL(s, test.StripRes);
  35. UNIT_ASSERT_VALUES_EQUAL(StripString(inputStr), test.StripRes);
  36. UNIT_ASSERT_VALUES_EQUAL(StripStringLeft(inputStr), test.StripLeftRes);
  37. UNIT_ASSERT_VALUES_EQUAL(StripStringRight(inputStr), test.StripRightRes);
  38. TStringBuf inputStrBuf(test.Str);
  39. UNIT_ASSERT_VALUES_EQUAL(StripString(inputStrBuf), test.StripRes);
  40. UNIT_ASSERT_VALUES_EQUAL(StripStringLeft(inputStrBuf), test.StripLeftRes);
  41. UNIT_ASSERT_VALUES_EQUAL(StripStringRight(inputStrBuf), test.StripRightRes);
  42. };
  43. }
  44. Y_UNIT_TEST(TestStripInPlace) {
  45. // On Darwin default locale is set to a value which interprets certain cyrillic utf-8 sequences as spaces.
  46. // Which we do not use ::isspace and only strip ASCII spaces, we want to ensure that this will not change in the future.
  47. std::setlocale(LC_ALL, "");
  48. for (const auto& test : StripTests) {
  49. TString str(test.Str);
  50. Y_ASSERT(str.IsDetached() || str.empty()); // prerequisite of the test; check that we don't try to modify shared COW-string in-place by accident
  51. const void* stringPtrPrior = str.data();
  52. StripInPlace(str);
  53. const void* stringPtrAfter = str.data();
  54. UNIT_ASSERT_VALUES_EQUAL(str, test.StripRes);
  55. if (!test.Str.empty()) {
  56. UNIT_ASSERT_EQUAL_C(stringPtrPrior, stringPtrAfter, TString(test.Str).Quote()); // StripInPlace should reuse buffer of original string
  57. }
  58. }
  59. }
  60. Y_UNIT_TEST(TestCustomStrip) {
  61. struct TTest {
  62. const char* Str;
  63. const char* Result;
  64. };
  65. static const TTest tests[] = {
  66. {"//012//", "012"},
  67. {"//012", "012"},
  68. {"012", "012"},
  69. {"012//", "012"},
  70. };
  71. for (auto test : tests) {
  72. UNIT_ASSERT_EQUAL(
  73. StripString(TString(test.Str), EqualsStripAdapter('/')),
  74. test.Result);
  75. };
  76. }
  77. Y_UNIT_TEST(TestCustomStripLeftRight) {
  78. struct TTest {
  79. const char* Str;
  80. const char* ResultLeft;
  81. const char* ResultRight;
  82. };
  83. static const TTest tests[] = {
  84. {"//012//", "012//", "//012"},
  85. {"//012", "012", "//012"},
  86. {"012", "012", "012"},
  87. {"012//", "012//", "012"},
  88. };
  89. for (const auto& test : tests) {
  90. UNIT_ASSERT_EQUAL(
  91. StripStringLeft(TString(test.Str), EqualsStripAdapter('/')),
  92. test.ResultLeft);
  93. UNIT_ASSERT_EQUAL(
  94. StripStringRight(TString(test.Str), EqualsStripAdapter('/')),
  95. test.ResultRight);
  96. };
  97. }
  98. Y_UNIT_TEST(TestNullStringStrip) {
  99. TStringBuf nullString(nullptr, nullptr);
  100. UNIT_ASSERT_EQUAL(
  101. StripString(nullString),
  102. TString());
  103. }
  104. Y_UNIT_TEST(TestWtrokaStrip) {
  105. UNIT_ASSERT_EQUAL(StripString(TWtringBuf(u" abc ")), u"abc");
  106. UNIT_ASSERT_EQUAL(StripStringLeft(TWtringBuf(u" abc ")), u"abc ");
  107. UNIT_ASSERT_EQUAL(StripStringRight(TWtringBuf(u" abc ")), u" abc");
  108. }
  109. Y_UNIT_TEST(TestWtrokaCustomStrip) {
  110. UNIT_ASSERT_EQUAL(
  111. StripString(
  112. TWtringBuf(u"/abc/"),
  113. EqualsStripAdapter(u'/')),
  114. u"abc");
  115. }
  116. Y_UNIT_TEST(TestCollapseUtf32) {
  117. TUtf32String s;
  118. Collapse(UTF8ToUTF32<true>(" 123 456 "), s, IsWhitespace);
  119. UNIT_ASSERT(s == UTF8ToUTF32<true>(" 123 456 "));
  120. Collapse(UTF8ToUTF32<true>(" 123 456 "), s, IsWhitespace, 10);
  121. UNIT_ASSERT(s == UTF8ToUTF32<true>(" 123 456 "));
  122. s = UTF8ToUTF32<true>(" a b c ");
  123. TUtf32String s2 = s;
  124. CollapseInPlace(s2, IsWhitespace);
  125. UNIT_ASSERT(s == s2);
  126. #ifndef TSTRING_IS_STD_STRING
  127. UNIT_ASSERT(s.c_str() == s2.c_str()); // Collapse() does not change the string at all
  128. #endif
  129. }
  130. Y_UNIT_TEST(TestCollapseUtf16) {
  131. TUtf16String s;
  132. Collapse(UTF8ToWide<true>(" 123 456 "), s);
  133. UNIT_ASSERT(s == UTF8ToWide<true>(" 123 456 "));
  134. Collapse(UTF8ToWide<true>(" 123 456 "), s, 10);
  135. UNIT_ASSERT(s == UTF8ToWide<true>(" 123 456 "));
  136. s = UTF8ToWide<true>(" a b c ");
  137. TUtf16String s2 = s;
  138. CollapseInPlace(s2);
  139. UNIT_ASSERT(s == s2);
  140. #ifndef TSTRING_IS_STD_STRING
  141. UNIT_ASSERT(s.c_str() == s2.c_str()); // Collapse() does not change the string at all
  142. #endif
  143. }
  144. Y_UNIT_TEST(TestCollapse) {
  145. TString s;
  146. Collapse(TString(" 123 456 "), s);
  147. UNIT_ASSERT(s == " 123 456 ");
  148. Collapse(TString(" 123 456 "), s, 10);
  149. UNIT_ASSERT(s == " 123 456 ");
  150. s = TString(" a b c ");
  151. TString s2 = s;
  152. CollapseInPlace(s2);
  153. UNIT_ASSERT(s == s2);
  154. #ifndef TSTRING_IS_STD_STRING
  155. UNIT_ASSERT(s.c_str() == s2.c_str()); // Collapse() does not change the string at all
  156. #endif
  157. }
  158. Y_UNIT_TEST(TestCollapseText) {
  159. TString abs1("Very long description string written in unknown language.");
  160. TString abs2(abs1);
  161. TString abs3(abs1);
  162. CollapseText(abs1, 204);
  163. CollapseText(abs2, 54);
  164. CollapseText(abs3, 49);
  165. UNIT_ASSERT_EQUAL(abs1 == "Very long description string written in unknown language.", true);
  166. UNIT_ASSERT_EQUAL(abs2 == "Very long description string written in unknown ...", true);
  167. UNIT_ASSERT_EQUAL(abs3 == "Very long description string written in ...", true);
  168. }
  169. } // Y_UNIT_TEST_SUITE(TStripStringTest)