subst.cpp 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201
  1. #include "subst.h"
  2. #include <util/generic/strbuf.h>
  3. #include <util/generic/string.h>
  4. #include <util/system/compiler.h>
  5. #include <string>
  6. #include <type_traits>
  7. // a bit of template magic (to be fast and unreadable)
  8. template <class TStringType, class TTo, bool Main>
  9. static Y_FORCE_INLINE void MoveBlock(typename TStringType::value_type* ptr, size_t& srcPos, size_t& dstPos, const size_t off, const TTo to, const size_t toSize) {
  10. const size_t unchangedSize = off - srcPos;
  11. if (dstPos < srcPos) {
  12. for (size_t i = 0; i < unchangedSize; ++i) {
  13. ptr[dstPos++] = ptr[srcPos++];
  14. }
  15. } else {
  16. dstPos += unchangedSize;
  17. srcPos += unchangedSize;
  18. }
  19. if (Main) {
  20. for (size_t i = 0; i < toSize; ++i) {
  21. ptr[dstPos++] = to[i];
  22. }
  23. }
  24. }
  25. template <typename T, typename U>
  26. static bool IsIntersect(const T& a, const U& b) noexcept {
  27. if (b.data() < a.data()) {
  28. return IsIntersect(b, a);
  29. }
  30. return !a.empty() && !b.empty() &&
  31. ((a.data() <= b.data() && b.data() < a.data() + a.size()) ||
  32. (a.data() < b.data() + b.size() && b.data() + b.size() <= a.data() + a.size()));
  33. }
  34. /**
  35. * Replaces all occurences of substring @c from in string @c s to string @c to.
  36. * Uses two separate implementations (inplace for shrink and append for grow case)
  37. * See IGNIETFERRO-394
  38. **/
  39. template <class TStringType, typename TStringViewType = TBasicStringBuf<typename TStringType::value_type>>
  40. static inline size_t SubstGlobalImpl(TStringType& s, const TStringViewType from, const TStringViewType to, size_t fromPos = 0) {
  41. if (from.empty()) {
  42. return 0;
  43. }
  44. Y_ASSERT(!IsIntersect(s, from));
  45. Y_ASSERT(!IsIntersect(s, to));
  46. const size_t fromSize = from.size();
  47. const size_t toSize = to.size();
  48. size_t replacementsCount = 0;
  49. size_t off = fromPos;
  50. size_t srcPos = 0;
  51. if (toSize > fromSize) {
  52. // string will grow: append to another string
  53. TStringType result;
  54. for (; (off = TStringViewType(s).find(from, off)) != TStringType::npos; off += fromSize) {
  55. if (!replacementsCount) {
  56. // first replacement occured, we can prepare result string
  57. result.reserve(s.size() + s.size() / 3);
  58. }
  59. result.append(s.begin() + srcPos, s.begin() + off);
  60. result.append(to.data(), to.size());
  61. srcPos = off + fromSize;
  62. ++replacementsCount;
  63. }
  64. if (replacementsCount) {
  65. // append tail
  66. result.append(s.begin() + srcPos, s.end());
  67. s = std::move(result);
  68. }
  69. return replacementsCount;
  70. }
  71. // string will not grow: use inplace algo
  72. size_t dstPos = 0;
  73. typename TStringType::value_type* ptr = &*s.begin();
  74. for (; (off = TStringViewType(s).find(from, off)) != TStringType::npos; off += fromSize) {
  75. Y_ASSERT(dstPos <= srcPos);
  76. MoveBlock<TStringType, TStringViewType, true>(ptr, srcPos, dstPos, off, to, toSize);
  77. srcPos = off + fromSize;
  78. ++replacementsCount;
  79. }
  80. if (replacementsCount) {
  81. // append tail
  82. MoveBlock<TStringType, TStringViewType, false>(ptr, srcPos, dstPos, s.size(), to, toSize);
  83. s.resize(dstPos);
  84. }
  85. return replacementsCount;
  86. }
  87. /// Replaces all occurences of the 'from' symbol in a string to the 'to' symbol.
  88. template <class TStringType>
  89. inline size_t SubstCharGlobalImpl(TStringType& s, typename TStringType::value_type from, typename TStringType::value_type to, size_t fromPos = 0) {
  90. if (fromPos >= s.size()) {
  91. return 0;
  92. }
  93. size_t result = 0;
  94. fromPos = s.find(from, fromPos);
  95. // s.begin() might cause memory copying, so call it only if needed
  96. if (fromPos != TStringType::npos) {
  97. auto* it = &*s.begin() + fromPos;
  98. *it = to;
  99. ++result;
  100. // at this point string is copied and it's safe to use constant s.end() to iterate
  101. const auto* const sEnd = &*s.end();
  102. // unrolled loop goes first because it is more likely that `it` will be properly aligned
  103. for (const auto* const end = sEnd - (sEnd - it) % 4; it < end;) {
  104. if (*it == from) {
  105. *it = to;
  106. ++result;
  107. }
  108. ++it;
  109. if (*it == from) {
  110. *it = to;
  111. ++result;
  112. }
  113. ++it;
  114. if (*it == from) {
  115. *it = to;
  116. ++result;
  117. }
  118. ++it;
  119. if (*it == from) {
  120. *it = to;
  121. ++result;
  122. }
  123. ++it;
  124. }
  125. for (; it < sEnd; ++it) {
  126. if (*it == from) {
  127. *it = to;
  128. ++result;
  129. }
  130. }
  131. }
  132. return result;
  133. }
  134. /* Standard says that `char16_t` is a distinct type and has same size, signedness and alignment as
  135. * `std::uint_least16_t`, so we check if `char16_t` has same signedness and size as `wchar16` to be
  136. * sure that we can make safe casts between values of these types and pointers.
  137. */
  138. static_assert(sizeof(wchar16) == sizeof(char16_t), "");
  139. static_assert(sizeof(wchar32) == sizeof(char32_t), "");
  140. static_assert(std::is_unsigned<wchar16>::value == std::is_unsigned<char16_t>::value, "");
  141. static_assert(std::is_unsigned<wchar32>::value == std::is_unsigned<char32_t>::value, "");
  142. size_t SubstGlobal(TString& text, const TStringBuf what, const TStringBuf with, size_t from) {
  143. return SubstGlobalImpl(text, what, with, from);
  144. }
  145. size_t SubstGlobal(std::string& text, const TStringBuf what, const TStringBuf with, size_t from) {
  146. return SubstGlobalImpl(text, what, with, from);
  147. }
  148. size_t SubstGlobal(TUtf16String& text, const TWtringBuf what, const TWtringBuf with, size_t from) {
  149. return SubstGlobalImpl(text, what, with, from);
  150. }
  151. size_t SubstGlobal(TUtf32String& text, const TUtf32StringBuf what, const TUtf32StringBuf with, size_t from) {
  152. return SubstGlobalImpl(text, what, with, from);
  153. }
  154. size_t SubstGlobal(std::u16string& text, const TWtringBuf what, const TWtringBuf with, size_t from) {
  155. return SubstGlobalImpl(text,
  156. std::u16string_view(reinterpret_cast<const char16_t*>(what.data()), what.size()),
  157. std::u16string_view(reinterpret_cast<const char16_t*>(with.data()), with.size()),
  158. from);
  159. }
  160. size_t SubstGlobal(TString& text, char what, char with, size_t from) {
  161. return SubstCharGlobalImpl(text, what, with, from);
  162. }
  163. size_t SubstGlobal(std::string& text, char what, char with, size_t from) {
  164. return SubstCharGlobalImpl(text, what, with, from);
  165. }
  166. size_t SubstGlobal(TUtf16String& text, wchar16 what, wchar16 with, size_t from) {
  167. return SubstCharGlobalImpl(text, (char16_t)what, (char16_t)with, from);
  168. }
  169. size_t SubstGlobal(std::u16string& text, wchar16 what, wchar16 with, size_t from) {
  170. return SubstCharGlobalImpl(text, (char16_t)what, (char16_t)with, from);
  171. }
  172. size_t SubstGlobal(TUtf32String& text, wchar32 what, wchar32 with, size_t from) {
  173. return SubstCharGlobalImpl(text, (char32_t)what, (char32_t)with, from);
  174. }