strbase.h 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607
  1. #pragma once
  // Some of these includes are just a legacy of the previous implementation.
  // We don't need them here, but removing them is tricky because it breaks all
  // kinds of builds downstream.
  5. #include "mem_copy.h"
  6. #include "ptr.h"
  7. #include "utility.h"
  8. #include <util/charset/unidata.h>
  9. #include <util/system/platform.h>
  10. #include <util/system/yassert.h>
  11. #include <contrib/libs/libc_compat/string.h>
  12. #include <cctype>
  13. #include <cstring>
  14. #include <string>
  15. #include <string_view>
  16. namespace NStringPrivate {
  17. template <class TCharType>
  18. size_t GetStringLengthWithLimit(const TCharType* s, size_t maxlen) {
  19. Y_ASSERT(s);
  20. size_t i = 0;
  21. for (; i != maxlen && s[i]; ++i)
  22. ;
  23. return i;
  24. }
  25. inline size_t GetStringLengthWithLimit(const char* s, size_t maxlen) {
  26. Y_ASSERT(s);
  27. return strnlen(s, maxlen);
  28. }
  29. }
  30. template <typename TDerived, typename TCharType, typename TTraitsType = std::char_traits<TCharType>>
  31. class TStringBase {
  32. using TStringView = std::basic_string_view<TCharType>;
  33. using TStringViewWithTraits = std::basic_string_view<TCharType, TTraitsType>;
  34. public:
  35. using TChar = TCharType;
  36. using TTraits = TTraitsType;
  37. using TSelf = TStringBase<TDerived, TChar, TTraits>;
  38. using size_type = size_t;
  39. using difference_type = ptrdiff_t;
  40. static constexpr size_t npos = size_t(-1);
  41. using const_iterator = const TCharType*;
  42. using const_reference = const TCharType&;
  43. template <typename TBase>
  44. struct TReverseIteratorBase {
  45. constexpr TReverseIteratorBase() noexcept = default;
  46. explicit constexpr TReverseIteratorBase(TBase p)
  47. : P_(p)
  48. {
  49. }
  50. TReverseIteratorBase operator++() noexcept {
  51. --P_;
  52. return *this;
  53. }
  54. TReverseIteratorBase operator++(int) noexcept {
  55. TReverseIteratorBase old(*this);
  56. --P_;
  57. return old;
  58. }
  59. TReverseIteratorBase& operator--() noexcept {
  60. ++P_;
  61. return *this;
  62. }
  63. TReverseIteratorBase operator--(int) noexcept {
  64. TReverseIteratorBase old(*this);
  65. ++P_;
  66. return old;
  67. }
  68. constexpr auto operator*() const noexcept -> std::remove_pointer_t<TBase>& {
  69. return *TBase(*this);
  70. }
  71. explicit constexpr operator TBase() const noexcept {
  72. return TBase(P_ - 1);
  73. }
  74. constexpr auto operator-(const TReverseIteratorBase o) const noexcept {
  75. return o.P_ - P_;
  76. }
  77. constexpr bool operator==(const TReverseIteratorBase o) const noexcept {
  78. return P_ == o.P_;
  79. }
  80. constexpr bool operator!=(const TReverseIteratorBase o) const noexcept {
  81. return !(*this == o);
  82. }
  83. private:
  84. TBase P_ = nullptr;
  85. };
  86. using const_reverse_iterator = TReverseIteratorBase<const_iterator>;
  87. static constexpr size_t StrLen(const TCharType* s) noexcept {
  88. if (Y_LIKELY(s)) {
  89. return TTraits::length(s);
  90. }
  91. return 0;
  92. }
  93. template <class TCharTraits>
  94. inline constexpr operator std::basic_string_view<TCharType, TCharTraits>() const {
  95. return std::basic_string_view<TCharType, TCharTraits>(data(), size());
  96. }
  97. template <class TCharTraits, class Allocator>
  98. inline explicit operator std::basic_string<TCharType, TCharTraits, Allocator>() const {
  99. return std::basic_string<TCharType, TCharTraits, Allocator>(Ptr(), Len());
  100. }
  101. /**
  102. * @param Pointer to character inside the string, or nullptr.
  103. * @return Offset from string beginning (in chars), or npos on nullptr.
  104. */
  105. inline size_t off(const TCharType* ret) const noexcept {
  106. return ret ? (size_t)(ret - Ptr()) : npos;
  107. }
  108. inline size_t IterOff(const_iterator it) const noexcept {
  109. return begin() <= it && end() > it ? size_t(it - begin()) : npos;
  110. }
  111. inline const_iterator begin() const noexcept {
  112. return Ptr();
  113. }
  114. inline const_iterator end() const noexcept {
  115. return Ptr() + size();
  116. }
  117. inline const_iterator cbegin() const noexcept {
  118. return begin();
  119. }
  120. inline const_iterator cend() const noexcept {
  121. return end();
  122. }
  123. inline const_reverse_iterator rbegin() const noexcept {
  124. return const_reverse_iterator(Ptr() + size());
  125. }
  126. inline const_reverse_iterator rend() const noexcept {
  127. return const_reverse_iterator(Ptr());
  128. }
  129. inline const_reverse_iterator crbegin() const noexcept {
  130. return rbegin();
  131. }
  132. inline const_reverse_iterator crend() const noexcept {
  133. return rend();
  134. }
  135. inline TCharType back() const noexcept {
  136. Y_ASSERT(!this->empty());
  137. return Ptr()[Len() - 1];
  138. }
  139. inline TCharType front() const noexcept {
  140. Y_ASSERT(!empty());
  141. return Ptr()[0];
  142. }
  143. constexpr const TCharType* data() const noexcept {
  144. return Ptr();
  145. }
  146. constexpr inline size_t size() const noexcept {
  147. return Len();
  148. }
  149. constexpr inline bool is_null() const noexcept {
  150. return *Ptr() == 0;
  151. }
  152. Y_PURE_FUNCTION constexpr inline bool empty() const noexcept {
  153. return Len() == 0;
  154. }
  155. constexpr inline explicit operator bool() const noexcept {
  156. return !empty();
  157. }
  158. public: // style-guide compliant methods
  159. constexpr const TCharType* Data() const noexcept {
  160. return Ptr();
  161. }
  162. constexpr size_t Size() const noexcept {
  163. return Len();
  164. }
  165. Y_PURE_FUNCTION constexpr bool Empty() const noexcept {
  166. return 0 == Len();
  167. }
  168. private:
  169. static inline TStringView LegacySubString(const TStringView view, size_t p, size_t n) noexcept {
  170. p = Min(p, view.length());
  171. return view.substr(p, n);
  172. }
  173. public:
  174. // ~~~ Comparison ~~~ : FAMILY0(int, compare)
  175. static int compare(const TSelf& s1, const TSelf& s2) noexcept {
  176. return s1.AsStringView().compare(s2.AsStringView());
  177. }
  178. static int compare(const TCharType* p, const TSelf& s2) noexcept {
  179. TCharType null{0};
  180. return TStringViewWithTraits(p ? p : &null).compare(s2.AsStringView());
  181. }
  182. static int compare(const TSelf& s1, const TCharType* p) noexcept {
  183. TCharType null{0};
  184. return s1.AsStringView().compare(p ? p : &null);
  185. }
  186. static int compare(const TStringView s1, const TStringView s2) noexcept {
  187. return TStringViewWithTraits(s1.data(), s1.size()).compare(TStringViewWithTraits(s2.data(), s2.size()));
  188. }
  189. template <class T>
  190. inline int compare(const T& t) const noexcept {
  191. return compare(*this, t);
  192. }
  193. inline int compare(size_t p, size_t n, const TStringView t) const noexcept {
  194. return compare(LegacySubString(*this, p, n), t);
  195. }
  196. inline int compare(size_t p, size_t n, const TStringView t, size_t p1, size_t n1) const noexcept {
  197. return compare(LegacySubString(*this, p, n), LegacySubString(t, p1, n1));
  198. }
  199. inline int compare(size_t p, size_t n, const TStringView t, size_t n1) const noexcept {
  200. return compare(LegacySubString(*this, p, n), LegacySubString(t, 0, n1));
  201. }
  202. inline int compare(const TCharType* p, size_t len) const noexcept {
  203. return compare(*this, TStringView(p, len));
  204. }
  205. static bool equal(const TSelf& s1, const TSelf& s2) noexcept {
  206. return s1.AsStringView() == s2.AsStringView();
  207. }
  208. static bool equal(const TSelf& s1, const TCharType* p) noexcept {
  209. if (p == nullptr) {
  210. return s1.Len() == 0;
  211. }
  212. return s1.AsStringView() == p;
  213. }
  214. static bool equal(const TCharType* p, const TSelf& s2) noexcept {
  215. return equal(s2, p);
  216. }
  217. static bool equal(const TStringView s1, const TStringView s2) noexcept {
  218. return TStringViewWithTraits{s1.data(), s1.size()} == TStringViewWithTraits{s2.data(), s2.size()};
  219. }
  220. template <class T>
  221. inline bool equal(const T& t) const noexcept {
  222. return equal(*this, t);
  223. }
  224. inline bool equal(size_t p, size_t n, const TStringView t) const noexcept {
  225. return equal(LegacySubString(*this, p, n), t);
  226. }
  227. inline bool equal(size_t p, size_t n, const TStringView t, size_t p1, size_t n1) const noexcept {
  228. return equal(LegacySubString(*this, p, n), LegacySubString(t, p1, n1));
  229. }
  230. inline bool equal(size_t p, size_t n, const TStringView t, size_t n1) const noexcept {
  231. return equal(LegacySubString(*this, p, n), LegacySubString(t, 0, n1));
  232. }
  233. static inline bool StartsWith(const TCharType* what, size_t whatLen, const TCharType* with, size_t withLen) noexcept {
  234. return withLen <= whatLen && TStringViewWithTraits(what, withLen) == TStringViewWithTraits(with, withLen);
  235. }
  236. static inline bool EndsWith(const TCharType* what, size_t whatLen, const TCharType* with, size_t withLen) noexcept {
  237. return withLen <= whatLen && TStringViewWithTraits(what + whatLen - withLen, withLen) == TStringViewWithTraits(with, withLen);
  238. }
  239. inline bool StartsWith(const TCharType* s, size_t n) const noexcept {
  240. return StartsWith(Ptr(), Len(), s, n);
  241. }
  242. inline bool StartsWith(const TStringView s) const noexcept {
  243. return StartsWith(s.data(), s.length());
  244. }
  245. inline bool StartsWith(TCharType ch) const noexcept {
  246. return !empty() && TTraits::eq(*Ptr(), ch);
  247. }
  248. inline bool EndsWith(const TCharType* s, size_t n) const noexcept {
  249. return EndsWith(Ptr(), Len(), s, n);
  250. }
  251. inline bool EndsWith(const TStringView s) const noexcept {
  252. return EndsWith(s.data(), s.length());
  253. }
  254. inline bool EndsWith(TCharType ch) const noexcept {
  255. return !empty() && TTraits::eq(Ptr()[Len() - 1], ch);
  256. }
  257. template <typename TDerived2, typename TTraits2>
  258. bool operator==(const TStringBase<TDerived2, TChar, TTraits2>& s2) const noexcept {
  259. return equal(*this, s2);
  260. }
  261. bool operator==(TStringView s2) const noexcept {
  262. return equal(*this, s2);
  263. }
  264. bool operator==(const TCharType* pc) const noexcept {
  265. return equal(*this, pc);
  266. }
  267. #ifndef __cpp_impl_three_way_comparison
  268. friend bool operator==(const TCharType* pc, const TSelf& s) noexcept {
  269. return equal(pc, s);
  270. }
  271. template <typename TDerived2, typename TTraits2>
  272. friend bool operator!=(const TSelf& s1, const TStringBase<TDerived2, TChar, TTraits2>& s2) noexcept {
  273. return !(s1 == s2);
  274. }
  275. friend bool operator!=(const TSelf& s1, TStringView s2) noexcept {
  276. return !(s1 == s2);
  277. }
  278. friend bool operator!=(const TSelf& s, const TCharType* pc) noexcept {
  279. return !(s == pc);
  280. }
  281. friend bool operator!=(const TCharType* pc, const TSelf& s) noexcept {
  282. return !(pc == s);
  283. }
  284. #endif
  285. template <typename TDerived2, typename TTraits2>
  286. friend bool operator<(const TSelf& s1, const TStringBase<TDerived2, TChar, TTraits2>& s2) noexcept {
  287. return compare(s1, s2) < 0;
  288. }
  289. friend bool operator<(const TSelf& s1, TStringView s2) noexcept {
  290. return compare(s1, s2) < 0;
  291. }
  292. friend bool operator<(const TSelf& s, const TCharType* pc) noexcept {
  293. return compare(s, pc) < 0;
  294. }
  295. friend bool operator<(const TCharType* pc, const TSelf& s) noexcept {
  296. return compare(pc, s) < 0;
  297. }
  298. template <typename TDerived2, typename TTraits2>
  299. friend bool operator<=(const TSelf& s1, const TStringBase<TDerived2, TChar, TTraits2>& s2) noexcept {
  300. return compare(s1, s2) <= 0;
  301. }
  302. friend bool operator<=(const TSelf& s1, TStringView s2) noexcept {
  303. return compare(s1, s2) <= 0;
  304. }
  305. friend bool operator<=(const TSelf& s, const TCharType* pc) noexcept {
  306. return compare(s, pc) <= 0;
  307. }
  308. friend bool operator<=(const TCharType* pc, const TSelf& s) noexcept {
  309. return compare(pc, s) <= 0;
  310. }
  311. template <typename TDerived2, typename TTraits2>
  312. friend bool operator>(const TSelf& s1, const TStringBase<TDerived2, TChar, TTraits2>& s2) noexcept {
  313. return compare(s1, s2) > 0;
  314. }
  315. friend bool operator>(const TSelf& s1, TStringView s2) noexcept {
  316. return compare(s1, s2) > 0;
  317. }
  318. friend bool operator>(const TSelf& s, const TCharType* pc) noexcept {
  319. return compare(s, pc) > 0;
  320. }
  321. friend bool operator>(const TCharType* pc, const TSelf& s) noexcept {
  322. return compare(pc, s) > 0;
  323. }
  324. template <typename TDerived2, typename TTraits2>
  325. friend bool operator>=(const TSelf& s1, const TStringBase<TDerived2, TChar, TTraits2>& s2) noexcept {
  326. return compare(s1, s2) >= 0;
  327. }
  328. friend bool operator>=(const TSelf& s1, TStringView s2) noexcept {
  329. return compare(s1, s2) >= 0;
  330. }
  331. friend bool operator>=(const TSelf& s, const TCharType* pc) noexcept {
  332. return compare(s, pc) >= 0;
  333. }
  334. friend bool operator>=(const TCharType* pc, const TSelf& s) noexcept {
  335. return compare(pc, s) >= 0;
  336. }
  337. // ~~ Read access ~~
  338. inline TCharType at(size_t pos) const noexcept {
  339. if (Y_LIKELY(pos < Len())) {
  340. return (Ptr())[pos];
  341. }
  342. return 0;
  343. }
  344. inline TCharType operator[](size_t pos) const noexcept {
  345. Y_ASSERT(pos < this->size());
  346. return Ptr()[pos];
  347. }
  348. //~~~~Search~~~~
  349. /**
  350. * @return Position of the substring inside this string, or `npos` if not found.
  351. */
  352. inline size_t find(const TStringView s, size_t pos = 0) const noexcept {
  353. return find(s.data(), pos, s.size());
  354. }
  355. inline size_t find(const TCharType* s, size_t pos, size_t count) const noexcept {
  356. return AsStringView().find(s, pos, count);
  357. }
  358. inline size_t find(TCharType c, size_t pos = 0) const noexcept {
  359. return AsStringView().find(c, pos);
  360. }
  361. inline size_t rfind(TCharType c) const noexcept {
  362. return AsStringView().rfind(c);
  363. }
  364. inline size_t rfind(TCharType c, size_t pos) const noexcept {
  365. if (pos == 0) {
  366. return npos;
  367. }
  368. return AsStringView().rfind(c, pos - 1);
  369. }
  370. inline size_t rfind(const TStringView str, size_t pos = npos) const {
  371. return AsStringView().rfind(str.data(), pos, str.size());
  372. }
  373. //~~~~Contains~~~~
  374. /**
  375. * @returns Whether this string contains the provided substring.
  376. */
  377. inline bool Contains(const TStringView s, size_t pos = 0) const noexcept {
  378. return !s.length() || find(s, pos) != npos;
  379. }
  380. inline bool Contains(TChar c, size_t pos = 0) const noexcept {
  381. return find(c, pos) != npos;
  382. }
  383. inline void Contains(std::enable_if<std::is_unsigned<TCharType>::value, char> c, size_t pos = 0) const noexcept {
  384. return find(ui8(c), pos) != npos;
  385. }
  386. //~~~~Character Set Search~~~
  387. inline size_t find_first_of(TCharType c) const noexcept {
  388. return find_first_of(c, 0);
  389. }
  390. inline size_t find_first_of(TCharType c, size_t pos) const noexcept {
  391. return find(c, pos);
  392. }
  393. inline size_t find_first_of(const TStringView set) const noexcept {
  394. return find_first_of(set, 0);
  395. }
  396. inline size_t find_first_of(const TStringView set, size_t pos) const noexcept {
  397. return AsStringView().find_first_of(set.data(), pos, set.size());
  398. }
  399. inline size_t find_first_not_of(TCharType c) const noexcept {
  400. return find_first_not_of(c, 0);
  401. }
  402. inline size_t find_first_not_of(TCharType c, size_t pos) const noexcept {
  403. return find_first_not_of(TStringView(&c, 1), pos);
  404. }
  405. inline size_t find_first_not_of(const TStringView set) const noexcept {
  406. return find_first_not_of(set, 0);
  407. }
  408. inline size_t find_first_not_of(const TStringView set, size_t pos) const noexcept {
  409. return AsStringView().find_first_not_of(set.data(), pos, set.size());
  410. }
  411. inline size_t find_last_of(TCharType c, size_t pos = npos) const noexcept {
  412. return find_last_of(&c, pos, 1);
  413. }
  414. inline size_t find_last_of(const TStringView set, size_t pos = npos) const noexcept {
  415. return find_last_of(set.data(), pos, set.length());
  416. }
  417. inline size_t find_last_of(const TCharType* set, size_t pos, size_t n) const noexcept {
  418. return AsStringView().find_last_of(set, pos, n);
  419. }
  420. inline size_t find_last_not_of(TCharType c, size_t pos = npos) const noexcept {
  421. return AsStringView().find_last_not_of(c, pos);
  422. }
  423. inline size_t find_last_not_of(const TStringView set, size_t pos = npos) const noexcept {
  424. return find_last_not_of(set.data(), pos, set.length());
  425. }
  426. inline size_t find_last_not_of(const TCharType* set, size_t pos, size_t n) const noexcept {
  427. return AsStringView().find_last_not_of(set, pos, n);
  428. }
  429. inline size_t copy(TCharType* pc, size_t n, size_t pos) const {
  430. if (pos > Len()) {
  431. throw std::out_of_range("TStringBase::copy");
  432. }
  433. return CopyImpl(pc, n, pos);
  434. }
  435. inline size_t copy(TCharType* pc, size_t n) const noexcept {
  436. return CopyImpl(pc, n, 0);
  437. }
  438. inline size_t strcpy(TCharType* pc, size_t n) const noexcept {
  439. if (n) {
  440. n = copy(pc, n - 1);
  441. pc[n] = 0;
  442. }
  443. return n;
  444. }
  445. inline TDerived copy() const Y_WARN_UNUSED_RESULT {
  446. return TDerived(Ptr(), Len());
  447. }
  448. // ~~~ Partial copy ~~~~
  449. TDerived substr(size_t pos, size_t n = npos) const Y_WARN_UNUSED_RESULT {
  450. return TDerived(*This(), pos, n);
  451. }
  452. private:
  453. using GenericFinder = const TCharType* (*)(const TCharType*, size_t, const TCharType*, size_t);
  454. TStringViewWithTraits AsStringView() const {
  455. return static_cast<TStringViewWithTraits>(*this);
  456. }
  457. constexpr inline const TCharType* Ptr() const noexcept {
  458. return This()->data();
  459. }
  460. constexpr inline size_t Len() const noexcept {
  461. return This()->length();
  462. }
  463. constexpr inline const TDerived* This() const noexcept {
  464. return static_cast<const TDerived*>(this);
  465. }
  466. inline size_t CopyImpl(TCharType* pc, size_t n, size_t pos) const noexcept {
  467. const size_t toCopy = Min(Len() - pos, n);
  468. TTraits::copy(pc, Ptr() + pos, toCopy);
  469. return toCopy;
  470. }
  471. };