#pragma once #include "strspn.h" #include "cast.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include // NOTE: Check StringSplitter below to get more convenient split string interface. namespace NStringSplitPrivate { template struct TIsConsumer: std::false_type {}; template struct TIsConsumer< T, I, TVoidT().Consume( std::declval(), std::declval(), std::declval()))>> : std::true_type {}; template constexpr bool TIsConsumerV = TIsConsumer::value; template T* Find(T* str, std::common_type_t ch) { for (; *str; ++str) { if (*str == ch) { return str; } } return nullptr; } } template std::enable_if_t<::NStringSplitPrivate::TIsConsumerV> SplitString(I b, I e, const TDelim& d, TConsumer&& c) { I l, i; do { l = b; i = d.Find(b, e); } while (c.Consume(l, i, b) && (b != i)); } template std::enable_if_t<::NStringSplitPrivate::TIsConsumerV> SplitString(I b, const TDelim& d, TConsumer&& c) { I l, i; do { l = b; i = d.Find(b); } while (c.Consume(l, i, b) && (b != i)); } template static inline I1* FastStrChr(I1* str, I2 f) noexcept { I1* ret = NStringSplitPrivate::Find(str, f); if (!ret) { ret = str + std::char_traits::length(str); } return ret; } template static inline I* FastStrStr(I* str, I* f, size_t l) noexcept { std::basic_string_view strView(str); const auto ret = strView.find(*f); if (ret != std::string::npos) { std::basic_string_view fView(f, l); strView = strView.substr(ret); for (; strView.size() >= l; strView = strView.substr(1)) { if (strView.substr(0, l) == fView) { break; } } return strView.size() >= l ? strView.data() : strView.data() + strView.size(); } else { return strView.data() + strView.size(); } } template struct TStringDelimiter { inline TStringDelimiter(Char* delim) noexcept : Delim(delim) , Len(std::char_traits::length(delim)) { } inline TStringDelimiter(Char* delim, size_t len) noexcept : Delim(delim) , Len(len) { } inline Char* Find(Char*& b, Char* e) const noexcept { const auto ret = std::basic_string_view(b, e - b).find(Delim, 0, Len); if (ret != std::string::npos) { const auto result = b + ret; b = result + Len; return result; } return (b = e); } inline Char* Find(Char*& b) const noexcept { Char* ret = FastStrStr(b, Delim, Len); b = *ret ? ret + Len : ret; return ret; } Char* Delim; const size_t Len; }; template struct TCharDelimiter { inline TCharDelimiter(Char ch) noexcept : Ch(ch) { } inline Char* Find(Char*& b, Char* e) const noexcept { const auto ret = std::basic_string_view(b, e - b).find(Ch); if (ret != std::string::npos) { const auto result = b + ret; b = result + 1; return result; } return (b = e); } inline Char* Find(Char*& b) const noexcept { Char* ret = FastStrChr(b, Ch); if (*ret) { b = ret + 1; } else { b = ret; } return ret; } Char Ch; }; template struct TFuncDelimiter { public: template TFuncDelimiter(Args&&... args) : Fn(std::forward(args)...) { } inline Iterator Find(Iterator& b, Iterator e) const noexcept { if ((b = std::find_if(b, e, Fn)) != e) { return b++; } return b; } private: Condition Fn; }; template struct TFindFirstOf { inline TFindFirstOf(Char* set) : Set(set) { } inline Char* FindFirstOf(Char* b, Char* e) const noexcept { Char* ret = b; for (; ret != e; ++ret) { if (NStringSplitPrivate::Find(Set, *ret)) break; } return ret; } inline Char* FindFirstOf(Char* b) const noexcept { const std::basic_string_view bView(b); const auto ret = bView.find_first_of(Set); return ret != std::string::npos ? b + ret : b + bView.size(); } Char* Set; }; template <> struct TFindFirstOf: public TCompactStrSpn { inline TFindFirstOf(const char* set, const char* e) : TCompactStrSpn(set, e) { } inline TFindFirstOf(const char* set) : TCompactStrSpn(set) { } }; template struct TSetDelimiter: private TFindFirstOf { using TFindFirstOf::TFindFirstOf; inline Char* Find(Char*& b, Char* e) const noexcept { Char* ret = const_cast(this->FindFirstOf(b, e)); if (ret != e) { b = ret + 1; return ret; } return (b = e); } inline Char* Find(Char*& b) const noexcept { Char* ret = const_cast(this->FindFirstOf(b)); if (*ret) { b = ret + 1; return ret; } return (b = ret); } }; namespace NSplitTargetHasPushBack { Y_HAS_MEMBER(push_back, PushBack); } template struct TConsumerBackInserter; template struct TConsumerBackInserter::value>> { static void DoInsert(T* C, const typename T::value_type& i) { C->push_back(i); } }; template struct TConsumerBackInserter::value>> { static void DoInsert(T* C, const typename T::value_type& i) { C->insert(C->end(), i); } }; template struct TContainerConsumer { inline TContainerConsumer(T* c) noexcept : C(c) { } template inline bool Consume(I* b, I* d, I* /*e*/) { TConsumerBackInserter::DoInsert(C, typename T::value_type(b, d)); return true; } T* C; }; template struct TContainerConvertingConsumer { inline TContainerConvertingConsumer(T* c) noexcept : C(c) { } template inline bool Consume(I* b, I* d, I* /*e*/) { TConsumerBackInserter::DoInsert(C, FromString(TStringBuf(b, d))); return true; } T* C; }; template struct TLimitingConsumer { inline TLimitingConsumer(size_t cnt, S* slave) noexcept : Cnt(cnt ? cnt - 1 : Max()) , Slave(slave) , Last(nullptr) { } inline bool Consume(I* b, I* d, I* e) { if (!Cnt) { Last = b; return false; } --Cnt; return Slave->Consume(b, d, e); } size_t Cnt; S* Slave; I* Last; }; template struct TSkipEmptyTokens { inline TSkipEmptyTokens(S* slave) noexcept : Slave(slave) { } template inline bool Consume(I* b, I* d, I* e) { if (b != d) { return Slave->Consume(b, d, e); } return true; } S* Slave; }; template struct TKeepDelimiters { inline TKeepDelimiters(S* slave) noexcept : Slave(slave) { } template inline bool Consume(I* b, I* d, I* e) { if (Slave->Consume(b, d, d)) { if (d != e) { return Slave->Consume(d, e, e); } return true; } return false; } S* Slave; }; template struct TSimplePusher { inline bool Consume(char* b, char* d, char*) { *d = 0; C->push_back(b); return true; } T* C; }; template static inline void Split(char* buf, char ch, T* res) { res->resize(0); if (*buf == 0) return; TCharDelimiter delim(ch); TSimplePusher pusher = {res}; SplitString(buf, delim, pusher); } /// Split string into res vector. Res vector is cleared before split. /// Old good slow split function. /// Field delimter is any number of symbols specified in delim (no empty strings in res vector) /// @return number of elements created size_t Split(const char* in, const char* delim, TVector& res); size_t Split(const TString& in, const TString& delim, TVector& res); /// Old split reimplemented for TStringBuf using the new code /// Note that delim can be constructed from char* automatically (it is not cheap though) inline size_t Split(const TStringBuf s, const TSetDelimiter& delim, TVector& res) { res.clear(); TContainerConsumer> res1(&res); TSkipEmptyTokens>> consumer(&res1); SplitString(s.data(), s.data() + s.size(), delim, consumer); return res.size(); } template void GetNext(TStringBuf& s, D delim, P& param) { TStringBuf next = s.NextTok(delim); Y_ENSURE(next.IsInited(), TStringBuf("Split: number of fields less than number of Split output arguments")); param = FromString

(next); } template void GetNext(TStringBuf& s, D delim, TMaybe

& param) { TStringBuf next = s.NextTok(delim); if (next.IsInited()) { param = FromString

(next); } else { param.Clear(); } } // example: // Split(TStringBuf("Sherlock,2014,36.6"), ',', name, year, temperature); template void Split(TStringBuf s, D delim, P1& p1, P2& p2) { GetNext(s, delim, p1); GetNext(s, delim, p2); Y_ENSURE(!s.IsInited(), TStringBuf("Split: number of fields more than number of Split output arguments")); } template void Split(TStringBuf s, D delim, P1& p1, P2& p2, Other&... other) { GetNext(s, delim, p1); Split(s, delim, p2, other...); } /** * \fn auto StringSplitter(...) * * Creates a string splitter object. The only use for it is to call one of its * `Split*` methods, and then do something with the resulting proxy range. * * Some examples: * \code * TVector values = StringSplitter("1\t2\t3").Split('\t'); * * for(TStringBuf part: StringSplitter("1::2::::3").SplitByString("::").SkipEmpty()) { * Cerr << part; * } * * TVector firstTwoValues = StringSplitter("1\t2\t3").Split('\t').Take(2); * \endcode * * Use `Collect` or `AddTo` to store split results into an existing container: * \code * TVector values = {"0"}; * StringSplitter("1\t2\t3").Split('\t').AddTo(&values); * \endcode * Note that `Collect` clears target container, while `AddTo` just inserts values. * You can use these methods with any container that has `emplace` / `emplace_back`. * * Use `ParseInto` to also perform string conversions before inserting values * into target container: * \code * TSet values; * StringSplitter("1\t2\t3").Split('\t').ParseInto(&values); * \endcode */ namespace NStringSplitPrivate { Y_HAS_MEMBER(push_back, PushBack); Y_HAS_MEMBER(insert, Insert); Y_HAS_MEMBER(data, Data); /** * This one is needed here so that `std::string_view -> std::string_view` * conversion works. */ template inline void DoFromString(const Src& src, Dst* dst) { *dst = ::FromString(src); } template inline void DoFromString(const T& src, T* dst) noexcept { *dst = src; } template inline void DoFromString(const T& src, decltype(std::ignore)* dst) noexcept { *dst = src; } template inline Y_WARN_UNUSED_RESULT bool TryDoFromString(const Src& src, Dst* dst) noexcept { return ::TryFromString(src, *dst); } template inline Y_WARN_UNUSED_RESULT bool TryDoFromString(const T& src, T* dst) noexcept { *dst = src; return true; } template inline Y_WARN_UNUSED_RESULT bool TryDoFromString(const T& src, decltype(std::ignore)* dst) noexcept { *dst = src; return true; } /** * Consumer that places provided elements into a container. Not using * `emplace(iterator)` for efficiency. */ template struct TContainerConsumer { using value_type = typename Container::value_type; TContainerConsumer(Container* c) : C_(c) { } // TODO: return bool (continue) template void operator()(StringBuf e) const { this->operator()(C_, e); } private: template auto operator()(OtherContainer* c, StringBuf e) const -> decltype(c->emplace_back()) { return c->emplace_back(value_type(e)); } template auto operator()(OtherContainer* c, StringBuf e) const -> decltype(c->emplace()) { return c->emplace(value_type(e)); } Container* C_; }; /** * Consumer that converts provided elements via `FromString` and places them * into a container. */ template struct TContainerConvertingConsumer { using value_type = typename Container::value_type; TContainerConvertingConsumer(Container* c) : C_(c) { } template void operator()(StringBuf e) const { this->operator()(C_, e); } private: template auto operator()(OtherContainer* c, StringBuf e) const -> decltype(c->emplace_back()) { value_type v; DoFromString(e, &v); return c->emplace_back(std::move(v)); } template auto operator()(OtherContainer* c, StringBuf e) const -> decltype(c->emplace()) { value_type v; DoFromString(e, &v); return c->emplace(std::move(v)); } Container* C_; }; template struct TStringBufOfImpl { using type = std::conditional_t< THasData::value, TBasicStringBuf, TIteratorRange>; }; template struct TStringBufOfImpl> { using type = std::basic_string_view; }; template struct TStringBufOfImpl> { using type = std::basic_string_view; }; /** * Metafunction that returns a string buffer for the given type. This is to * make sure that splitting `std::string` returns `std::string_view`. */ template using TStringBufOf = typename TStringBufOfImpl::type; template StringBuf DoMakeStringBuf(Iterator b, Iterator e, StringBuf*) { return StringBuf(b, e); } template std::basic_string_view DoMakeStringBuf(Iterator b, Iterator e, std::basic_string_view*) { return std::basic_string_view(b, e - b); } template StringBuf MakeStringBuf(Iterator b, Iterator e) { return DoMakeStringBuf(b, e, static_cast(nullptr)); } template struct TIteratorOfImpl { using type = std::conditional_t< THasData::value, const typename String::value_type*, typename String::const_iterator>; }; template using TIteratorOf = typename TIteratorOfImpl::type; template class TStringSplitter; template struct TIterState: public TStringBufOf { public: using TStringBufType = TStringBufOf; using TIterator = TIteratorOf; friend class TStringSplitter; TIterState(const String& string) noexcept : TStringBufType() , DelimiterEnd_(std::begin(string)) , OriginEnd_(std::end(string)) { } template < typename Other, typename = std::enable_if_t< std::is_convertible::value>> bool operator==(const Other& toCompare) const { return TStringBufType(*this) == TStringBufType(toCompare); } TIterator TokenStart() const noexcept { return this->begin(); } TIterator TokenDelim() const noexcept { return this->end(); } TStringBufType Token() const noexcept { return *this; } TStringBufType Delim() const noexcept { return MakeStringBuf(TokenDelim(), DelimiterEnd_); } private: void UpdateParentBuf(TIterator tokenStart, TIterator tokenDelim) noexcept { *static_cast(this) = MakeStringBuf(tokenStart, tokenDelim); } bool DelimiterIsEmpty() const noexcept { return TokenDelim() == DelimiterEnd_; } private: TIterator DelimiterEnd_; const TIterator OriginEnd_; }; template class TSplitRange: public Base, public TInputRangeAdaptor> { using TStringBufType = decltype(std::declval().Next()->Token()); public: template inline TSplitRange(Args&&... args) : Base(std::forward(args)...) { } template ()(std::declval())), void>::value, int>* = nullptr> inline void Consume(Consumer&& f) { for (auto&& it : *this) { f(it.Token()); } } template ()(std::declval())), bool>::value, int>* = nullptr> inline bool Consume(Consumer&& f) { for (auto&& it : *this) { if (!f(it.Token())) { return false; } } return true; } template ::value || THasPushBack::value>> operator Container() { Container result; AddTo(&result); return result; } template inline TVector ToList() { TVector result; for (auto&& it : *this) { result.push_back(S(it.Token())); } return result; } template inline void Collect(Container* c) { Y_ASSERT(c); c->clear(); AddTo(c); } template inline void AddTo(Container* c) { Y_ASSERT(c); TContainerConsumer consumer(c); Consume(consumer); } template inline void ParseInto(Container* c) { Y_ASSERT(c); TContainerConvertingConsumer consumer(c); Consume(consumer); } // TODO: this is actually TryParseInto /** * Same as `CollectInto`, just doesn't throw. * * \param[out] args Output arguments. * \returns Whether parsing was successful. */ template inline bool TryCollectInto(Args*... args) noexcept { size_t successfullyFilled = 0; auto it = this->begin(); //FIXME: actually, some kind of TryApplyToMany is needed in order to stop iteration upon first failure ApplyToMany([&](auto&& arg) { if (it != this->end()) { if (TryDoFromString(it->Token(), arg)) { ++successfullyFilled; } ++it; } }, args...); return successfullyFilled == sizeof...(args) && it == this->end(); } // TODO: this is actually ParseInto /** * Splits and parses everything that's in this splitter into `args`. * * Example usage: * \code * int l, r; * StringSplitter("100*200").Split('*').CollectInto(&l, &r); * \endcode * * \param[out] args Output arguments. * \throws If not all items were parsed, or * if there were too many items in the split. */ template inline void CollectInto(Args*... args) { Y_ENSURE(TryCollectInto(args...)); } inline size_t Count() { size_t cnt = 0; for (auto&& it : *this) { Y_UNUSED(it); ++cnt; } return cnt; } }; template class TStringSplitter { using TStringType = String; using TChar = typename TStringType::value_type; using TIteratorState = TIterState; using TStringBufType = typename TIteratorState::TStringBufType; using TIterator = typename TIteratorState::TIterator; /** * Base class for all split ranges that actually does the splitting. */ template struct TSplitRangeBase { template inline TSplitRangeBase(OtherString&& s, Args&&... args) : String_(std::forward(s)) , State_(String_) , Delimiter_(std::forward(args)...) { } inline TIteratorState* Next() { if (State_.DelimiterIsEmpty()) { return nullptr; } const auto tokenBegin = State_.DelimiterEnd_; const auto tokenEnd = Delimiter_.Ptr()->Find(State_.DelimiterEnd_, State_.OriginEnd_); State_.UpdateParentBuf(tokenBegin, tokenEnd); return &State_; } private: TStringType String_; TIteratorState State_; DelimStorage Delimiter_; }; template struct TFilterRange: public Base { template inline TFilterRange(const Base& base, Args&&... args) : Base(base) , Filter_(std::forward(args)...) { } inline TIteratorState* Next() { TIteratorState* ret; do { ret = Base::Next(); } while (ret && !Filter_.Accept(ret)); return ret; } Filter Filter_; }; struct TNonEmptyFilter { template inline bool Accept(const TToken* token) noexcept { return !token->empty(); } }; template struct TStopIteration; template struct TFilters: public Base { template using TIt = TSplitRange>>>; template inline TFilters(Args&&... args) : Base(std::forward(args)...) { } inline TIt SkipEmpty() const { return {*this}; } }; template struct TStopRange: public Base { template inline TStopRange(const Base& base, Args&&... args) : Base(base) , Stopper_(std::forward(args)...) { } inline TIteratorState* Next() { TIteratorState* ret = Base::Next(); if (!ret || Stopper_.Stop(ret)) { return nullptr; } return ret; } Stopper Stopper_; }; struct TTake { TTake() = default; TTake(size_t count) : Count(count) { } template inline bool Stop(TToken*) noexcept { if (Count > 0) { --Count; return false; } else { return true; } } size_t Count = 0; }; struct TLimit { TLimit() = default; TLimit(size_t count) : Count(count) { Y_ASSERT(Count > 0); } template inline bool Stop(TToken* token) noexcept { if (Count > 1) { --Count; return false; } else if (Count == 1) { token->DelimiterEnd_ = token->OriginEnd_; token->UpdateParentBuf(token->TokenStart(), token->DelimiterEnd_); return false; } return true; } size_t Count = 0; }; template struct TStopIteration: public Base { template using TIt = TSplitRange>>>; template inline TStopIteration(Args&&... args) : Base(std::forward(args)...) { } inline TIt Take(size_t count) { return {*this, count}; } inline TIt Limit(size_t count) { return {*this, count}; } }; template using TIt = TSplitRange>>>; public: template explicit TStringSplitter(OtherString&& s) : String_(std::forward(s)) { } //does not own TDelim template inline TIt> Split(const TDelim& d) const noexcept { return {String_, &d}; } inline TIt>> Split(TChar ch) const noexcept { return {String_, ch}; } inline TIt>> SplitBySet(const TChar* set) const noexcept { return {String_, set}; } inline TIt>> SplitByString(const TStringBufType& str) const noexcept { return {String_, str.data(), str.size()}; } template inline TIt>> SplitByFunc(TFunc f) const noexcept { return {String_, f}; } private: TStringType String_; }; template auto MakeStringSplitter(String&& s) { return TStringSplitter>(std::forward(s)); } } template auto StringSplitter(Iterator begin, Iterator end) { return ::NStringSplitPrivate::MakeStringSplitter(TIteratorRange(begin, end)); } template auto StringSplitter(const Char* begin, const Char* end) { return ::NStringSplitPrivate::MakeStringSplitter(TBasicStringBuf(begin, end)); } template auto StringSplitter(const Char* begin, size_t len) { return ::NStringSplitPrivate::MakeStringSplitter(TBasicStringBuf(begin, len)); } template auto StringSplitter(const Char* str) { return ::NStringSplitPrivate::MakeStringSplitter(TBasicStringBuf(str)); } template >::value, int> = 0> auto StringSplitter(String& s) { return ::NStringSplitPrivate::MakeStringSplitter(::NStringSplitPrivate::TStringBufOf(s.data(), s.size())); } template >::value, int> = 0> auto StringSplitter(String&& s) { return ::NStringSplitPrivate::MakeStringSplitter(std::move(s)); }