123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317 |
- #pragma once
- #include <library/cpp/deprecated/kmp/kmp.h>
- #include <util/string/cast.h>
- #include <util/string/util.h>
- #include <util/string/builder.h>
- #include <util/system/yassert.h>
- #include <util/system/defaults.h>
- #include <util/generic/strbuf.h>
- #include <util/generic/string.h>
- #include <util/generic/vector.h>
- #include <util/generic/yexception.h>
- #include <cstdio>
/// Pair of bounds of a single numeric type, describing a region from Begin to End.
template <typename T>
struct TNumPair {
    T Begin;
    T End;

    /// Leaves both bounds default-initialized.
    TNumPair() = default;

    /// Stores the given bounds; Y_ASSERT checks that begin does not exceed end.
    TNumPair(T begin, T end)
        : Begin(begin)
        , End(end)
    {
        Y_ASSERT(begin <= end);
    }

    /// Size of the region when both Begin and End are counted (End - Begin + 1).
    T Length() const {
        return End - Begin + 1;
    }

    /// Two pairs are equal when both bounds match.
    bool operator==(const TNumPair& r) const {
        if (!(Begin == r.Begin)) {
            return false;
        }
        return End == r.End;
    }

    /// Two pairs differ when at least one bound differs.
    bool operator!=(const TNumPair& r) const {
        if (Begin != r.Begin) {
            return true;
        }
        return End != r.End;
    }
};
- using TSizeTRegion = TNumPair<size_t>;
- using TUi32Region = TNumPair<ui32>;
- template <>
- inline TString ToString(const TUi32Region& r) {
- return TStringBuilder() << "(" << r.Begin << ", " << r.End << ")";
- }
- template <>
- inline TUi32Region FromString(const TString& s) {
- TUi32Region result;
- sscanf(s.data(), "(%" PRIu32 ", %" PRIu32 ")", &result.Begin, &result.End);
- return result;
- }
- class TSplitDelimiters {
- private:
- bool Delims[256];
- public:
- explicit TSplitDelimiters(const char* s);
- Y_FORCE_INLINE bool IsDelimiter(ui8 ch) const {
- return Delims[ch];
- }
- };
// Forward declaration so the split classes can name TSplitIterator<...> in
// their TIterator alias and friend declaration before the template is defined.
template <class TSplit>
class TSplitIterator;
- class TSplitBase {
- protected:
- const char* Str;
- size_t Len;
- public:
- TSplitBase(const char* str, size_t length);
- TSplitBase(const TString& s);
- Y_FORCE_INLINE const char* GetString() const {
- return Str;
- }
- Y_FORCE_INLINE size_t GetLength() const {
- return Len;
- }
- private:
- // we don't own Str, make sure that no one calls us with temporary object
- TSplitBase(TString&&) = delete;
- };
- #ifdef _MSC_VER
- #pragma warning(push)
- #pragma warning(disable : 4512)
- #endif
- class TDelimitersSplit: public TSplitBase {
- private:
- const TSplitDelimiters& Delimiters;
- public:
- using TIterator = TSplitIterator<TDelimitersSplit>;
- friend class TSplitIterator<TDelimitersSplit>;
- TDelimitersSplit(const char* str, size_t length, const TSplitDelimiters& delimiters);
- TDelimitersSplit(const TString& s, const TSplitDelimiters& delimiters);
- TIterator Iterator() const;
- TSizeTRegion Next(size_t& pos) const;
- size_t Begin() const;
- private:
- // we don't own Delimiters, make sure that no one calls us with temporary object
- TDelimitersSplit(const char*, size_t, TSplitDelimiters&&) = delete;
- TDelimitersSplit(const TString&, TSplitDelimiters&&) = delete;
- TDelimitersSplit(TString&&, const TSplitDelimiters&) = delete;
- };
- class TDelimitersStrictSplit: public TSplitBase {
- private:
- const TSplitDelimiters& Delimiters;
- public:
- using TIterator = TSplitIterator<TDelimitersStrictSplit>;
- friend class TSplitIterator<TDelimitersStrictSplit>;
- TDelimitersStrictSplit(const char* str, size_t length, const TSplitDelimiters& delimiters);
- TDelimitersStrictSplit(const TString& s, const TSplitDelimiters& delimiters);
- TIterator Iterator() const;
- TSizeTRegion Next(size_t& pos) const;
- size_t Begin() const;
- private:
- // we don't own Delimiters, make sure that no one calls us with temporary object
- TDelimitersStrictSplit(const char*, size_t, TSplitDelimiters&&) = delete;
- TDelimitersStrictSplit(const TString&, TSplitDelimiters&&) = delete;
- TDelimitersStrictSplit(TString&&, const TSplitDelimiters&) = delete;
- };
- class TScreenedDelimitersSplit: public TSplitBase {
- private:
- const TSplitDelimiters& Delimiters;
- const TSplitDelimiters& Screens;
- public:
- using TIterator = TSplitIterator<TScreenedDelimitersSplit>;
- friend class TSplitIterator<TScreenedDelimitersSplit>;
- TScreenedDelimitersSplit(const char*, size_t, const TSplitDelimiters& delimiters, const TSplitDelimiters& screens);
- TScreenedDelimitersSplit(const TString& s, const TSplitDelimiters& delimiters, const TSplitDelimiters& screens);
- TIterator Iterator() const;
- TSizeTRegion Next(size_t& pos) const;
- size_t Begin() const;
- private:
- // we don't own Delimiters and Screens, make sure that no one calls us with temporary object
- TScreenedDelimitersSplit(TString&&, const TSplitDelimiters&, const TSplitDelimiters&) = delete;
- TScreenedDelimitersSplit(const TString&, TSplitDelimiters&&, const TSplitDelimiters&) = delete;
- TScreenedDelimitersSplit(const TString&, const TSplitDelimiters&, TSplitDelimiters&&) = delete;
- };
- class TDelimitersSplitWithoutTags: public TSplitBase {
- private:
- const TSplitDelimiters& Delimiters;
- size_t SkipTag(size_t pos) const;
- size_t SkipDelimiters(size_t pos) const;
- public:
- using TIterator = TSplitIterator<TDelimitersSplitWithoutTags>;
- friend class TSplitIterator<TDelimitersSplitWithoutTags>;
- TDelimitersSplitWithoutTags(const char* str, size_t length, const TSplitDelimiters& delimiters);
- TDelimitersSplitWithoutTags(const TString& s, const TSplitDelimiters& delimiters);
- TIterator Iterator() const;
- TSizeTRegion Next(size_t& pos) const;
- size_t Begin() const;
- private:
- // we don't own Delimiters, make sure that no one calls us with temporary object
- TDelimitersSplitWithoutTags(const char*, size_t, TSplitDelimiters&&) = delete;
- TDelimitersSplitWithoutTags(const TString&, TSplitDelimiters&&) = delete;
- TDelimitersSplitWithoutTags(TString&&, const TSplitDelimiters&) = delete;
- };
- class TCharSplit: public TSplitBase {
- public:
- using TIterator = TSplitIterator<TCharSplit>;
- friend class TSplitIterator<TCharSplit>;
- TCharSplit(const char* str, size_t length);
- TCharSplit(const TString& s);
- TIterator Iterator() const;
- TSizeTRegion Next(size_t& pos) const;
- size_t Begin() const;
- private:
- // we don't own Str, make sure that no one calls us with temporary object
- TCharSplit(TString&&) = delete;
- };
- #ifdef _MSC_VER
- #pragma warning(pop)
- #endif
- class TCharSplitWithoutTags: public TSplitBase {
- private:
- size_t SkipTag(size_t pos) const;
- size_t SkipDelimiters(size_t pos) const;
- public:
- using TIterator = TSplitIterator<TCharSplitWithoutTags>;
- friend class TSplitIterator<TCharSplitWithoutTags>;
- TCharSplitWithoutTags(const char* str, size_t length);
- TCharSplitWithoutTags(const TString& s);
- TIterator Iterator() const;
- TSizeTRegion Next(size_t& pos) const;
- size_t Begin() const;
- private:
- // we don't own Str, make sure that no one calls us with temporary object
- TCharSplitWithoutTags(TString&&) = delete;
- };
- class TSubstringSplitDelimiter {
- public:
- TKMPMatcher Matcher;
- size_t Len;
- TSubstringSplitDelimiter(const TString& s);
- };
- class TSubstringSplit: public TSplitBase {
- private:
- const TSubstringSplitDelimiter& Delimiter;
- public:
- using TIterator = TSplitIterator<TSubstringSplit>;
- friend class TSplitIterator<TSubstringSplit>;
- TSubstringSplit(const char* str, size_t length, const TSubstringSplitDelimiter& delimiter);
- TSubstringSplit(const TString& str, const TSubstringSplitDelimiter& delimiter);
- TIterator Iterator() const;
- TSizeTRegion Next(size_t& pos) const;
- size_t Begin() const;
- private:
- // we don't own Delimiters, make sure that no one calls us with temporary object
- TSubstringSplit(TString&&, const TSubstringSplitDelimiter&) = delete;
- TSubstringSplit(const TString&, TSubstringSplitDelimiter&&) = delete;
- };
- template <class TSplit>
- class TSplitIterator {
- protected:
- const TSplit& Split;
- size_t Pos;
- TString* CurrentStroka;
- public:
- TSplitIterator(const TSplit& split)
- : Split(split)
- , Pos(Split.Begin())
- , CurrentStroka(nullptr)
- {
- }
- virtual ~TSplitIterator() {
- delete CurrentStroka;
- }
- inline TSizeTRegion Next() {
- Y_ENSURE(!Eof(), TStringBuf("eof reached"));
- return Split.Next(Pos);
- }
- TStringBuf NextTok() {
- if (Eof())
- return TStringBuf();
- TSizeTRegion region = Next();
- return TStringBuf(Split.Str + region.Begin, region.End - region.Begin);
- }
- const TString& NextString() {
- if (!CurrentStroka)
- CurrentStroka = new TString();
- TSizeTRegion region = Next();
- CurrentStroka->assign(Split.Str, region.Begin, region.Length() - 1);
- return *CurrentStroka;
- }
- inline bool Eof() const {
- return Pos >= Split.Len;
- }
- TString GetTail() const {
- return TString(Split.Str + Pos);
- }
- void Skip(size_t count) {
- for (size_t i = 0; i < count; ++i)
- Next();
- }
- };
- using TSplitTokens = TVector<TString>;
- template <typename TSplit>
- void Split(const TSplit& split, TSplitTokens* words) {
- words->clear();
- TSplitIterator<TSplit> it(split);
- while (!it.Eof())
- words->push_back(it.NextString());
- }
|