#include "split_iterator.h" #include #include #include #include /****************** TSplitDelimiters2 ******************/ TSplitDelimiters::TSplitDelimiters(const char* s) { memset(Delims, 0, sizeof(Delims)); while (*s) Delims[(ui8) * (s++)] = true; } /****************** TSplitBase ******************/ TSplitBase::TSplitBase(const char* str, size_t length) : Str(str) , Len(length) { } TSplitBase::TSplitBase(const TString& s) : Str(s.data()) , Len(s.size()) { } /****************** TDelimitersSplit ******************/ TDelimitersSplit::TDelimitersSplit(const char* str, size_t length, const TSplitDelimiters& delimiters) : TSplitBase(str, length) , Delimiters(delimiters) { } TDelimitersSplit::TDelimitersSplit(const TString& s, const TSplitDelimiters& delimiters) : TSplitBase(s) , Delimiters(delimiters) { } size_t TDelimitersSplit::Begin() const { size_t pos = 0; while ((pos < Len) && Delimiters.IsDelimiter(Str[pos])) ++pos; return pos; } TSizeTRegion TDelimitersSplit::Next(size_t& pos) const { size_t begin = pos; while ((pos < Len) && !Delimiters.IsDelimiter(Str[pos])) ++pos; TSizeTRegion result(begin, pos); while ((pos < Len) && Delimiters.IsDelimiter(Str[pos])) ++pos; return result; } TDelimitersSplit::TIterator TDelimitersSplit::Iterator() const { return TIterator(*this); } /****************** TDelimitersStrictSplit ******************/ TDelimitersStrictSplit::TDelimitersStrictSplit(const char* str, size_t length, const TSplitDelimiters& delimiters) : TSplitBase(str, length) , Delimiters(delimiters) { } TDelimitersStrictSplit::TDelimitersStrictSplit(const TString& s, const TSplitDelimiters& delimiters) : TSplitBase(s) , Delimiters(delimiters) { } TDelimitersStrictSplit::TIterator TDelimitersStrictSplit::Iterator() const { return TIterator(*this); } TSizeTRegion TDelimitersStrictSplit::Next(size_t& pos) const { size_t begin = pos; while ((pos < Len) && !Delimiters.IsDelimiter(Str[pos])) ++pos; TSizeTRegion result(begin, pos); if (pos < Len) ++pos; return result; } size_t TDelimitersStrictSplit::Begin() const { return 0; } /****************** TScreenedDelimitersSplit ******************/ TScreenedDelimitersSplit::TScreenedDelimitersSplit(const TString& s, const TSplitDelimiters& delimiters, const TSplitDelimiters& screens) : TSplitBase(s) , Delimiters(delimiters) , Screens(screens) { } TScreenedDelimitersSplit::TScreenedDelimitersSplit(const char* str, size_t length, const TSplitDelimiters& delimiters, const TSplitDelimiters& screens) : TSplitBase(str, length) , Delimiters(delimiters) , Screens(screens) { } TScreenedDelimitersSplit::TIterator TScreenedDelimitersSplit::Iterator() const { return TIterator(*this); } TSizeTRegion TScreenedDelimitersSplit::Next(size_t& pos) const { size_t begin = pos; bool screened = false; while (pos < Len) { if (Screens.IsDelimiter(Str[pos])) screened = !screened; if (Delimiters.IsDelimiter(Str[pos]) && !screened) break; ++pos; } TSizeTRegion result(begin, pos); if (pos < Len) ++pos; return result; } size_t TScreenedDelimitersSplit::Begin() const { return 0; } /****************** TDelimitersSplitWithoutTags ******************/ TDelimitersSplitWithoutTags::TDelimitersSplitWithoutTags(const char* str, size_t length, const TSplitDelimiters& delimiters) : TSplitBase(str, length) , Delimiters(delimiters) { } TDelimitersSplitWithoutTags::TDelimitersSplitWithoutTags(const TString& s, const TSplitDelimiters& delimiters) : TSplitBase(s) , Delimiters(delimiters) { } size_t TDelimitersSplitWithoutTags::SkipTag(size_t pos) const { Y_ASSERT('<' == Str[pos]); while ((pos < Len) && ('>' != Str[pos])) ++pos; return pos + 1; } size_t TDelimitersSplitWithoutTags::SkipDelimiters(size_t pos) const { while (true) { while ((pos < Len) && Delimiters.IsDelimiter(Str[pos]) && ('<' != Str[pos])) ++pos; if (pos < Len) { if ('<' != Str[pos]) break; else pos = SkipTag(pos); } else break; } return pos; } size_t TDelimitersSplitWithoutTags::Begin() const { size_t pos = 0; pos = SkipDelimiters(pos); return pos; } TSizeTRegion TDelimitersSplitWithoutTags::Next(size_t& pos) const { size_t begin = pos; while ((pos < Len) && !Delimiters.IsDelimiter(Str[pos]) && ('<' != Str[pos])) ++pos; TSizeTRegion result(begin, pos); pos = SkipDelimiters(pos); return result; } TDelimitersSplitWithoutTags::TIterator TDelimitersSplitWithoutTags::Iterator() const { return TIterator(*this); } /****************** TCharSplit ******************/ TCharSplit::TCharSplit(const char* str, size_t length) : TSplitBase(str, length) { } TCharSplit::TCharSplit(const TString& s) : TSplitBase(s) { } TCharSplit::TIterator TCharSplit::Iterator() const { return TIterator(*this); } TSizeTRegion TCharSplit::Next(size_t& pos) const { TSizeTRegion result(pos, pos + 1); ++pos; return result; } size_t TCharSplit::Begin() const { return 0; } /****************** TCharSplitWithoutTags ******************/ TCharSplitWithoutTags::TCharSplitWithoutTags(const char* str, size_t length) : TSplitBase(str, length) { } TCharSplitWithoutTags::TCharSplitWithoutTags(const TString& s) : TSplitBase(s) { } size_t TCharSplitWithoutTags::SkipTag(size_t pos) const { Y_ASSERT('<' == Str[pos]); while ((pos < Len) && ('>' != Str[pos])) ++pos; return pos + 1; } size_t TCharSplitWithoutTags::SkipDelimiters(size_t pos) const { while (true) { if (pos < Len) { if ('<' != Str[pos]) break; else pos = SkipTag(pos); } else break; } return pos; } size_t TCharSplitWithoutTags::Begin() const { size_t pos = 0; pos = SkipDelimiters(pos); return pos; } TSizeTRegion TCharSplitWithoutTags::Next(size_t& pos) const { size_t begin = pos++; TSizeTRegion result(begin, pos); pos = SkipDelimiters(pos); return result; } TCharSplitWithoutTags::TIterator TCharSplitWithoutTags::Iterator() const { return TIterator(*this); } TSubstringSplitDelimiter::TSubstringSplitDelimiter(const TString& s) : Matcher(s) , Len(s.size()) { } /****************** TSubstringSplit ******************/ TSubstringSplit::TSubstringSplit(const char* str, size_t length, const TSubstringSplitDelimiter& delimiter) : TSplitBase(str, length) , Delimiter(delimiter) { } TSubstringSplit::TSubstringSplit(const TString& str, const TSubstringSplitDelimiter& delimiter) : TSplitBase(str) , Delimiter(delimiter) { } TSubstringSplit::TIterator TSubstringSplit::Iterator() const { return TIterator(*this); } TSizeTRegion TSubstringSplit::Next(size_t& pos) const { const char* begin = Str + pos; const char* end = Str + Len; const char* delim; if (Delimiter.Matcher.SubStr(begin, end, delim)) { TSizeTRegion result(pos, delim - begin + pos); pos += delim - begin + Delimiter.Len; return result; } else { TSizeTRegion result(pos, end - begin + pos); pos += end - begin; return result; } } size_t TSubstringSplit::Begin() const { return 0; }