123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318 |
- #include "split_iterator.h"
- #include <util/system/yassert.h>
- #include <cctype>
- #include <cstring>
- #include <cstdlib>
/****************** TSplitDelimiters ******************/
- TSplitDelimiters::TSplitDelimiters(const char* s) {
- memset(Delims, 0, sizeof(Delims));
- while (*s)
- Delims[(ui8) * (s++)] = true;
- }
- /****************** TSplitBase ******************/
- TSplitBase::TSplitBase(const char* str, size_t length)
- : Str(str)
- , Len(length)
- {
- }
- TSplitBase::TSplitBase(const TString& s)
- : Str(s.data())
- , Len(s.size())
- {
- }
- /****************** TDelimitersSplit ******************/
- TDelimitersSplit::TDelimitersSplit(const char* str, size_t length, const TSplitDelimiters& delimiters)
- : TSplitBase(str, length)
- , Delimiters(delimiters)
- {
- }
- TDelimitersSplit::TDelimitersSplit(const TString& s, const TSplitDelimiters& delimiters)
- : TSplitBase(s)
- , Delimiters(delimiters)
- {
- }
- size_t TDelimitersSplit::Begin() const {
- size_t pos = 0;
- while ((pos < Len) && Delimiters.IsDelimiter(Str[pos]))
- ++pos;
- return pos;
- }
- TSizeTRegion TDelimitersSplit::Next(size_t& pos) const {
- size_t begin = pos;
- while ((pos < Len) && !Delimiters.IsDelimiter(Str[pos]))
- ++pos;
- TSizeTRegion result(begin, pos);
- while ((pos < Len) && Delimiters.IsDelimiter(Str[pos]))
- ++pos;
- return result;
- }
- TDelimitersSplit::TIterator TDelimitersSplit::Iterator() const {
- return TIterator(*this);
- }
- /****************** TDelimitersStrictSplit ******************/
- TDelimitersStrictSplit::TDelimitersStrictSplit(const char* str, size_t length, const TSplitDelimiters& delimiters)
- : TSplitBase(str, length)
- , Delimiters(delimiters)
- {
- }
- TDelimitersStrictSplit::TDelimitersStrictSplit(const TString& s, const TSplitDelimiters& delimiters)
- : TSplitBase(s)
- , Delimiters(delimiters)
- {
- }
- TDelimitersStrictSplit::TIterator TDelimitersStrictSplit::Iterator() const {
- return TIterator(*this);
- }
- TSizeTRegion TDelimitersStrictSplit::Next(size_t& pos) const {
- size_t begin = pos;
- while ((pos < Len) && !Delimiters.IsDelimiter(Str[pos]))
- ++pos;
- TSizeTRegion result(begin, pos);
- if (pos < Len)
- ++pos;
- return result;
- }
- size_t TDelimitersStrictSplit::Begin() const {
- return 0;
- }
- /****************** TScreenedDelimitersSplit ******************/
- TScreenedDelimitersSplit::TScreenedDelimitersSplit(const TString& s, const TSplitDelimiters& delimiters, const TSplitDelimiters& screens)
- : TSplitBase(s)
- , Delimiters(delimiters)
- , Screens(screens)
- {
- }
- TScreenedDelimitersSplit::TScreenedDelimitersSplit(const char* str, size_t length, const TSplitDelimiters& delimiters, const TSplitDelimiters& screens)
- : TSplitBase(str, length)
- , Delimiters(delimiters)
- , Screens(screens)
- {
- }
- TScreenedDelimitersSplit::TIterator TScreenedDelimitersSplit::Iterator() const {
- return TIterator(*this);
- }
- TSizeTRegion TScreenedDelimitersSplit::Next(size_t& pos) const {
- size_t begin = pos;
- bool screened = false;
- while (pos < Len) {
- if (Screens.IsDelimiter(Str[pos]))
- screened = !screened;
- if (Delimiters.IsDelimiter(Str[pos]) && !screened)
- break;
- ++pos;
- }
- TSizeTRegion result(begin, pos);
- if (pos < Len)
- ++pos;
- return result;
- }
- size_t TScreenedDelimitersSplit::Begin() const {
- return 0;
- }
- /****************** TDelimitersSplitWithoutTags ******************/
- TDelimitersSplitWithoutTags::TDelimitersSplitWithoutTags(const char* str, size_t length, const TSplitDelimiters& delimiters)
- : TSplitBase(str, length)
- , Delimiters(delimiters)
- {
- }
- TDelimitersSplitWithoutTags::TDelimitersSplitWithoutTags(const TString& s, const TSplitDelimiters& delimiters)
- : TSplitBase(s)
- , Delimiters(delimiters)
- {
- }
- size_t TDelimitersSplitWithoutTags::SkipTag(size_t pos) const {
- Y_ASSERT('<' == Str[pos]);
- while ((pos < Len) && ('>' != Str[pos]))
- ++pos;
- return pos + 1;
- }
- size_t TDelimitersSplitWithoutTags::SkipDelimiters(size_t pos) const {
- while (true) {
- while ((pos < Len) && Delimiters.IsDelimiter(Str[pos]) && ('<' != Str[pos]))
- ++pos;
- if (pos < Len) {
- if ('<' != Str[pos])
- break;
- else
- pos = SkipTag(pos);
- } else
- break;
- }
- return pos;
- }
- size_t TDelimitersSplitWithoutTags::Begin() const {
- size_t pos = 0;
- pos = SkipDelimiters(pos);
- return pos;
- }
- TSizeTRegion TDelimitersSplitWithoutTags::Next(size_t& pos) const {
- size_t begin = pos;
- while ((pos < Len) && !Delimiters.IsDelimiter(Str[pos]) && ('<' != Str[pos]))
- ++pos;
- TSizeTRegion result(begin, pos);
- pos = SkipDelimiters(pos);
- return result;
- }
- TDelimitersSplitWithoutTags::TIterator TDelimitersSplitWithoutTags::Iterator() const {
- return TIterator(*this);
- }
- /****************** TCharSplit ******************/
- TCharSplit::TCharSplit(const char* str, size_t length)
- : TSplitBase(str, length)
- {
- }
- TCharSplit::TCharSplit(const TString& s)
- : TSplitBase(s)
- {
- }
- TCharSplit::TIterator TCharSplit::Iterator() const {
- return TIterator(*this);
- }
- TSizeTRegion TCharSplit::Next(size_t& pos) const {
- TSizeTRegion result(pos, pos + 1);
- ++pos;
- return result;
- }
- size_t TCharSplit::Begin() const {
- return 0;
- }
- /****************** TCharSplitWithoutTags ******************/
- TCharSplitWithoutTags::TCharSplitWithoutTags(const char* str, size_t length)
- : TSplitBase(str, length)
- {
- }
- TCharSplitWithoutTags::TCharSplitWithoutTags(const TString& s)
- : TSplitBase(s)
- {
- }
- size_t TCharSplitWithoutTags::SkipTag(size_t pos) const {
- Y_ASSERT('<' == Str[pos]);
- while ((pos < Len) && ('>' != Str[pos]))
- ++pos;
- return pos + 1;
- }
- size_t TCharSplitWithoutTags::SkipDelimiters(size_t pos) const {
- while (true) {
- if (pos < Len) {
- if ('<' != Str[pos])
- break;
- else
- pos = SkipTag(pos);
- } else
- break;
- }
- return pos;
- }
- size_t TCharSplitWithoutTags::Begin() const {
- size_t pos = 0;
- pos = SkipDelimiters(pos);
- return pos;
- }
- TSizeTRegion TCharSplitWithoutTags::Next(size_t& pos) const {
- size_t begin = pos++;
- TSizeTRegion result(begin, pos);
- pos = SkipDelimiters(pos);
- return result;
- }
- TCharSplitWithoutTags::TIterator TCharSplitWithoutTags::Iterator() const {
- return TIterator(*this);
- }
- TSubstringSplitDelimiter::TSubstringSplitDelimiter(const TString& s)
- : Matcher(s)
- , Len(s.size())
- {
- }
- /****************** TSubstringSplit ******************/
- TSubstringSplit::TSubstringSplit(const char* str, size_t length, const TSubstringSplitDelimiter& delimiter)
- : TSplitBase(str, length)
- , Delimiter(delimiter)
- {
- }
- TSubstringSplit::TSubstringSplit(const TString& str, const TSubstringSplitDelimiter& delimiter)
- : TSplitBase(str)
- , Delimiter(delimiter)
- {
- }
- TSubstringSplit::TIterator TSubstringSplit::Iterator() const {
- return TIterator(*this);
- }
- TSizeTRegion TSubstringSplit::Next(size_t& pos) const {
- const char* begin = Str + pos;
- const char* end = Str + Len;
- const char* delim;
- if (Delimiter.Matcher.SubStr(begin, end, delim)) {
- TSizeTRegion result(pos, delim - begin + pos);
- pos += delim - begin + Delimiter.Len;
- return result;
- } else {
- TSizeTRegion result(pos, end - begin + pos);
- pos += end - begin;
- return result;
- }
- }
- size_t TSubstringSplit::Begin() const {
- return 0;
- }
|