123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214 |
#pragma once

#include "input.h"

#include <util/generic/buffer.h>
#include <util/generic/mem_copy.h>
#include <util/generic/strbuf.h>
#include <util/system/compiler.h>
#include <util/system/yassert.h>

#include <new>
/**
 * @addtogroup Streams
 * @{
 */
- /**
- * Simple stream tokenizer. Splits the stream into tokens that are available
- * via iterator interface.
- *
- * @tparam TEndOfToken Predicate for token delimiter characters.
- * @see TEol
- */
- template <typename TEndOfToken>
- class TStreamTokenizer {
- public:
- class TIterator {
- public:
- inline TIterator(TStreamTokenizer* const parent)
- : Parent_(parent)
- , AtEnd_(!Parent_->Next(Data_, Len_))
- {
- }
- inline TIterator() noexcept
- : Parent_(nullptr)
- , Data_(nullptr)
- , Len_(0)
- , AtEnd_(true)
- {
- }
- inline ~TIterator() = default;
- inline void operator++() {
- Next();
- }
- inline bool operator==(const TIterator& l) const noexcept {
- return AtEnd_ == l.AtEnd_;
- }
- inline bool operator!=(const TIterator& l) const noexcept {
- return !(*this == l);
- }
- /**
- * @return Return null-terminated character array with current token.
- * The pointer may be invalid after iterator increment.
- */
- inline const char* Data() const noexcept {
- Y_ASSERT(!AtEnd_);
- return Data_;
- }
- /**
- * @return Length of current token.
- */
- inline size_t Length() const noexcept {
- Y_ASSERT(!AtEnd_);
- return Len_;
- }
- inline TIterator* operator->() noexcept {
- return this;
- }
- inline TStringBuf operator*() noexcept {
- return TStringBuf{Data_, Len_};
- }
- private:
- inline void Next() {
- Y_ASSERT(Parent_);
- AtEnd_ = !Parent_->Next(Data_, Len_);
- }
- private:
- TStreamTokenizer* const Parent_;
- char* Data_;
- size_t Len_;
- bool AtEnd_;
- };
- inline TStreamTokenizer(IInputStream* const input, const TEndOfToken& eot = TEndOfToken(),
- const size_t initial = 1024)
- : Input_(input)
- , Buf_(initial)
- , Cur_(BufBegin())
- , End_(BufBegin())
- , Eot_(eot)
- {
- CheckBuf();
- }
- inline bool Next(char*& buf, size_t& len) {
- char* it = Cur_;
- while (true) {
- do {
- while (it != End_) {
- if (Eot_(*it)) {
- *it = '\0';
- buf = Cur_;
- len = it - Cur_;
- Cur_ = it + 1;
- return true;
- } else {
- ++it;
- }
- }
- if (Fill() == 0 && End_ != BufEnd()) {
- *it = '\0';
- buf = Cur_;
- len = it - Cur_;
- Cur_ = End_;
- return len;
- }
- } while (it != BufEnd());
- Y_ASSERT(it == BufEnd());
- Y_ASSERT(End_ == BufEnd());
- const size_t blen = End_ - Cur_;
- if (Cur_ == BufBegin()) {
- Y_ASSERT(blen == Buf_.Capacity());
- /*
- * do reallocate
- */
- Buf_.Reserve(Buf_.Capacity() * 4);
- CheckBuf();
- } else {
- /*
- * do move
- */
- MemMove(BufBegin(), Cur_, blen);
- }
- Cur_ = BufBegin();
- End_ = Cur_ + blen;
- it = End_;
- }
- }
- inline TIterator begin() {
- return TIterator{this};
- }
- inline TIterator end() noexcept {
- return {};
- }
- private:
- inline size_t Fill() {
- const size_t avail = BufEnd() - End_;
- const size_t bytesRead = Input_->Read(End_, avail);
- End_ += bytesRead;
- return bytesRead;
- }
- inline char* BufBegin() noexcept {
- return Buf_.Data();
- }
- inline char* BufEnd() noexcept {
- return Buf_.Data() + Buf_.Capacity();
- }
- inline void CheckBuf() const {
- if (!Buf_.Data()) {
- throw std::bad_alloc();
- }
- }
- private:
- IInputStream* const Input_;
- TBuffer Buf_;
- char* Cur_;
- char* End_;
- TEndOfToken Eot_;
- };
/**
 * Predicate for `TStreamTokenizer` that uses '\\n' as a delimiter.
 *
 * The call operator is `constexpr`, so the predicate can also be evaluated
 * at compile time.
 */
struct TEol {
    /**
     * @param ch Character to classify.
     * @return true if @p ch is the line feed character, false otherwise.
     */
    constexpr bool operator()(char ch) const noexcept {
        return ch == '\n';
    }
};
/** @} */
|