#include <library/cpp/testing/unittest/registar.h>

#include <util/generic/array_size.h>
#include <util/generic/strbuf.h>

#include "mem.h"
#include "null.h"
#include "tokenizer.h"
static inline void CheckIfNullTerminated(const TStringBuf str) {
    UNIT_ASSERT_VALUES_EQUAL('\0', *(str.data() + str.size()));
}

Y_UNIT_TEST_SUITE(TStreamTokenizerTests) {
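    // An empty input stream must produce no tokens at all.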
    Y_UNIT_TEST(EmptyStreamTest) {
        auto&& input = TNullInput{};
        auto&& tokenizer = TStreamTokenizer<TEol>{&input};
        auto tokensCount = size_t{};
        for (auto it = tokenizer.begin(); tokenizer.end() != it; ++it) {
            CheckIfNullTerminated(TStringBuf{it->Data(), it->Length()});
            ++tokensCount;
        }
        UNIT_ASSERT_VALUES_EQUAL(0, tokensCount);
    }

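    // Two consecutive delimiters yield two empty (but still null-terminated) tokens.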
    Y_UNIT_TEST(EmptyTokensTest) {
        const char data[] = "\n\n";
        const auto dataSize = Y_ARRAY_SIZE(data) - 1;
        auto&& input = TMemoryInput{data, dataSize};
        auto&& tokenizer = TStreamTokenizer<TEol>{&input};
        auto tokensCount = size_t{};
        for (auto it = tokenizer.begin(); tokenizer.end() != it; ++it) {
            CheckIfNullTerminated(TStringBuf{it->Data(), it->Length()});
            UNIT_ASSERT_VALUES_EQUAL(0, it->Length());
            ++tokensCount;
        }
        UNIT_ASSERT_VALUES_EQUAL(2, tokensCount);
    }

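    // Input that doesn't end with a delimiter must still yield the final token.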
    Y_UNIT_TEST(LastTokenEndDoesntSatisfyPredicateTest) {
        const char data[] = "abc\ndef\nxxxxxx";
        const auto dataSize = Y_ARRAY_SIZE(data) - 1;
        const TStringBuf tokens[] = {TStringBuf("abc"), TStringBuf("def"), TStringBuf("xxxxxx")};
        const auto tokensSize = Y_ARRAY_SIZE(tokens);
        auto&& input = TMemoryInput{data, dataSize};
        auto&& tokenizer = TStreamTokenizer<TEol>{&input};
        auto tokensCount = size_t{};
        for (auto it = tokenizer.begin(); tokenizer.end() != it; ++it) {
            UNIT_ASSERT(tokensCount < tokensSize);
            const auto token = TStringBuf{it->Data(), it->Length()};
            CheckIfNullTerminated(token);
            UNIT_ASSERT_VALUES_EQUAL(tokens[tokensCount], token);
            ++tokensCount;
        }
        UNIT_ASSERT_VALUES_EQUAL(tokensSize, tokensCount);
    }

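    // A leading delimiter produces an empty first token.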
    Y_UNIT_TEST(FirstTokenIsEmptyTest) {
        const char data[] = "\ndef\nxxxxxx";
        const auto dataSize = Y_ARRAY_SIZE(data) - 1;
        const TStringBuf tokens[] = {TStringBuf(), TStringBuf("def"), TStringBuf("xxxxxx")};
        const auto tokensSize = Y_ARRAY_SIZE(tokens);
        auto&& input = TMemoryInput{data, dataSize};
        auto&& tokenizer = TStreamTokenizer<TEol>{&input};
        auto tokensCount = size_t{};
        for (auto it = tokenizer.begin(); tokenizer.end() != it; ++it) {
            UNIT_ASSERT(tokensCount < tokensSize);
            const auto token = TStringBuf{it->Data(), it->Length()};
            CheckIfNullTerminated(token);
            UNIT_ASSERT_VALUES_EQUAL(tokens[tokensCount], token);
            ++tokensCount;
        }
        UNIT_ASSERT_VALUES_EQUAL(tokensSize, tokensCount);
    }

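    // When the delimiter never occurs in the input, the whole buffer comes back
    // as a single token.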
    Y_UNIT_TEST(PredicateDoesntMatch) {
        const char data[] = "1234567890-=!@#$%^&*()_+QWERTYUIOP{}qwertyuiop[]ASDFGHJKL:";
        const auto dataSize = Y_ARRAY_SIZE(data) - 1;
        auto&& input = TMemoryInput{data, dataSize};
        auto&& tokenizer = TStreamTokenizer<TEol>{&input};
        auto tokensCount = size_t{};
        for (auto it = tokenizer.begin(); tokenizer.end() != it; ++it) {
            const auto token = TStringBuf{it->Data(), it->Length()};
            CheckIfNullTerminated(token);
            UNIT_ASSERT_VALUES_EQUAL(data, token);
            ++tokensCount;
        }
        UNIT_ASSERT_VALUES_EQUAL(1, tokensCount);
    }

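    // A trailing delimiter terminates the last token but doesn't add an empty
    // token after it: "qwerty\n1234567890\n" yields exactly two tokens.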
    Y_UNIT_TEST(SimpleTest) {
        const char data[] = "qwerty\n1234567890\n";
        const auto dataSize = Y_ARRAY_SIZE(data) - 1;
        const TStringBuf tokens[] = {TStringBuf("qwerty"), TStringBuf("1234567890")};
        const auto tokensSize = Y_ARRAY_SIZE(tokens);
        auto&& input = TMemoryInput{data, dataSize};
        auto&& tokenizer = TStreamTokenizer<TEol>{&input};
        auto tokensCount = size_t{};
        for (auto it = tokenizer.begin(); tokenizer.end() != it; ++it) {
            UNIT_ASSERT(tokensCount < tokensSize);
            const auto token = TStringBuf{it->Data(), it->Length()};
            CheckIfNullTerminated(token);
            UNIT_ASSERT_VALUES_EQUAL(tokens[tokensCount], token);
            ++tokensCount;
        }
        UNIT_ASSERT_VALUES_EQUAL(tokensSize, tokensCount);
    }

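    // The delimiter is a template parameter: any char predicate works, here '|'.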
    Y_UNIT_TEST(CustomPredicateTest) {
        struct TIsVerticalBar {
            inline bool operator()(const char ch) const noexcept {
                return '|' == ch;
            }
        };
        const char data[] = "abc|def|xxxxxx";
        const auto dataSize = Y_ARRAY_SIZE(data) - 1;
        const TStringBuf tokens[] = {TStringBuf("abc"), TStringBuf("def"), TStringBuf("xxxxxx")};
        const auto tokensSize = Y_ARRAY_SIZE(tokens);
        auto&& input = TMemoryInput{data, dataSize};
        auto&& tokenizer = TStreamTokenizer<TIsVerticalBar>{&input};
        auto tokensCount = size_t{};
        for (auto it = tokenizer.begin(); tokenizer.end() != it; ++it) {
            UNIT_ASSERT(tokensCount < tokensSize);
            const auto token = TStringBuf{it->Data(), it->Length()};
            CheckIfNullTerminated(token);
            UNIT_ASSERT_VALUES_EQUAL(tokens[tokensCount], token);
            ++tokensCount;
        }
        UNIT_ASSERT_VALUES_EQUAL(tokensSize, tokensCount);
    }

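    // A single predicate may accept several delimiter characters ('|' and ',').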
    Y_UNIT_TEST(CustomPredicateSecondTest) {
        struct TIsVerticalBar {
            inline bool operator()(const char ch) const noexcept {
                return '|' == ch || ',' == ch;
            }
        };
        const char data[] = "abc|def|xxxxxx,abc|def|xxxxxx";
        const auto dataSize = Y_ARRAY_SIZE(data) - 1;
        const TStringBuf tokens[] = {TStringBuf("abc"), TStringBuf("def"), TStringBuf("xxxxxx"),
                                     TStringBuf("abc"), TStringBuf("def"), TStringBuf("xxxxxx")};
        const auto tokensSize = Y_ARRAY_SIZE(tokens);
        auto&& input = TMemoryInput{data, dataSize};
        auto&& tokenizer = TStreamTokenizer<TIsVerticalBar>{&input};
        auto tokensCount = size_t{};
        for (auto it = tokenizer.begin(); tokenizer.end() != it; ++it) {
            UNIT_ASSERT(tokensCount < tokensSize);
            const auto token = TStringBuf{it->Data(), it->Length()};
            CheckIfNullTerminated(token);
            UNIT_ASSERT_VALUES_EQUAL(tokens[tokensCount], token);
            ++tokensCount;
        }
        UNIT_ASSERT_VALUES_EQUAL(tokensSize, tokensCount);
    }

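    // A predicate that never matches: the whole input is returned as one token.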
    Y_UNIT_TEST(FalsePredicateTest) {
        struct TAlwaysFalse {
            inline bool operator()(const char) const noexcept {
                return false;
            }
        };
        const char data[] = "1234567890-=!@#$%^&*()_+QWERTYUIOP{}qwertyuiop[]ASDFGHJKL:";
        const auto dataSize = Y_ARRAY_SIZE(data) - 1;
        auto&& input = TMemoryInput{data, dataSize};
        auto&& tokenizer = TStreamTokenizer<TAlwaysFalse>{&input};
        auto tokensCount = size_t{};
        for (auto it = tokenizer.begin(); tokenizer.end() != it; ++it) {
            const auto token = TStringBuf{it->Data(), it->Length()};
            CheckIfNullTerminated(token);
            UNIT_ASSERT_VALUES_EQUAL(data, token);
            ++tokensCount;
        }
        UNIT_ASSERT_VALUES_EQUAL(1, tokensCount);
    }

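    // A predicate that matches every character yields one empty token per input byte.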
    Y_UNIT_TEST(TruePredicateTest) {
        struct TAlwaysTrue {
            inline bool operator()(const char) const noexcept {
                return true;
            }
        };
        const char data[] = "1234567890-=!@#$%^&*()_+QWERTYUIOP{}qwertyuiop[]ASDFGHJKL:";
        const auto dataSize = Y_ARRAY_SIZE(data) - 1;
        auto&& input = TMemoryInput{data, dataSize};
        auto&& tokenizer = TStreamTokenizer<TAlwaysTrue>{&input};
        auto tokensCount = size_t{};
        for (auto it = tokenizer.begin(); tokenizer.end() != it; ++it) {
            CheckIfNullTerminated(TStringBuf{it->Data(), it->Length()});
            UNIT_ASSERT_VALUES_EQUAL(0, it->Length());
            ++tokensCount;
        }
        UNIT_ASSERT_VALUES_EQUAL(dataSize, tokensCount);
    }

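    // Exercises the case where the first token is exactly as long as the
    // initial internal buffer.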
    Y_UNIT_TEST(FirstTokenHasSizeOfTheBufferTest) {
        const char data[] = "xxxxx\nxx";
        const auto dataSize = Y_ARRAY_SIZE(data) - 1;
        const TStringBuf tokens[] = {TStringBuf("xxxxx"), TStringBuf("xx")};
        const auto tokensSize = Y_ARRAY_SIZE(tokens);
        auto&& input = TMemoryInput{data, dataSize};
        auto&& tokenizer = TStreamTokenizer<TEol>{&input, TEol{}, tokens[0].size()};
        auto tokensCount = size_t{};
        for (auto it = tokenizer.begin(); tokenizer.end() != it; ++it) {
            const auto token = TStringBuf{it->Data(), it->Length()};
            CheckIfNullTerminated(token);
            UNIT_ASSERT_VALUES_EQUAL(tokens[tokensCount], token);
            ++tokensCount;
        }
        UNIT_ASSERT_VALUES_EQUAL(tokensSize, tokensCount);
    }

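    // Exercises the case where the only token fills the whole initial buffer.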
    Y_UNIT_TEST(OnlyTokenHasSizeOfTheBufferTest) {
        const char data[] = "xxxxx";
        const auto dataSize = Y_ARRAY_SIZE(data) - 1;
        auto&& input = TMemoryInput{data, dataSize};
        auto&& tokenizer = TStreamTokenizer<TEol>{&input, TEol{}, dataSize};
        auto tokensCount = size_t{};
        for (auto it = tokenizer.begin(); tokenizer.end() != it; ++it) {
            const auto token = TStringBuf{it->Data(), it->Length()};
            CheckIfNullTerminated(token);
            UNIT_ASSERT_VALUES_EQUAL(data, token);
            ++tokensCount;
        }
        UNIT_ASSERT_VALUES_EQUAL(1, tokensCount);
    }

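    // A 1-byte initial buffer is smaller than every token, so the buffer has
    // to grow while tokenizing.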
    Y_UNIT_TEST(BufferSizeInitialSizeSmallerThanTokenTest) {
        const char data[] = "xxxxx\nxx";
        const auto dataSize = Y_ARRAY_SIZE(data) - 1;
        const TStringBuf tokens[] = {TStringBuf("xxxxx"), TStringBuf("xx")};
        const auto tokensSize = Y_ARRAY_SIZE(tokens);
        auto&& input = TMemoryInput{data, dataSize};
        auto&& tokenizer = TStreamTokenizer<TEol>{&input, TEol{}, 1};
        auto tokensCount = size_t{};
        for (auto it = tokenizer.begin(); tokenizer.end() != it; ++it) {
            const auto token = TStringBuf{it->Data(), it->Length()};
            CheckIfNullTerminated(token);
            UNIT_ASSERT_VALUES_EQUAL(tokens[tokensCount], token);
            ++tokensCount;
        }
        UNIT_ASSERT_VALUES_EQUAL(tokensSize, tokensCount);
    }

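    // The tokenizer can also be traversed with a range-based for loop.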
    Y_UNIT_TEST(RangeBasedForTest) {
        const char data[] = "abc\ndef\nxxxxxx";
        const auto dataSize = Y_ARRAY_SIZE(data) - 1;
        const TStringBuf tokens[] = {TStringBuf("abc"), TStringBuf("def"), TStringBuf("xxxxxx")};
        const auto tokensSize = Y_ARRAY_SIZE(tokens);
        auto&& input = TMemoryInput{data, dataSize};
        auto&& tokenizer = TStreamTokenizer<TEol>{&input};
        auto tokensCount = size_t{};
        for (const auto& token : tokenizer) {
            UNIT_ASSERT(tokensCount < tokensSize);
            CheckIfNullTerminated(token);
            UNIT_ASSERT_VALUES_EQUAL(tokens[tokensCount], token);
            ++tokensCount;
        }
        UNIT_ASSERT_VALUES_EQUAL(tokensSize, tokensCount);
    }
}