#include #include #include #include "mem.h" #include "null.h" #include "tokenizer.h" static inline void CheckIfNullTerminated(const TStringBuf str) { UNIT_ASSERT_VALUES_EQUAL('\0', *(str.data() + str.size())); } Y_UNIT_TEST_SUITE(TStreamTokenizerTests) { Y_UNIT_TEST(EmptyStreamTest) { auto&& input = TNullInput{}; auto&& tokenizer = TStreamTokenizer{&input}; auto tokensCount = size_t{}; for (auto it = tokenizer.begin(); tokenizer.end() != it; ++it) { CheckIfNullTerminated(TStringBuf{it->Data(), it->Length()}); ++tokensCount; } UNIT_ASSERT_VALUES_EQUAL(0, tokensCount); } Y_UNIT_TEST(EmptyTokensTest) { const char data[] = "\n\n"; const auto dataSize = Y_ARRAY_SIZE(data) - 1; auto&& input = TMemoryInput{data, dataSize}; auto&& tokenizer = TStreamTokenizer{&input}; auto tokensCount = size_t{}; for (auto it = tokenizer.begin(); tokenizer.end() != it; ++it) { CheckIfNullTerminated(TStringBuf{it->Data(), it->Length()}); UNIT_ASSERT_VALUES_EQUAL(0, it->Length()); ++tokensCount; } UNIT_ASSERT_VALUES_EQUAL(2, tokensCount); } Y_UNIT_TEST(LastTokenendDoesntSatisfyPredicateTest) { const char data[] = "abc\ndef\nxxxxxx"; const auto dataSize = Y_ARRAY_SIZE(data) - 1; const TStringBuf tokens[] = {TStringBuf("abc"), TStringBuf("def"), TStringBuf("xxxxxx")}; const auto tokensSize = Y_ARRAY_SIZE(tokens); auto&& input = TMemoryInput{data, dataSize}; auto&& tokenizer = TStreamTokenizer{&input}; auto tokensCount = size_t{}; for (auto it = tokenizer.begin(); tokenizer.end() != it; ++it) { UNIT_ASSERT(tokensCount < tokensSize); const auto token = TStringBuf{it->Data(), it->Length()}; CheckIfNullTerminated(token); UNIT_ASSERT_VALUES_EQUAL(tokens[tokensCount], token); ++tokensCount; } UNIT_ASSERT_VALUES_EQUAL(tokensSize, tokensCount); } Y_UNIT_TEST(FirstTokenIsEmptyTest) { const char data[] = "\ndef\nxxxxxx"; const auto dataSize = Y_ARRAY_SIZE(data) - 1; const TStringBuf tokens[] = {TStringBuf(), TStringBuf("def"), TStringBuf("xxxxxx")}; const auto tokensSize = Y_ARRAY_SIZE(tokens); auto&& input = TMemoryInput{data, dataSize}; auto&& tokenizer = TStreamTokenizer{&input}; auto tokensCount = size_t{}; for (auto it = tokenizer.begin(); tokenizer.end() != it; ++it) { UNIT_ASSERT(tokensCount < tokensSize); const auto token = TStringBuf{it->Data(), it->Length()}; CheckIfNullTerminated(token); UNIT_ASSERT_VALUES_EQUAL(tokens[tokensCount], token); ++tokensCount; } UNIT_ASSERT_VALUES_EQUAL(tokensSize, tokensCount); } Y_UNIT_TEST(PredicateDoesntMatch) { const char data[] = "1234567890-=!@#$%^&*()_+QWERTYUIOP{}qwertyuiop[]ASDFGHJKL:"; const auto dataSize = Y_ARRAY_SIZE(data) - 1; auto&& input = TMemoryInput{data, dataSize}; auto&& tokenizer = TStreamTokenizer{&input}; auto tokensCount = size_t{}; for (auto it = tokenizer.begin(); tokenizer.end() != it; ++it) { const auto token = TStringBuf{it->Data(), it->Length()}; CheckIfNullTerminated(token); UNIT_ASSERT_VALUES_EQUAL(data, token); ++tokensCount; } UNIT_ASSERT_VALUES_EQUAL(1, tokensCount); } Y_UNIT_TEST(SimpleTest) { const char data[] = "qwerty\n1234567890\n"; const auto dataSize = Y_ARRAY_SIZE(data) - 1; const TStringBuf tokens[] = {TStringBuf("qwerty"), TStringBuf("1234567890")}; const auto tokensSize = Y_ARRAY_SIZE(tokens); auto&& input = TMemoryInput{data, dataSize}; auto&& tokenizer = TStreamTokenizer{&input}; auto tokensCount = size_t{}; for (auto it = tokenizer.begin(); tokenizer.end() != it; ++it) { UNIT_ASSERT(tokensCount < tokensSize); const auto token = TStringBuf{it->Data(), it->Length()}; CheckIfNullTerminated(token); 
UNIT_ASSERT_VALUES_EQUAL(tokens[tokensCount], token); ++tokensCount; } UNIT_ASSERT_VALUES_EQUAL(tokensSize, tokensCount); } Y_UNIT_TEST(CustomPredicateTest) { struct TIsVerticalBar { inline bool operator()(const char ch) const noexcept { return '|' == ch; } }; const char data[] = "abc|def|xxxxxx"; const auto dataSize = Y_ARRAY_SIZE(data) - 1; const TStringBuf tokens[] = {TStringBuf("abc"), TStringBuf("def"), TStringBuf("xxxxxx")}; const auto tokensSize = Y_ARRAY_SIZE(tokens); auto&& input = TMemoryInput{data, dataSize}; auto&& tokenizer = TStreamTokenizer{&input}; auto tokensCount = size_t{}; for (auto it = tokenizer.begin(); tokenizer.end() != it; ++it) { UNIT_ASSERT(tokensCount < tokensSize); const auto token = TStringBuf{it->Data(), it->Length()}; CheckIfNullTerminated(token); UNIT_ASSERT_VALUES_EQUAL(tokens[tokensCount], token); ++tokensCount; } UNIT_ASSERT_VALUES_EQUAL(tokensSize, tokensCount); } Y_UNIT_TEST(CustomPredicateSecondTest) { struct TIsVerticalBar { inline bool operator()(const char ch) const noexcept { return '|' == ch || ',' == ch; } }; const char data[] = "abc|def|xxxxxx,abc|def|xxxxxx"; const auto dataSize = Y_ARRAY_SIZE(data) - 1; const TStringBuf tokens[] = {TStringBuf("abc"), TStringBuf("def"), TStringBuf("xxxxxx"), TStringBuf("abc"), TStringBuf("def"), TStringBuf("xxxxxx")}; const auto tokensSize = Y_ARRAY_SIZE(tokens); auto&& input = TMemoryInput{data, dataSize}; auto&& tokenizer = TStreamTokenizer{&input}; auto tokensCount = size_t{}; for (auto it = tokenizer.begin(); tokenizer.end() != it; ++it) { UNIT_ASSERT(tokensCount < tokensSize); const auto token = TStringBuf{it->Data(), it->Length()}; CheckIfNullTerminated(token); UNIT_ASSERT_VALUES_EQUAL(tokens[tokensCount], token); ++tokensCount; } UNIT_ASSERT_VALUES_EQUAL(tokensSize, tokensCount); } Y_UNIT_TEST(FalsePredicateTest) { struct TAlwaysFalse { inline bool operator()(const char) const noexcept { return false; } }; const char data[] = "1234567890-=!@#$%^&*()_+QWERTYUIOP{}qwertyuiop[]ASDFGHJKL:"; const auto dataSize = Y_ARRAY_SIZE(data) - 1; auto&& input = TMemoryInput{data, dataSize}; auto&& tokenizer = TStreamTokenizer{&input}; auto tokensCount = size_t{}; for (auto it = tokenizer.begin(); tokenizer.end() != it; ++it) { const auto token = TStringBuf{it->Data(), it->Length()}; CheckIfNullTerminated(token); UNIT_ASSERT_VALUES_EQUAL(data, token); ++tokensCount; } UNIT_ASSERT_VALUES_EQUAL(1, tokensCount); } Y_UNIT_TEST(TruePredicateTest) { struct TAlwaysTrue { inline bool operator()(const char) const noexcept { return true; } }; const char data[] = "1234567890-=!@#$%^&*()_+QWERTYUIOP{}qwertyuiop[]ASDFGHJKL:"; const auto dataSize = Y_ARRAY_SIZE(data) - 1; auto&& input = TMemoryInput{data, dataSize}; auto&& tokenizer = TStreamTokenizer{&input}; auto tokensCount = size_t{}; for (auto it = tokenizer.begin(); tokenizer.end() != it; ++it) { CheckIfNullTerminated(TStringBuf{it->Data(), it->Length()}); UNIT_ASSERT_VALUES_EQUAL(0, it->Length()); ++tokensCount; } UNIT_ASSERT_VALUES_EQUAL(dataSize, tokensCount); } Y_UNIT_TEST(FirstTokenHasSizeOfTheBufferTest) { const char data[] = "xxxxx\nxx"; const auto dataSize = Y_ARRAY_SIZE(data) - 1; const TStringBuf tokens[] = {TStringBuf("xxxxx"), TStringBuf("xx")}; const auto tokensSize = Y_ARRAY_SIZE(tokens); auto&& input = TMemoryInput{data, dataSize}; auto&& tokenizer = TStreamTokenizer{&input, TEol{}, tokens[0].size()}; auto tokensCount = size_t{}; for (auto it = tokenizer.begin(); tokenizer.end() != it; ++it) { const auto token = TStringBuf{it->Data(), it->Length()}; 
CheckIfNullTerminated(token); UNIT_ASSERT_VALUES_EQUAL(tokens[tokensCount], token); ++tokensCount; } UNIT_ASSERT_VALUES_EQUAL(tokensSize, tokensCount); } Y_UNIT_TEST(OnlyTokenHasSizeOfTheBufferTest) { const char data[] = "xxxxx"; const auto dataSize = Y_ARRAY_SIZE(data) - 1; auto&& input = TMemoryInput{data, dataSize}; auto&& tokenizer = TStreamTokenizer{&input, TEol{}, dataSize}; auto tokensCount = size_t{}; for (auto it = tokenizer.begin(); tokenizer.end() != it; ++it) { const auto token = TStringBuf{it->Data(), it->Length()}; CheckIfNullTerminated(token); UNIT_ASSERT_VALUES_EQUAL(data, token); ++tokensCount; } UNIT_ASSERT_VALUES_EQUAL(1, tokensCount); } Y_UNIT_TEST(BufferSizeInitialSizeSmallerThanTokenTest) { const char data[] = "xxxxx\nxx"; const auto dataSize = Y_ARRAY_SIZE(data) - 1; const TStringBuf tokens[] = {TStringBuf("xxxxx"), TStringBuf("xx")}; const auto tokensSize = Y_ARRAY_SIZE(tokens); auto&& input = TMemoryInput{data, dataSize}; auto&& tokenizer = TStreamTokenizer{&input, TEol{}, 1}; auto tokensCount = size_t{}; for (auto it = tokenizer.begin(); tokenizer.end() != it; ++it) { const auto token = TStringBuf{it->Data(), it->Length()}; CheckIfNullTerminated(token); UNIT_ASSERT_VALUES_EQUAL(tokens[tokensCount], token); ++tokensCount; } UNIT_ASSERT_VALUES_EQUAL(tokensSize, tokensCount); } Y_UNIT_TEST(RangeBasedForTest) { const char data[] = "abc\ndef\nxxxxxx"; const auto dataSize = Y_ARRAY_SIZE(data) - 1; const TStringBuf tokens[] = {TStringBuf("abc"), TStringBuf("def"), TStringBuf("xxxxxx")}; const auto tokensSize = Y_ARRAY_SIZE(tokens); auto&& input = TMemoryInput{data, dataSize}; auto&& tokenizer = TStreamTokenizer{&input}; auto tokensCount = size_t{}; for (const auto& token : tokenizer) { UNIT_ASSERT(tokensCount < tokensSize); CheckIfNullTerminated(token); UNIT_ASSERT_VALUES_EQUAL(tokens[tokensCount], token); ++tokensCount; } UNIT_ASSERT_VALUES_EQUAL(tokensSize, tokensCount); } } // Y_UNIT_TEST_SUITE(TStreamTokenizerTests)
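
// Usage sketch (illustration only, not part of the suite): how the API exercised
// above composes in application code. SplitByEol is a hypothetical helper; TVector,
// TString, and the util/generic includes are assumptions about the surrounding
// codebase. Everything else mirrors the constructor and iterator calls tested above.
//
//     #include <util/generic/string.h>
//     #include <util/generic/vector.h>
//
//     TVector<TString> SplitByEol(const TStringBuf data) {
//         auto&& input = TMemoryInput{data.data(), data.size()};
//         auto&& tokenizer = TStreamTokenizer<TEol>{&input};
//         auto result = TVector<TString>{};
//         for (auto it = tokenizer.begin(); tokenizer.end() != it; ++it) {
//             result.emplace_back(it->Data(), it->Length());
//         }
//         return result;
//     }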