123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436 |
- #include "text_yson.h"
- #include "error.h"
- #include <library/cpp/yt/assert/assert.h>
- #include <library/cpp/yt/string/format.h>
- #include <library/cpp/yt/coding/varint.h>
- #include <library/cpp/yt/misc/cast.h>
- #include <array>
- #include <util/string/escape.h>
- #include <util/stream/mem.h>
- namespace NYT::NDetail {
- ////////////////////////////////////////////////////////////////////////////////
- size_t FloatToStringWithNanInf(double value, char* buf, size_t size)
- {
- if (std::isfinite(value)) {
- return FloatToString(value, buf, size);
- }
- static const TStringBuf nanLiteral = "%nan";
- static const TStringBuf infLiteral = "%inf";
- static const TStringBuf negativeInfLiteral = "%-inf";
- TStringBuf str;
- if (std::isnan(value)) {
- str = nanLiteral;
- } else if (std::isinf(value) && value > 0) {
- str = infLiteral;
- } else {
- str = negativeInfLiteral;
- }
- YT_VERIFY(str.size() + 1 <= size);
- ::memcpy(buf, str.data(), str.size() + 1);
- return str.size();
- }
- ////////////////////////////////////////////////////////////////////////////////
- // NB(arkady-e1ppa): Copied from library/cpp/yt/yson_string/format.h
- // to avoid direct dependency on it.
- //! Indicates an entity.
- constexpr char EntitySymbol = '#';
- //! Marks the beginning of a binary string literal.
- constexpr char StringMarker = '\x01';
- //! Marks the beginning of a binary i64 literal.
- constexpr char Int64Marker = '\x02';
- //! Marks the beginning of a binary double literal.
- constexpr char DoubleMarker = '\x03';
- //! Marks |false| boolean value.
- constexpr char FalseMarker = '\x04';
- //! Marks |true| boolean value.
- constexpr char TrueMarker = '\x05';
- //! Marks the beginning of a binary ui64 literal.
- constexpr char Uint64Marker = '\x06';
- ////////////////////////////////////////////////////////////////////////////////
- bool IsBinaryYson(TStringBuf str)
- {
- return
- std::ssize(str) != 0 &&
- (str.front() == EntitySymbol ||
- str.front() == StringMarker ||
- str.front() == Int64Marker ||
- str.front() == DoubleMarker ||
- str.front() == FalseMarker ||
- str.front() == TrueMarker ||
- str.front() == Uint64Marker);
- }
- ////////////////////////////////////////////////////////////////////////////////
- template <>
- std::string ConvertToTextYsonString<i8>(const i8& value)
- {
- return ConvertToTextYsonString(static_cast<i64>(value));
- }
- template <>
- std::string ConvertToTextYsonString<i32>(const i32& value)
- {
- return ConvertToTextYsonString(static_cast<i64>(value));
- }
- template <>
- std::string ConvertToTextYsonString<i64>(const i64& value)
- {
- return std::string{::ToString(value)};
- }
- template <>
- std::string ConvertToTextYsonString<ui8>(const ui8& value)
- {
- return ConvertToTextYsonString(static_cast<ui64>(value));
- }
- template <>
- std::string ConvertToTextYsonString<ui32>(const ui32& value)
- {
- return ConvertToTextYsonString(static_cast<ui64>(value));
- }
- template <>
- std::string ConvertToTextYsonString<ui64>(const ui64& value)
- {
- return std::string{::ToString(value) + 'u'};
- }
- template <>
- std::string ConvertToTextYsonString<TStringBuf>(const TStringBuf& value)
- {
- return std::string(NYT::Format("\"%v\"", ::EscapeC(value)));
- }
- template <>
- std::string ConvertToTextYsonString<float>(const float& value)
- {
- return ConvertToTextYsonString(static_cast<double>(value));
- }
- template <>
- std::string ConvertToTextYsonString<double>(const double& value)
- {
- char buf[256];
- auto str = TStringBuf(buf, NDetail::FloatToStringWithNanInf(value, buf, sizeof(buf)));
- auto ret = NYT::Format(
- "%v%v",
- str,
- MakeFormatterWrapper([&] (TStringBuilderBase* builder) {
- if (str.find('.') == TString::npos && str.find('e') == TString::npos && std::isfinite(value)) {
- builder->AppendChar('.');
- }
- }));
- return std::string(std::move(ret));
- }
- template <>
- std::string ConvertToTextYsonString<bool>(const bool& value)
- {
- return value
- ? std::string(TStringBuf("%true"))
- : std::string(TStringBuf("%false"));
- }
- template <>
- std::string ConvertToTextYsonString<TInstant>(const TInstant& value)
- {
- return ConvertToTextYsonString(TStringBuf(value.ToString()));
- }
- template <>
- std::string ConvertToTextYsonString<TDuration>(const TDuration& value)
- {
- // ConvertTo does unchecked cast to i64 :(.
- return ConvertToTextYsonString(static_cast<i64>(value.MilliSeconds()));
- }
- template <>
- std::string ConvertToTextYsonString<TGuid>(const TGuid& value)
- {
- return ConvertToTextYsonString(TStringBuf(NYT::ToString(value)));
- }
- ////////////////////////////////////////////////////////////////////////////////
- namespace {
- template <class TSomeInt>
- TSomeInt ReadTextUint(TStringBuf strBuf)
- {
- // Drop 'u'
- return ::FromString<TSomeInt>(TStringBuf{strBuf.data(), strBuf.length() - 1});
- }
- template <class TSomeInt>
- TSomeInt ReadTextInt(TStringBuf strBuf)
- {
- return ::FromString<TSomeInt>(TStringBuf{strBuf.data(), strBuf.length()});
- }
- bool IsNumeric(TStringBuf strBuf)
- {
- bool isNumeric = true;
- bool isNegative = false;
- for (int i = 0; i < std::ssize(strBuf); ++i) {
- char c = strBuf[i];
- if (!('0' <= c && c <= '9')) {
- if (i == 0 && c == '-') {
- isNegative = true;
- continue;
- }
- if (i == std::ssize(strBuf) - 1 && c == 'u' && !isNegative) {
- continue;
- }
- isNumeric = false;
- break;
- }
- }
- return isNumeric;
- }
- ////////////////////////////////////////////////////////////////////////////////
- template <class TSomeInt>
- TSomeInt ParseSomeIntFromTextYsonString(TStringBuf strBuf)
- {
- if (std::ssize(strBuf) == 0 || !IsNumeric(strBuf)) {
- THROW_ERROR_EXCEPTION(
- "Unexpected %v\n"
- "Value is not numeric",
- strBuf);
- }
- if (strBuf.back() == 'u') {
- // Drop 'u'
- return ReadTextUint<TSomeInt>(strBuf);
- } else {
- return ReadTextInt<TSomeInt>(strBuf);
- }
- }
- ////////////////////////////////////////////////////////////////////////////////
- TString DoParseStringFromTextYson(TStringBuf strBuf)
- {
- // Remove quotation marks.
- return ::UnescapeC(TStringBuf{strBuf.data() + 1, strBuf.length() - 2});
- }
- TString ParseStringFromTextYsonString(TStringBuf strBuf)
- {
- if (std::ssize(strBuf) < 2 || strBuf.front() != '\"' || strBuf.back() != '\"') {
- THROW_ERROR_EXCEPTION(
- "Unexpected %v\n"
- "Text yson string must begin and end with \\\"",
- strBuf);
- }
- return DoParseStringFromTextYson(strBuf);
- }
- ////////////////////////////////////////////////////////////////////////////////
- double ParseDoubleFromTextYsonString(TStringBuf strBuf)
- {
- if (std::ssize(strBuf) < 2) {
- THROW_ERROR_EXCEPTION(
- "Incorrect remaining string length: expected at least 2, got %v",
- std::ssize(strBuf));
- }
- // Check special values first.
- // %nan
- // %inf, %+inf, %-inf
- if (strBuf[0] == '%') {
- switch (strBuf[1]) {
- case '+':
- case 'i':
- return std::numeric_limits<double>::infinity();
- case '-':
- return -std::numeric_limits<double>::infinity();
- case 'n':
- return std::numeric_limits<double>::quiet_NaN();
- default:
- THROW_ERROR_EXCEPTION(
- "Incorrect %%-literal %v",
- strBuf);
- }
- }
- return ::FromString<double>(strBuf);
- }
- } // namespace
- ////////////////////////////////////////////////////////////////////////////////
- #define PARSE_INT(type, underlyingType) \
- template <> \
- type ConvertFromTextYsonString<type>(TStringBuf str) \
- { \
- try { \
- return CheckedIntegralCast<type>(ParseSomeIntFromTextYsonString<underlyingType>(str)); \
- } catch (const std::exception& ex) { \
- THROW_ERROR_EXCEPTION("Error parsing \"" #type "\" value from YSON") << ex; \
- } \
- }
- PARSE_INT(i8, i64)
- PARSE_INT(i16, i64)
- PARSE_INT(i32, i64)
- PARSE_INT(i64, i64)
- PARSE_INT(ui8, ui64)
- PARSE_INT(ui16, ui64)
- PARSE_INT(ui32, ui64)
- PARSE_INT(ui64, ui64)
- #undef PARSE
- ////////////////////////////////////////////////////////////////////////////////
- template <>
- TString ConvertFromTextYsonString<TString>(TStringBuf str)
- {
- try {
- return ParseStringFromTextYsonString(str);
- } catch (const std::exception& ex) {
- THROW_ERROR_EXCEPTION("Error parsing \"string\" value from YSON") << ex;
- }
- }
- template <>
- std::string ConvertFromTextYsonString<std::string>(TStringBuf str)
- {
- return std::string(ConvertFromTextYsonString<TString>(str));
- }
- template <>
- float ConvertFromTextYsonString<float>(TStringBuf str)
- {
- try {
- return static_cast<float>(ParseDoubleFromTextYsonString(str));
- } catch (const std::exception& ex) {
- THROW_ERROR_EXCEPTION("Error parsing \"float\" value from YSON") << ex;
- }
- }
- template <>
- double ConvertFromTextYsonString<double>(TStringBuf str)
- {
- try {
- return ParseDoubleFromTextYsonString(str);
- } catch (const std::exception& ex) {
- THROW_ERROR_EXCEPTION("Error parsing \"double\" value from YSON") << ex;
- }
- }
- template <>
- bool ConvertFromTextYsonString<bool>(TStringBuf strBuf)
- {
- try {
- if (std::ssize(strBuf) == 0) {
- THROW_ERROR_EXCEPTION("Empty string");
- }
- char ch = strBuf.front();
- if (ch == '%') {
- if (strBuf != "%true" && strBuf != "%false") {
- THROW_ERROR_EXCEPTION(
- "Expected %%true or %%false but found %v",
- strBuf);
- }
- return strBuf == "%true";
- }
- if (ch == '\"') {
- return ParseBool(DoParseStringFromTextYson(strBuf));
- }
- // NB(arkady-e1ppa): This check is linear in size(strBuf)
- // And thus is tried as the last resort.
- if (IsNumeric(strBuf)) {
- auto checkValue = [&] (const auto& functor) {
- auto value = functor(strBuf);
- if (value != 0 && value != 1) {
- THROW_ERROR_EXCEPTION(
- "Expected 0 or 1 but found %v",
- value);
- }
- return static_cast<bool>(value);
- };
- if (strBuf.back() == 'u') {
- return checkValue(&ReadTextUint<ui64>);
- } else {
- return checkValue(&ReadTextInt<i64>);
- }
- }
- THROW_ERROR_EXCEPTION(
- "Unexpected %v\n"
- "No known conversion to \"boolean\" value",
- strBuf);
- } catch (const std::exception& ex) {
- THROW_ERROR_EXCEPTION("Error parsing \"boolean\" value from YSON") << ex;
- }
- }
- template <>
- TInstant ConvertFromTextYsonString<TInstant>(TStringBuf str)
- {
- try {
- return TInstant::ParseIso8601(ParseStringFromTextYsonString(str));
- } catch (const std::exception& ex) {
- THROW_ERROR_EXCEPTION("Error parsing \"instant\" value from YSON") << ex;
- }
- }
- template <>
- TDuration ConvertFromTextYsonString<TDuration>(TStringBuf str)
- {
- try {
- return TDuration::MilliSeconds(ParseSomeIntFromTextYsonString<i64>(str));
- } catch (const std::exception& ex) {
- THROW_ERROR_EXCEPTION("Error parsing \"duration\" value from YSON") << ex;
- }
- }
- template <>
- TGuid ConvertFromTextYsonString<TGuid>(TStringBuf str)
- {
- try {
- return TGuid::FromString(ParseStringFromTextYsonString(str));
- } catch (const std::exception& ex) {
- THROW_ERROR_EXCEPTION("Error parsing \"guid\" value from YSON") << ex;
- }
- }
- ////////////////////////////////////////////////////////////////////////////////
- } // namespace NYT::NDetail
|