#include "text_yson.h"

#include "error.h"

// NOTE(review): the targets of the seven directives below are absent from the
// text under review; restore them from version control before building.
#include
#include
#include
#include
#include
#include
#include

namespace NYT::NDetail {

////////////////////////////////////////////////////////////////////////////////

//! Writes a textual representation of |value| into |buf| (capacity |size|).
/*!
 *  Finite values are delegated to FloatToString and its result is returned
 *  unchanged. Non-finite values are written as the literals |%nan|, |%inf|
 *  or |%-inf| together with a terminating NUL byte; the returned length does
 *  not count that terminator. The buffer capacity for the literal (including
 *  the terminator) is asserted via YT_VERIFY.
 */
size_t FloatToStringWithNanInf(double value, char* buf, size_t size)
{
    // Only finite values have a plain decimal rendering; everything else
    // is mapped to one of the three %-literals.
    if (!std::isfinite(value)) {
        const TStringBuf literal = std::isnan(value)
            ? TStringBuf("%nan")
            : (std::isinf(value) && value > 0
                ? TStringBuf("%inf")
                : TStringBuf("%-inf"));

        const size_t length = literal.size();

        // The literal is backed by a string constant, so the byte right after
        // it is the NUL terminator; it is copied along with the text.
        YT_VERIFY(length + 1 <= size);
        ::memcpy(buf, literal.data(), length + 1);
        return length;
    }

    return FloatToString(value, buf, size);
}

////////////////////////////////////////////////////////////////////////////////

// NB(arkady-e1ppa): Copied from library/cpp/yt/yson_string/format.h
// to avoid direct dependency on it.

//! The symbol denoting an entity.
constexpr char EntitySymbol = '#';
//! First byte of a binary string literal.
constexpr char StringMarker = '\x01';
//! First byte of a binary i64 literal.
constexpr char Int64Marker = '\x02';
//! First byte of a binary double literal.
constexpr char DoubleMarker = '\x03';
//! Marker of the |false| boolean value.
constexpr char FalseMarker = '\x04';
//! Marker of the |true| boolean value.
constexpr char TrueMarker = '\x05';
//! Marks the beginning of a binary ui64 literal.
constexpr char Uint64Marker = '\x06'; //////////////////////////////////////////////////////////////////////////////// bool IsBinaryYson(TStringBuf str) { return std::ssize(str) != 0 && (str.front() == EntitySymbol || str.front() == StringMarker || str.front() == Int64Marker || str.front() == DoubleMarker || str.front() == FalseMarker || str.front() == TrueMarker || str.front() == Uint64Marker); } //////////////////////////////////////////////////////////////////////////////// template <> std::string ConvertToTextYsonString(const i8& value) { return ConvertToTextYsonString(static_cast(value)); } template <> std::string ConvertToTextYsonString(const i32& value) { return ConvertToTextYsonString(static_cast(value)); } template <> std::string ConvertToTextYsonString(const i64& value) { return std::string{::ToString(value)}; } template <> std::string ConvertToTextYsonString(const ui8& value) { return ConvertToTextYsonString(static_cast(value)); } template <> std::string ConvertToTextYsonString(const ui32& value) { return ConvertToTextYsonString(static_cast(value)); } template <> std::string ConvertToTextYsonString(const ui64& value) { return std::string{::ToString(value) + 'u'}; } template <> std::string ConvertToTextYsonString(const TStringBuf& value) { return std::string(NYT::Format("\"%v\"", ::EscapeC(value))); } template <> std::string ConvertToTextYsonString(const float& value) { return ConvertToTextYsonString(static_cast(value)); } template <> std::string ConvertToTextYsonString(const double& value) { char buf[256]; auto str = TStringBuf(buf, NDetail::FloatToStringWithNanInf(value, buf, sizeof(buf))); auto ret = NYT::Format( "%v%v", str, MakeFormatterWrapper([&] (TStringBuilderBase* builder) { if (str.find('.') == TString::npos && str.find('e') == TString::npos && std::isfinite(value)) { builder->AppendChar('.'); } })); return std::string(std::move(ret)); } template <> std::string ConvertToTextYsonString(const bool& value) { return value ? 
std::string(TStringBuf("%true")) : std::string(TStringBuf("%false")); } template <> std::string ConvertToTextYsonString(const TInstant& value) { return ConvertToTextYsonString(TStringBuf(value.ToString())); } template <> std::string ConvertToTextYsonString(const TDuration& value) { // ConvertTo does unchecked cast to i64 :(. return ConvertToTextYsonString(static_cast(value.MilliSeconds())); } template <> std::string ConvertToTextYsonString(const TGuid& value) { return ConvertToTextYsonString(TStringBuf(NYT::ToString(value))); } //////////////////////////////////////////////////////////////////////////////// namespace { template TSomeInt ReadTextUint(TStringBuf strBuf) { // Drop 'u' return ::FromString(TStringBuf{strBuf.data(), strBuf.length() - 1}); } template TSomeInt ReadTextInt(TStringBuf strBuf) { return ::FromString(TStringBuf{strBuf.data(), strBuf.length()}); } bool IsNumeric(TStringBuf strBuf) { bool isNumeric = true; bool isNegative = false; for (int i = 0; i < std::ssize(strBuf); ++i) { char c = strBuf[i]; if (!('0' <= c && c <= '9')) { if (i == 0 && c == '-') { isNegative = true; continue; } if (i == std::ssize(strBuf) - 1 && c == 'u' && !isNegative) { continue; } isNumeric = false; break; } } return isNumeric; } //////////////////////////////////////////////////////////////////////////////// template TSomeInt ParseSomeIntFromTextYsonString(TStringBuf strBuf) { if (std::ssize(strBuf) == 0 || !IsNumeric(strBuf)) { THROW_ERROR_EXCEPTION( "Unexpected %v\n" "Value is not numeric", strBuf); } if (strBuf.back() == 'u') { // Drop 'u' return ReadTextUint(strBuf); } else { return ReadTextInt(strBuf); } } //////////////////////////////////////////////////////////////////////////////// TString DoParseStringFromTextYson(TStringBuf strBuf) { // Remove quotation marks. 
return ::UnescapeC(TStringBuf{strBuf.data() + 1, strBuf.length() - 2}); } TString ParseStringFromTextYsonString(TStringBuf strBuf) { if (std::ssize(strBuf) < 2 || strBuf.front() != '\"' || strBuf.back() != '\"') { THROW_ERROR_EXCEPTION( "Unexpected %v\n" "Text yson string must begin and end with \\\"", strBuf); } return DoParseStringFromTextYson(strBuf); } //////////////////////////////////////////////////////////////////////////////// double ParseDoubleFromTextYsonString(TStringBuf strBuf) { if (std::ssize(strBuf) < 2) { THROW_ERROR_EXCEPTION( "Incorrect remaining string length: expected at least 2, got %v", std::ssize(strBuf)); } // Check special values first. // %nan // %inf, %+inf, %-inf if (strBuf[0] == '%') { switch (strBuf[1]) { case '+': case 'i': return std::numeric_limits::infinity(); case '-': return -std::numeric_limits::infinity(); case 'n': return std::numeric_limits::quiet_NaN(); default: THROW_ERROR_EXCEPTION( "Incorrect %%-literal %v", strBuf); } } return ::FromString(strBuf); } } // namespace //////////////////////////////////////////////////////////////////////////////// #define PARSE_INT(type, underlyingType) \ template <> \ type ConvertFromTextYsonString(TStringBuf str) \ { \ try { \ return CheckedIntegralCast(ParseSomeIntFromTextYsonString(str)); \ } catch (const std::exception& ex) { \ THROW_ERROR_EXCEPTION("Error parsing \"" #type "\" value from YSON") << ex; \ } \ } PARSE_INT(i8, i64) PARSE_INT(i16, i64) PARSE_INT(i32, i64) PARSE_INT(i64, i64) PARSE_INT(ui8, ui64) PARSE_INT(ui16, ui64) PARSE_INT(ui32, ui64) PARSE_INT(ui64, ui64) #undef PARSE //////////////////////////////////////////////////////////////////////////////// template <> TString ConvertFromTextYsonString(TStringBuf str) { try { return ParseStringFromTextYsonString(str); } catch (const std::exception& ex) { THROW_ERROR_EXCEPTION("Error parsing \"string\" value from YSON") << ex; } } template <> std::string ConvertFromTextYsonString(TStringBuf str) { return 
std::string(ConvertFromTextYsonString(str)); } template <> float ConvertFromTextYsonString(TStringBuf str) { try { return static_cast(ParseDoubleFromTextYsonString(str)); } catch (const std::exception& ex) { THROW_ERROR_EXCEPTION("Error parsing \"float\" value from YSON") << ex; } } template <> double ConvertFromTextYsonString(TStringBuf str) { try { return ParseDoubleFromTextYsonString(str); } catch (const std::exception& ex) { THROW_ERROR_EXCEPTION("Error parsing \"double\" value from YSON") << ex; } } template <> bool ConvertFromTextYsonString(TStringBuf strBuf) { try { if (std::ssize(strBuf) == 0) { THROW_ERROR_EXCEPTION("Empty string"); } char ch = strBuf.front(); if (ch == '%') { if (strBuf != "%true" && strBuf != "%false") { THROW_ERROR_EXCEPTION( "Expected %%true or %%false but found %v", strBuf); } return strBuf == "%true"; } if (ch == '\"') { return ParseBool(DoParseStringFromTextYson(strBuf)); } // NB(arkady-e1ppa): This check is linear in size(strBuf) // And thus is tried as the last resort. 
if (IsNumeric(strBuf)) { auto checkValue = [&] (const auto& functor) { auto value = functor(strBuf); if (value != 0 && value != 1) { THROW_ERROR_EXCEPTION( "Expected 0 or 1 but found %v", value); } return static_cast(value); }; if (strBuf.back() == 'u') { return checkValue(&ReadTextUint); } else { return checkValue(&ReadTextInt); } } THROW_ERROR_EXCEPTION( "Unexpected %v\n" "No known conversion to \"boolean\" value", strBuf); } catch (const std::exception& ex) { THROW_ERROR_EXCEPTION("Error parsing \"boolean\" value from YSON") << ex; } } template <> TInstant ConvertFromTextYsonString(TStringBuf str) { try { return TInstant::ParseIso8601(ParseStringFromTextYsonString(str)); } catch (const std::exception& ex) { THROW_ERROR_EXCEPTION("Error parsing \"instant\" value from YSON") << ex; } } template <> TDuration ConvertFromTextYsonString(TStringBuf str) { try { return TDuration::MilliSeconds(ParseSomeIntFromTextYsonString(str)); } catch (const std::exception& ex) { THROW_ERROR_EXCEPTION("Error parsing \"duration\" value from YSON") << ex; } } template <> TGuid ConvertFromTextYsonString(TStringBuf str) { try { return TGuid::FromString(ParseStringFromTextYsonString(str)); } catch (const std::exception& ex) { THROW_ERROR_EXCEPTION("Error parsing \"guid\" value from YSON") << ex; } } //////////////////////////////////////////////////////////////////////////////// } // namespace NYT::NDetail