/*
 * Conversions between numbers and strings: formatters (FormatInt, FormatFlt,
 * ToStringImpl, IntToString, FloatToString) and parsers (TBasicIntParser,
 * TIntParser, ParseInt, TryParseInt, ParseFlt, FromStringImpl,
 * TryFromStringImpl, IntFromString, TryIntFromString, StrToD).
 *
 * NOTE(review): in this copy the include targets and the template
 * parameter and argument lists appear to be missing, and line comments run
 * into the code that follows them. The code text is left exactly as found;
 * confirm against the upstream source before building.
 */
#include #if defined(_freebsd_) && !defined(__LONG_LONG_SUPPORTED) #define __LONG_LONG_SUPPORTED #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include using double_conversion::DoubleToStringConverter; using double_conversion::StringBuilder; using double_conversion::StringToDoubleConverter; /* * ------------------------------ formatters ------------------------------ */ namespace { // clang-format off /* Lookup table from character code to digit value (0-9 for decimal digits, 10-15 for the letters A-F and a-f). The value 20 marks a character that is not a digit; it is never below any supported base, so CharToDigit rejects it. */ constexpr int LetterToIntMap[] = { 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 20, 20, 20, 20, 20, 20, 20, 10, 11, 12, 13, 14, 15, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 10, 11, 12, 13, 14, 15, }; // clang-format on /* Returns the negation of a positive value as a signed integer, computed as -(value - 1) - 1; returns 0 when value is not greater than 0. Presumably written this way so that the magnitude of the minimum signed value can be negated without overflow - confirm. */ template std::make_signed_t NegatePositiveSigned(T value) noexcept { return value > 0 ? (-std::make_signed_t(value - 1) - 1) : 0; } template struct TFltModifiers; /* Writes value into buf (capacity len) by delegating to TIntStringBuf::Convert and returns its result. */ template Y_NO_INLINE size_t FormatInt(T value, TChar* buf, size_t len) { return TIntStringBuf::Convert(value, buf, len); } /* Formats t with snprintf using TFltModifiers::ModifierWrite and returns the snprintf result. Y_ENSURE fails when snprintf returns a negative value or a value greater than len. */ template inline size_t FormatFlt(T t, char* buf, size_t len) { const int ret = snprintf(buf, len, TFltModifiers::ModifierWrite, t); Y_ENSURE(ret >= 0 && (size_t)ret <= len, TStringBuf("cannot format float")); return (size_t)ret; } /* Result codes of the integer parsers below; PS_OK means success. */ enum EParseStatus { PS_OK = 0, PS_EMPTY_STRING, PS_PLUS_STRING, PS_MINUS_STRING, PS_BAD_SYMBOL, PS_OVERFLOW, }; /* Indexed as SAFE_LENS[ConstLog2(sizeof(T))][base]: the input length up to which TBasicIntParser::Parse tries ParseFast. The four rows correspond to type sizes of 1, 2, 4 and 8 bytes, the 17 columns to bases 0-16. */ constexpr ui8 SAFE_LENS[4][17] = { {0, 0, 7, 5, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1}, {0, 0, 15, 10, 7, 6, 6, 5, 5, 5, 4, 4, 4, 4, 4, 4, 3}, {0, 0, 31, 20, 15, 13, 12, 11, 10, 10, 9, 9, 8, 8, 8, 8, 7}, {0, 0, 63, 40, 31, 27, 24, 22, 21, 20, 19, 18, 17, 17, 16, 16, 15}, }; /* Compile-time integer log2 by repeated halving; the recursion ends at x == 1. For x == 0 the recursion as written never reaches the base case. */ inline constexpr ui8 ConstLog2(ui8 x) noexcept { return x == 1 ? 
0 : 1 + ConstLog2(x / 2); } /* CharToDigit for BASE > 10: looks c up in LetterToIntMap, stores the value in *digit and returns whether it is below BASE. Character codes outside the table are rejected without touching *digit. */ template inline std::enable_if_t<(BASE > 10), bool> CharToDigit(TChar c, T* digit) noexcept { unsigned uc = c; if (uc >= Y_ARRAY_SIZE(LetterToIntMap)) { return false; } *digit = LetterToIntMap[uc]; return *digit < BASE; } /* CharToDigit for BASE <= 10: accepts c only when it is not below the zero digit and its offset from the zero digit (stored in *digit) is below BASE. */ template inline std::enable_if_t<(BASE <= 10), bool> CharToDigit(TChar c, T* digit) noexcept { return (c >= '0') && ((*digit = (c - '0')) < BASE); } /* Parses a run of digits (no sign) into an unsigned T bounded by max. Parse tries ParseFast when the input is no longer than the SAFE_LENS entry; if that path is skipped, fails, or yields a value above max, it falls back to ParseSlow. ParseSlow checks for overflow on every digit; on PS_BAD_SYMBOL it stores the offending position in *ppos, on PS_OVERFLOW it leaves *ppos unchanged. */ template struct TBasicIntParser { static_assert(1 < base && base < 17, "Expect 1 < base && base < 17."); static_assert(std::is_unsigned::value, "TBasicIntParser can only handle unsigned integers."); enum : unsigned { BASE_POW_2 = base * base, }; static inline EParseStatus Parse(const TChar** ppos, const TChar* end, T max, T* target) noexcept { Y_ASSERT(*ppos != end); /* This check should be somewhere up the stack. */ const size_t maxSafeLen = SAFE_LENS[ConstLog2(sizeof(T))][base]; // can parse without overflow if (size_t(end - *ppos) <= maxSafeLen) { T result; if (ParseFast(*ppos, end, &result) && result <= max) { *target = result; return PS_OK; } } return ParseSlow(ppos, end, max, target); } static inline bool ParseFast(const TChar* pos, const TChar* end, T* target) noexcept { T result = T(); T d1; T d2; // we have end > pos auto beforeEnd = end - 1; while (pos < beforeEnd && CharToDigit(*pos, &d1) && CharToDigit(*(pos + 1), &d2)) { result = result * BASE_POW_2 + d1 * base + d2; pos += 2; } while (pos != end && CharToDigit(*pos, &d1)) { result = result * base + d1; ++pos; } *target = result; return pos == end; } static inline EParseStatus ParseSlow(const TChar** ppos, const TChar* end, T max, T* target) noexcept { T result = T(); T preMulMax = max / base; const TChar* pos = *ppos; while (pos != end) { T digit; if (!CharToDigit(*pos, &digit)) { *ppos = pos; return PS_BAD_SYMBOL; } if (result > preMulMax) { return PS_OVERFLOW; } result *= base; if (result > max - digit) { return PS_OVERFLOW; } result += digit; pos++; } *target = result; return PS_OK; } }; /* Largest magnitudes accepted by TIntParser: PositiveMax for input without a minus sign, NegativeMax for input with one. */ template struct 
TBounds { T PositiveMax; T NegativeMax; }; /* Parses an optional sign followed by digits. A plus sign is always accepted, a minus sign only when T is signed. Input that is empty or consists of a sign only yields PS_EMPTY_STRING, PS_PLUS_STRING or PS_MINUS_STRING; otherwise the digits are handed to TBasicIntParser::Parse with the matching bound. On success the (possibly negated) value is stored in *target; on a TBasicIntParser failure *ppos is set to the parser position. */ template struct TIntParser { static_assert(1 < base && base < 17, "Expect 1 < base && base < 17."); static_assert(std::is_integral::value, "T must be an integral type."); enum { IsSigned = std::is_signed::value }; using TUnsigned = std::make_unsigned_t; static inline EParseStatus Parse(const TChar** ppos, const TChar* end, const TBounds& bounds, T* target) { const TChar* pos = *ppos; if (pos == end) { return PS_EMPTY_STRING; } bool negative = false; TUnsigned max; if (*pos == '+') { pos++; max = bounds.PositiveMax; if (pos == end) { return PS_PLUS_STRING; } } else if (IsSigned && *pos == '-') { pos++; max = bounds.NegativeMax; negative = true; if (pos == end) { return PS_MINUS_STRING; } } else { max = bounds.PositiveMax; } TUnsigned result; EParseStatus error = TBasicIntParser::Parse(&pos, end, max, &result); if (error != PS_OK) { *ppos = pos; return error; } if (IsSigned) { *target = negative ? NegatePositiveSigned(result) : static_cast(result); } else { *target = result; } return PS_OK; } }; /* Throws the exception matching status: TFromStringException with a descriptive message for the known failure codes, yexception for any other code. pos is dereferenced only for PS_BAD_SYMBOL. */ template [[noreturn]] static Y_NO_INLINE void ThrowParseError(EParseStatus status, const TChar* data, size_t len, const TChar* pos) { Y_ASSERT(status != PS_OK); typedef TBasicString TStringType; switch (status) { case PS_EMPTY_STRING: ythrow TFromStringException() << TStringBuf("Cannot parse empty string as number. "); case PS_PLUS_STRING: ythrow TFromStringException() << TStringBuf("Cannot parse string \"+\" as number. "); case PS_MINUS_STRING: ythrow TFromStringException() << TStringBuf("Cannot parse string \"-\" as number. "); case PS_BAD_SYMBOL: ythrow TFromStringException() << TStringBuf("Unexpected symbol \"") << EscapeC(*pos) << TStringBuf("\" at pos ") << (pos - data) << TStringBuf(" in string ") << TStringType(data, len).Quote() << TStringBuf(". "); case PS_OVERFLOW: ythrow TFromStringException() << TStringBuf("Integer overflow in string ") << TStringType(data, len).Quote() << TStringBuf(". 
"); default: ythrow yexception() << TStringBuf("Unknown error code in string converter. "); } } /* Parses len characters starting at data with TIntParser and returns the value; any status other than PS_OK is turned into an exception by ThrowParseError. */ template Y_NO_INLINE T ParseInt(const TChar* data, size_t len, const TBounds& bounds) { T result; const TChar* pos = data; EParseStatus status = TIntParser::Parse(&pos, pos + len, bounds, &result); if (status == PS_OK) { return result; } else { ThrowParseError(status, data, len, pos); } } /* Non-throwing variant of ParseInt: returns true only when TIntParser::Parse yields PS_OK. */ template Y_NO_INLINE bool TryParseInt(const TChar* data, size_t len, const TBounds& bounds, T* result) { return TIntParser::Parse(&data, data + len, bounds, result) == PS_OK; } /* Parses a floating-point value with sscanf from a zero-terminated stack copy (alloca) of the input. Input longer than 256 characters is cut to 256 before parsing. The format reads a value plus one extra character, and only a sscanf result of exactly 1 is accepted; otherwise TFromStringException is thrown. */ template inline T ParseFlt(const char* data, size_t len) { /* * TODO */ if (len > 256) { len = 256; } char* c = (char*)alloca(len + 1); memcpy(c, data, len); c[len] = 0; T ret; char ec; // try to read a value and an extra character in order to catch cases when // the string start with a valid float but is followed by unexpected characters if (sscanf(c, TFltModifiers::ModifierReadAndChar, &ret, &ec) == 1) { return ret; } ythrow TFromStringException() << TStringBuf("cannot parse float(") << TStringBuf(data, len) << TStringBuf(")"); } /* DEF_FLT_MOD defines the TFltModifiers specialization for a type: the write format as given and the read format with a single-character conversion appended. It is instantiated for long double only. */ #define DEF_FLT_MOD(type, modifierWrite, modifierRead) \ template <> \ struct TFltModifiers { \ static const char* const ModifierWrite; \ static const char* const ModifierReadAndChar; \ }; \ \ const char* const TFltModifiers::ModifierWrite = modifierWrite; \ const char* const TFltModifiers::ModifierReadAndChar = modifierRead "%c"; DEF_FLT_MOD(long double, "%.10Lg", "%Lg") #undef DEF_FLT_MOD /* The following constants are initialized in terms of constants to make * sure they go into binary as actual values and there is no associated * initialization code. 
* */ /* Naming: the prefix b, s, i, l, ll selects char, short, int, long, long long limits; S and U select the signed and unsigned variant. NegativeMax of a signed variant is the unsigned maximum minus the signed maximum; unsigned variants use 0. */ constexpr TBounds bSBounds = {static_cast(SCHAR_MAX), static_cast(UCHAR_MAX - SCHAR_MAX)}; constexpr TBounds bUBounds = {static_cast(UCHAR_MAX), 0}; constexpr TBounds sSBounds = {static_cast(SHRT_MAX), static_cast(USHRT_MAX - SHRT_MAX)}; constexpr TBounds sUBounds = {static_cast(USHRT_MAX), 0}; constexpr TBounds iSBounds = {static_cast(INT_MAX), static_cast(UINT_MAX - INT_MAX)}; constexpr TBounds iUBounds = {static_cast(UINT_MAX), 0}; constexpr TBounds lSBounds = {static_cast(LONG_MAX), static_cast(ULONG_MAX - LONG_MAX)}; constexpr TBounds lUBounds = {static_cast(ULONG_MAX), 0}; constexpr TBounds llSBounds = {static_cast(LLONG_MAX), static_cast(ULLONG_MAX - LLONG_MAX)}; constexpr TBounds llUBounds = {static_cast(ULLONG_MAX), 0}; } /* The DEF_INT_SPEC macros below generate the integer formatting specializations: ToStringImpl plus IntToString for bases 2, 8, 10 and 16. Every generated function forwards to FormatInt. */ #define DEF_INT_SPEC_II(TYPE, ITYPE, BASE) \ template <> \ size_t IntToString(TYPE value, char* buf, size_t len) { \ return FormatInt(value, buf, len); \ } #define DEF_INT_SPEC_I(TYPE, ITYPE) \ template <> \ size_t ToStringImpl(TYPE value, char* buf, size_t len) { \ return FormatInt(value, buf, len); \ } \ DEF_INT_SPEC_II(TYPE, ITYPE, 2) \ DEF_INT_SPEC_II(TYPE, ITYPE, 8) \ DEF_INT_SPEC_II(TYPE, ITYPE, 10) \ DEF_INT_SPEC_II(TYPE, ITYPE, 16) #define DEF_INT_SPEC(TYPE) \ DEF_INT_SPEC_I(signed TYPE, i64) \ DEF_INT_SPEC_I(unsigned TYPE, ui64) DEF_INT_SPEC(char) DEF_INT_SPEC(short) DEF_INT_SPEC(int) DEF_INT_SPEC(long) DEF_INT_SPEC(long long) #ifdef __cpp_char8_t template <> size_t ToStringImpl(char8_t value, char* buf, size_t len) { return FormatInt(value, buf, len); } #endif using TCharIType = std::conditional_t::value, i64, ui64>; using TWCharIType = std::conditional_t::value, i64, ui64>; DEF_INT_SPEC_I(char, TCharIType) DEF_INT_SPEC_I(wchar_t, TWCharIType) DEF_INT_SPEC_I(wchar16, ui64) // wchar16 is always unsigned DEF_INT_SPEC_I(wchar32, ui64) // wchar32 is always unsigned #undef DEF_INT_SPEC #undef DEF_INT_SPEC_I #undef DEF_INT_SPEC_II /* DEF_FLT_SPEC generates the ToStringImpl specialization that forwards to FormatFlt; it is instantiated for long double only. */ #define DEF_FLT_SPEC(type) \ template <> \ size_t ToStringImpl(type t, char* buf, size_t len) { \ 
return FormatFlt(t, buf, len); \ } DEF_FLT_SPEC(long double) #undef DEF_FLT_SPEC /* bool is written as a single digit character, 1 for true and 0 for false; Y_ENSURE requires len to be non-zero. Returns 1. */ template <> size_t ToStringImpl(bool t, char* buf, size_t len) { Y_ENSURE(len, TStringBuf("zero length")); *buf = t ? '1' : '0'; return 1; } /* * ------------------------------ parsers ------------------------------ */ /* Accepts the one-character inputs 0 and 1 directly, then whatever IsTrue and IsFalse recognise; returns false for anything else and leaves result untouched in that case. */ template <> bool TryFromStringImpl(const char* data, size_t len, bool& result) { if (len == 1) { if (data[0] == '0') { result = false; return true; } else if (data[0] == '1') { result = true; return true; } } TStringBuf buf(data, len); if (IsTrue(buf)) { result = true; return true; } else if (IsFalse(buf)) { result = false; return true; } return false; } /* Throwing bool parser built on the Try variant above. */ template <> bool FromStringImpl(const char* data, size_t len) { bool result; if (!TryFromStringImpl(data, len, result)) { ythrow TFromStringException() << TStringBuf("Cannot parse bool(") << TStringBuf(data, len) << TStringBuf("). "); } return result; } /* String-like targets: the input range is copied or viewed as is, no parsing is performed. */ template <> TString FromStringImpl(const char* data, size_t len) { return TString(data, len); } template <> TStringBuf FromStringImpl(const char* data, size_t len) { return TStringBuf(data, len); } template <> std::string FromStringImpl(const char* data, size_t len) { return std::string(data, len); } #ifndef USE_STL_SYSTEM // FIXME thegeorg@: remove #ifndef upon raising minimal macOS version to 10.15 in https://st.yandex-team.ru/DTCC-836 template <> std::filesystem::path FromStringImpl(const char* data, size_t len) { return std::filesystem::path(std::string(data, len)); } #endif template <> TUtf16String FromStringImpl(const wchar16* data, size_t len) { return TUtf16String(data, len); } template <> TWtringBuf FromStringImpl(const wchar16* data, size_t len) { return TWtringBuf(data, len); } // Try-versions /* The string-like Try variants always return true. */ template <> bool TryFromStringImpl(const char* data, size_t len, TStringBuf& result) { result = {data, len}; return true; } template <> bool TryFromStringImpl(const char* data, size_t len, TString& result) { result = TString(data, len); return true; } template <> bool 
TryFromStringImpl(const char* data, size_t len, std::string& result) { result.assign(data, len); return true; } template <> bool TryFromStringImpl(const wchar16* data, size_t len, TWtringBuf& result) { result = {data, len}; return true; } template <> bool TryFromStringImpl(const wchar16* data, size_t len, TUtf16String& result) { result = TUtf16String(data, len); return true; } /* The DEF_INT_SPEC macros below generate the integer parsing specializations for char and wchar16 input: FromStringImpl and TryFromStringImpl, plus IntFromString and TryIntFromString for bases 2, 8, 10 and 16. The throwing variants call ParseInt; the Try variants call TryParseInt into an ITYPE temporary and assign it to result only on success. */ #define DEF_INT_SPEC_III(CHAR, TYPE, ITYPE, BOUNDS, BASE) \ template <> \ TYPE IntFromString(const CHAR* data, size_t len) { \ return ParseInt(data, len, BOUNDS); \ } \ template <> \ bool TryIntFromString(const CHAR* data, size_t len, TYPE& result) { \ ITYPE tmp; \ bool status = TryParseInt(data, len, BOUNDS, &tmp); \ if (status) { \ result = tmp; \ } \ return status; \ } #define DEF_INT_SPEC_II(CHAR, TYPE, ITYPE, BOUNDS) \ template <> \ TYPE FromStringImpl(const CHAR* data, size_t len) { \ return ParseInt(data, len, BOUNDS); \ } \ template <> \ bool TryFromStringImpl(const CHAR* data, size_t len, TYPE& result) { \ ITYPE tmp; \ bool status = TryParseInt(data, len, BOUNDS, &tmp); \ if (status) { \ result = tmp; \ } \ return status; \ } \ DEF_INT_SPEC_III(CHAR, TYPE, ITYPE, BOUNDS, 2) \ DEF_INT_SPEC_III(CHAR, TYPE, ITYPE, BOUNDS, 8) \ DEF_INT_SPEC_III(CHAR, TYPE, ITYPE, BOUNDS, 10) \ DEF_INT_SPEC_III(CHAR, TYPE, ITYPE, BOUNDS, 16) #define DEF_INT_SPEC_I(TYPE, ITYPE, BOUNDS) \ DEF_INT_SPEC_II(char, TYPE, ITYPE, BOUNDS) \ DEF_INT_SPEC_II(wchar16, TYPE, ITYPE, BOUNDS) #define DEF_INT_SPEC(TYPE, ID) \ DEF_INT_SPEC_I(signed TYPE, i64, ID##SBounds) \ DEF_INT_SPEC_I(unsigned TYPE, ui64, ID##UBounds) #define DEF_INT_SPEC_FIXED_WIDTH(TYPE, ID) \ DEF_INT_SPEC_I(TYPE, i64, ID##SBounds) \ DEF_INT_SPEC_I(u##TYPE, ui64, ID##UBounds) DEF_INT_SPEC_FIXED_WIDTH(i8, b) DEF_INT_SPEC(short, s) DEF_INT_SPEC(int, i) DEF_INT_SPEC(long, l) DEF_INT_SPEC(long long, ll) #undef DEF_INT_SPEC_FIXED_WIDTH #undef DEF_INT_SPEC #undef DEF_INT_SPEC_I #undef DEF_INT_SPEC_II #undef DEF_INT_SPEC_III /* DEF_FLT_SPEC generates the throwing FromStringImpl specialization that forwards to ParseFlt; it is instantiated for long double only. */ #define DEF_FLT_SPEC(type) \ 
template <> \ type FromStringImpl(const char* data, size_t len) { \ return ParseFlt(data, len); \ } DEF_FLT_SPEC(long double) #undef DEF_FLT_SPEC // Using StrToD for float and double because it is faster than sscanf. // Exception-free, specialized for float types /* Succeeds only when len is non-zero and StrToD consumes the whole input; result is written only on success. */ template <> bool TryFromStringImpl(const char* data, size_t len, double& result) { if (!len) { return false; } char* se = nullptr; double d = StrToD(data, data + len, &se); if (se != data + len) { return false; } result = d; return true; } /* float and long double reuse the double parser and cast the result. NOTE(review): the long double Try variant therefore goes through double, whereas FromStringImpl for long double uses ParseFlt - confirm this difference is intended. */ template <> bool TryFromStringImpl(const char* data, size_t len, float& result) { double d; if (TryFromStringImpl(data, len, d)) { result = static_cast(d); return true; } return false; } template <> bool TryFromStringImpl(const char* data, size_t len, long double& result) { double d; if (TryFromStringImpl(data, len, d)) { result = static_cast(d); return true; } return false; } // Exception-throwing, specialized for float types template <> double FromStringImpl(const char* data, size_t len) { double d = 0.0; if (!TryFromStringImpl(data, len, d)) { ythrow TFromStringException() << TStringBuf("cannot parse float(") << TStringBuf(data, len) << TStringBuf(")"); } return d; } template <> float FromStringImpl(const char* data, size_t len) { return static_cast(FromStringImpl(data, len)); } /* Converts the range from b to e with a StringToDoubleConverter constructed with the flags ALLOW_TRAILING_JUNK, ALLOW_HEX and ALLOW_LEADING_SPACES. When se is non-null it receives b + out, where out is filled in by StringToDouble. */ double StrToD(const char* b, const char* e, char** se) { struct TCvt: public StringToDoubleConverter { inline TCvt() : StringToDoubleConverter(ALLOW_TRAILING_JUNK | ALLOW_HEX | ALLOW_LEADING_SPACES, 0.0, NAN, nullptr, nullptr) { } }; int out = 0; const auto res = SingletonWithPriority()->StringToDouble(b, e - b, &out); if (se) { *se = (char*)(b + out); } return res; } /* Overload for zero-terminated input: the end is found with strlen. */ double StrToD(const char* b, char** se) { return StrToD(b, b + strlen(b), se); } namespace { /* Returns the DoubleToStringConverter instance used by the float formatters below, obtained through SingletonWithPriority. */ static inline DoubleToStringConverter& ToStringConverterNoPad() noexcept { struct TCvt: public DoubleToStringConverter { inline TCvt() noexcept : DoubleToStringConverter(EMIT_POSITIVE_EXPONENT_SIGN, "inf", "nan", 'e', -10, 21, 4, 0) { } 
}; return *SingletonWithPriority(); } /* Holds a StringBuilder that is constructed with placement new inside Store and writes into the caller buffer. NOTE(review): the StringBuilder is never destroyed explicitly - presumably intentional; confirm. */ struct TBuilder { alignas(StringBuilder) char Store[sizeof(StringBuilder)]; StringBuilder* SB; inline TBuilder(char* buf, size_t len) noexcept : SB(new (Store) StringBuilder(buf, len)) { } }; /* Removes trailing zeros of the fractional part, and the decimal point itself when nothing remains after it, while keeping any suffix that starts at the exponent letter e. Returns the new length; text without a decimal point is returned unchanged. No terminating zero is written. */ static inline size_t FixZeros(char* buf, size_t len) noexcept { auto end = buf + len; auto point = (char*)memchr(buf, '.', len); if (!point) { return len; } auto exp = (char*)memchr(point, 'e', end - point); if (!exp) { exp = end; } auto c = exp; c -= 1; while (point < c && *c == '0') { --c; } if (*c == '.') { --c; } memmove(c + 1, exp, end - exp); return c - buf + 1 + end - exp; } /* When len > 2 and the second-to-last character is a plus or minus sign, moves the last character one position to the right and puts a zero digit in its place (a one-digit exponent becomes two digits). Then writes a terminating zero at buf[len] and returns len. This writes at buf[len], and at buf[len + 1] when padding happens, so the buffer needs room beyond the given length. */ static inline size_t FixEnd(char* buf, size_t len) noexcept { if (len > 2) { auto sign = buf[len - 2]; if (sign == '-' || sign == '+') { buf[len] = buf[len - 1]; buf[len - 1] = '0'; ++len; } } buf[len] = 0; return len; } /* Formats d with ToPrecision(prec), then post-processes the text with FixZeros and FixEnd; Y_ABORT_UNLESS aborts if the conversion fails. Returns the final length. */ static inline size_t DoDtoa(double d, char* buf, size_t len, int prec) noexcept { TBuilder sb(buf, len); Y_ABORT_UNLESS(ToStringConverterNoPad().ToPrecision(d, prec, sb.SB), "conversion failed"); return FixEnd(buf, FixZeros(buf, sb.SB->position())); } } /* double is formatted with precision 10, float with precision 6. */ template <> size_t ToStringImpl(double d, char* buf, size_t len) { return DoDtoa(d, buf, len, 10); } template <> size_t ToStringImpl(float f, char* buf, size_t len) { return DoDtoa(f, buf, len, 6); } /* float overload: PREC_AUTO uses ToShortestSingle followed by FixEnd; every other mode is delegated to the double overload. */ size_t FloatToString(float t, char* buf, size_t len, EFloatToStringMode mode, int ndigits) { if (mode == PREC_AUTO) { TBuilder sb(buf, len); Y_ABORT_UNLESS(ToStringConverterNoPad().ToShortestSingle(t, sb.SB), "conversion failed"); return FixEnd(buf, sb.SB->position()); } return FloatToString((double)t, buf, len, mode, ndigits); } /* double overload. PREC_NDIGITS: DoDtoa with ndigits clamped to the range kMinPrecisionDigits to kMaxPrecisionDigits. PREC_AUTO: ToShortest followed by FixEnd. Any other mode: ToFixed(ndigits), falling back to PREC_AUTO when ToFixed fails; PREC_POINT_DIGITS_STRIP_ZEROES additionally runs FixZeros. The ToFixed paths return without calling FixEnd, so no terminating zero is written by this function on those paths. */ size_t FloatToString(double t, char* buf, size_t len, EFloatToStringMode mode, int ndigits) { if (mode == PREC_NDIGITS) { auto minDigits = DoubleToStringConverter::kMinPrecisionDigits; auto maxDigits = DoubleToStringConverter::kMaxPrecisionDigits; return DoDtoa(t, buf, len, ClampVal(ndigits, minDigits, maxDigits)); } TBuilder sb(buf, len); if (mode == PREC_AUTO) { 
Y_ABORT_UNLESS(ToStringConverterNoPad().ToShortest(t, sb.SB), "conversion failed"); return FixEnd(buf, sb.SB->position()); } if (!ToStringConverterNoPad().ToFixed(t, ndigits, sb.SB)) { return FloatToString(t, buf, len, PREC_AUTO); } if (mode == PREC_POINT_DIGITS_STRIP_ZEROES) { return FixZeros(buf, sb.SB->position()); } return sb.SB->position(); }