text_yson.cpp 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436
  1. #include "text_yson.h"
  2. #include "error.h"
  3. #include <library/cpp/yt/assert/assert.h>
  4. #include <library/cpp/yt/string/format.h>
  5. #include <library/cpp/yt/coding/varint.h>
  6. #include <library/cpp/yt/misc/cast.h>
  7. #include <array>
  8. #include <util/string/escape.h>
  9. #include <util/stream/mem.h>
  10. namespace NYT::NDetail {
  11. ////////////////////////////////////////////////////////////////////////////////
  12. size_t FloatToStringWithNanInf(double value, char* buf, size_t size)
  13. {
  14. if (std::isfinite(value)) {
  15. return FloatToString(value, buf, size);
  16. }
  17. static const TStringBuf nanLiteral = "%nan";
  18. static const TStringBuf infLiteral = "%inf";
  19. static const TStringBuf negativeInfLiteral = "%-inf";
  20. TStringBuf str;
  21. if (std::isnan(value)) {
  22. str = nanLiteral;
  23. } else if (std::isinf(value) && value > 0) {
  24. str = infLiteral;
  25. } else {
  26. str = negativeInfLiteral;
  27. }
  28. YT_VERIFY(str.size() + 1 <= size);
  29. ::memcpy(buf, str.data(), str.size() + 1);
  30. return str.size();
  31. }
  32. ////////////////////////////////////////////////////////////////////////////////
  33. // NB(arkady-e1ppa): Copied from library/cpp/yt/yson_string/format.h
  34. // to avoid direct dependency on it.
  //! Entity value symbol.
  constexpr char EntitySymbol{'#'};
  //! First byte of a binary string literal.
  constexpr char StringMarker{'\x01'};
  //! First byte of a binary i64 literal.
  constexpr char Int64Marker{'\x02'};
  //! First byte of a binary double literal.
  constexpr char DoubleMarker{'\x03'};
  //! Byte denoting the boolean value |false|.
  constexpr char FalseMarker{'\x04'};
  //! Byte denoting the boolean value |true|.
  constexpr char TrueMarker{'\x05'};
  //! First byte of a binary ui64 literal.
  constexpr char Uint64Marker{'\x06'};
  49. ////////////////////////////////////////////////////////////////////////////////
  50. bool IsBinaryYson(TStringBuf str)
  51. {
  52. return
  53. std::ssize(str) != 0 &&
  54. (str.front() == EntitySymbol ||
  55. str.front() == StringMarker ||
  56. str.front() == Int64Marker ||
  57. str.front() == DoubleMarker ||
  58. str.front() == FalseMarker ||
  59. str.front() == TrueMarker ||
  60. str.front() == Uint64Marker);
  61. }
  62. ////////////////////////////////////////////////////////////////////////////////
  63. template <>
  64. std::string ConvertToTextYsonString<i8>(const i8& value)
  65. {
  66. return ConvertToTextYsonString(static_cast<i64>(value));
  67. }
  68. template <>
  69. std::string ConvertToTextYsonString<i32>(const i32& value)
  70. {
  71. return ConvertToTextYsonString(static_cast<i64>(value));
  72. }
  73. template <>
  74. std::string ConvertToTextYsonString<i64>(const i64& value)
  75. {
  76. return std::string{::ToString(value)};
  77. }
  78. template <>
  79. std::string ConvertToTextYsonString<ui8>(const ui8& value)
  80. {
  81. return ConvertToTextYsonString(static_cast<ui64>(value));
  82. }
  83. template <>
  84. std::string ConvertToTextYsonString<ui32>(const ui32& value)
  85. {
  86. return ConvertToTextYsonString(static_cast<ui64>(value));
  87. }
  88. template <>
  89. std::string ConvertToTextYsonString<ui64>(const ui64& value)
  90. {
  91. return std::string{::ToString(value) + 'u'};
  92. }
  93. template <>
  94. std::string ConvertToTextYsonString<TStringBuf>(const TStringBuf& value)
  95. {
  96. return std::string(NYT::Format("\"%v\"", ::EscapeC(value)));
  97. }
  98. template <>
  99. std::string ConvertToTextYsonString<float>(const float& value)
  100. {
  101. return ConvertToTextYsonString(static_cast<double>(value));
  102. }
  103. template <>
  104. std::string ConvertToTextYsonString<double>(const double& value)
  105. {
  106. char buf[256];
  107. auto str = TStringBuf(buf, NDetail::FloatToStringWithNanInf(value, buf, sizeof(buf)));
  108. auto ret = NYT::Format(
  109. "%v%v",
  110. str,
  111. MakeFormatterWrapper([&] (TStringBuilderBase* builder) {
  112. if (str.find('.') == TString::npos && str.find('e') == TString::npos && std::isfinite(value)) {
  113. builder->AppendChar('.');
  114. }
  115. }));
  116. return std::string(std::move(ret));
  117. }
  118. template <>
  119. std::string ConvertToTextYsonString<bool>(const bool& value)
  120. {
  121. return value
  122. ? std::string(TStringBuf("%true"))
  123. : std::string(TStringBuf("%false"));
  124. }
  125. template <>
  126. std::string ConvertToTextYsonString<TInstant>(const TInstant& value)
  127. {
  128. return ConvertToTextYsonString(TStringBuf(value.ToString()));
  129. }
  130. template <>
  131. std::string ConvertToTextYsonString<TDuration>(const TDuration& value)
  132. {
  133. // ConvertTo does unchecked cast to i64 :(.
  134. return ConvertToTextYsonString(static_cast<i64>(value.MilliSeconds()));
  135. }
  136. template <>
  137. std::string ConvertToTextYsonString<TGuid>(const TGuid& value)
  138. {
  139. return ConvertToTextYsonString(TStringBuf(NYT::ToString(value)));
  140. }
  141. ////////////////////////////////////////////////////////////////////////////////
  142. namespace {
  143. template <class TSomeInt>
  144. TSomeInt ReadTextUint(TStringBuf strBuf)
  145. {
  146. // Drop 'u'
  147. return ::FromString<TSomeInt>(TStringBuf{strBuf.data(), strBuf.length() - 1});
  148. }
  149. template <class TSomeInt>
  150. TSomeInt ReadTextInt(TStringBuf strBuf)
  151. {
  152. return ::FromString<TSomeInt>(TStringBuf{strBuf.data(), strBuf.length()});
  153. }
  154. bool IsNumeric(TStringBuf strBuf)
  155. {
  156. bool isNumeric = true;
  157. bool isNegative = false;
  158. for (int i = 0; i < std::ssize(strBuf); ++i) {
  159. char c = strBuf[i];
  160. if (!('0' <= c && c <= '9')) {
  161. if (i == 0 && c == '-') {
  162. isNegative = true;
  163. continue;
  164. }
  165. if (i == std::ssize(strBuf) - 1 && c == 'u' && !isNegative) {
  166. continue;
  167. }
  168. isNumeric = false;
  169. break;
  170. }
  171. }
  172. return isNumeric;
  173. }
  174. ////////////////////////////////////////////////////////////////////////////////
  175. template <class TSomeInt>
  176. TSomeInt ParseSomeIntFromTextYsonString(TStringBuf strBuf)
  177. {
  178. if (std::ssize(strBuf) == 0 || !IsNumeric(strBuf)) {
  179. THROW_ERROR_EXCEPTION(
  180. "Unexpected %v\n"
  181. "Value is not numeric",
  182. strBuf);
  183. }
  184. if (strBuf.back() == 'u') {
  185. // Drop 'u'
  186. return ReadTextUint<TSomeInt>(strBuf);
  187. } else {
  188. return ReadTextInt<TSomeInt>(strBuf);
  189. }
  190. }
  191. ////////////////////////////////////////////////////////////////////////////////
  192. TString DoParseStringFromTextYson(TStringBuf strBuf)
  193. {
  194. // Remove quotation marks.
  195. return ::UnescapeC(TStringBuf{strBuf.data() + 1, strBuf.length() - 2});
  196. }
  197. TString ParseStringFromTextYsonString(TStringBuf strBuf)
  198. {
  199. if (std::ssize(strBuf) < 2 || strBuf.front() != '\"' || strBuf.back() != '\"') {
  200. THROW_ERROR_EXCEPTION(
  201. "Unexpected %v\n"
  202. "Text yson string must begin and end with \\\"",
  203. strBuf);
  204. }
  205. return DoParseStringFromTextYson(strBuf);
  206. }
  207. ////////////////////////////////////////////////////////////////////////////////
  208. double ParseDoubleFromTextYsonString(TStringBuf strBuf)
  209. {
  210. if (std::ssize(strBuf) < 2) {
  211. THROW_ERROR_EXCEPTION(
  212. "Incorrect remaining string length: expected at least 2, got %v",
  213. std::ssize(strBuf));
  214. }
  215. // Check special values first.
  216. // %nan
  217. // %inf, %+inf, %-inf
  218. if (strBuf[0] == '%') {
  219. switch (strBuf[1]) {
  220. case '+':
  221. case 'i':
  222. return std::numeric_limits<double>::infinity();
  223. case '-':
  224. return -std::numeric_limits<double>::infinity();
  225. case 'n':
  226. return std::numeric_limits<double>::quiet_NaN();
  227. default:
  228. THROW_ERROR_EXCEPTION(
  229. "Incorrect %%-literal %v",
  230. strBuf);
  231. }
  232. }
  233. return ::FromString<double>(strBuf);
  234. }
  235. } // namespace
  236. ////////////////////////////////////////////////////////////////////////////////
  237. #define PARSE_INT(type, underlyingType) \
  238. template <> \
  239. type ConvertFromTextYsonString<type>(TStringBuf str) \
  240. { \
  241. try { \
  242. return CheckedIntegralCast<type>(ParseSomeIntFromTextYsonString<underlyingType>(str)); \
  243. } catch (const std::exception& ex) { \
  244. THROW_ERROR_EXCEPTION("Error parsing \"" #type "\" value from YSON") << ex; \
  245. } \
  246. }
  247. PARSE_INT(i8, i64)
  248. PARSE_INT(i16, i64)
  249. PARSE_INT(i32, i64)
  250. PARSE_INT(i64, i64)
  251. PARSE_INT(ui8, ui64)
  252. PARSE_INT(ui16, ui64)
  253. PARSE_INT(ui32, ui64)
  254. PARSE_INT(ui64, ui64)
  255. #undef PARSE
  256. ////////////////////////////////////////////////////////////////////////////////
  257. template <>
  258. TString ConvertFromTextYsonString<TString>(TStringBuf str)
  259. {
  260. try {
  261. return ParseStringFromTextYsonString(str);
  262. } catch (const std::exception& ex) {
  263. THROW_ERROR_EXCEPTION("Error parsing \"string\" value from YSON") << ex;
  264. }
  265. }
  266. template <>
  267. std::string ConvertFromTextYsonString<std::string>(TStringBuf str)
  268. {
  269. return std::string(ConvertFromTextYsonString<TString>(str));
  270. }
  271. template <>
  272. float ConvertFromTextYsonString<float>(TStringBuf str)
  273. {
  274. try {
  275. return static_cast<float>(ParseDoubleFromTextYsonString(str));
  276. } catch (const std::exception& ex) {
  277. THROW_ERROR_EXCEPTION("Error parsing \"float\" value from YSON") << ex;
  278. }
  279. }
  280. template <>
  281. double ConvertFromTextYsonString<double>(TStringBuf str)
  282. {
  283. try {
  284. return ParseDoubleFromTextYsonString(str);
  285. } catch (const std::exception& ex) {
  286. THROW_ERROR_EXCEPTION("Error parsing \"double\" value from YSON") << ex;
  287. }
  288. }
  289. template <>
  290. bool ConvertFromTextYsonString<bool>(TStringBuf strBuf)
  291. {
  292. try {
  293. if (std::ssize(strBuf) == 0) {
  294. THROW_ERROR_EXCEPTION("Empty string");
  295. }
  296. char ch = strBuf.front();
  297. if (ch == '%') {
  298. if (strBuf != "%true" && strBuf != "%false") {
  299. THROW_ERROR_EXCEPTION(
  300. "Expected %%true or %%false but found %v",
  301. strBuf);
  302. }
  303. return strBuf == "%true";
  304. }
  305. if (ch == '\"') {
  306. return ParseBool(DoParseStringFromTextYson(strBuf));
  307. }
  308. // NB(arkady-e1ppa): This check is linear in size(strBuf)
  309. // And thus is tried as the last resort.
  310. if (IsNumeric(strBuf)) {
  311. auto checkValue = [&] (const auto& functor) {
  312. auto value = functor(strBuf);
  313. if (value != 0 && value != 1) {
  314. THROW_ERROR_EXCEPTION(
  315. "Expected 0 or 1 but found %v",
  316. value);
  317. }
  318. return static_cast<bool>(value);
  319. };
  320. if (strBuf.back() == 'u') {
  321. return checkValue(&ReadTextUint<ui64>);
  322. } else {
  323. return checkValue(&ReadTextInt<i64>);
  324. }
  325. }
  326. THROW_ERROR_EXCEPTION(
  327. "Unexpected %v\n"
  328. "No known conversion to \"boolean\" value",
  329. strBuf);
  330. } catch (const std::exception& ex) {
  331. THROW_ERROR_EXCEPTION("Error parsing \"boolean\" value from YSON") << ex;
  332. }
  333. }
  334. template <>
  335. TInstant ConvertFromTextYsonString<TInstant>(TStringBuf str)
  336. {
  337. try {
  338. return TInstant::ParseIso8601(ParseStringFromTextYsonString(str));
  339. } catch (const std::exception& ex) {
  340. THROW_ERROR_EXCEPTION("Error parsing \"instant\" value from YSON") << ex;
  341. }
  342. }
  343. template <>
  344. TDuration ConvertFromTextYsonString<TDuration>(TStringBuf str)
  345. {
  346. try {
  347. return TDuration::MilliSeconds(ParseSomeIntFromTextYsonString<i64>(str));
  348. } catch (const std::exception& ex) {
  349. THROW_ERROR_EXCEPTION("Error parsing \"duration\" value from YSON") << ex;
  350. }
  351. }
  352. template <>
  353. TGuid ConvertFromTextYsonString<TGuid>(TStringBuf str)
  354. {
  355. try {
  356. return TGuid::FromString(ParseStringFromTextYsonString(str));
  357. } catch (const std::exception& ex) {
  358. THROW_ERROR_EXCEPTION("Error parsing \"guid\" value from YSON") << ex;
  359. }
  360. }
  361. ////////////////////////////////////////////////////////////////////////////////
  362. } // namespace NYT::NDetail