writer.cpp 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355
  1. #include "writer.h"
  2. #include "detail.h"
  3. #include "format.h"
  4. #include "parser.h"
  5. #include "varint.h"
  6. #include "zigzag.h"
  7. #include <util/string/cast.h>
  8. #include <cmath>
  9. namespace NYson {
  10. ////////////////////////////////////////////////////////////////////////////////
  11. // Copied from <util/string/escape.cpp>
  12. namespace {
  13. inline char HexDigit(char value) {
  14. Y_ASSERT(value < 16);
  15. if (value < 10)
  16. return '0' + value;
  17. else
  18. return 'A' + value - 10;
  19. }
  20. inline char OctDigit(char value) {
  21. Y_ASSERT(value < 8);
  22. return '0' + value;
  23. }
  // Returns true when the character code lies in the inclusive range 32..126,
  // i.e. it is neither below the space character nor above the tilde.
  inline bool IsPrintable(char c) {
      return !(c < 32 || c > 126);
  }
  // Returns true for '0'..'9', 'A'..'F' and 'a'..'f'; false for anything else.
  inline bool IsHexDigit(char c) {
      if (c >= '0' && c <= '9') {
          return true;
      }
      if (c >= 'A' && c <= 'F') {
          return true;
      }
      return c >= 'a' && c <= 'f';
  }
  // Returns true for the characters '0' through '7'.
  inline bool IsOctDigit(char c) {
      return '0' <= c && c <= '7';
  }
  // Size, in bytes, of the scratch buffer that receives the escaped form of a single character.
  constexpr size_t ESCAPE_C_BUFFER_SIZE = 4;
  34. inline size_t EscapeC(unsigned char c, char next, char r[ESCAPE_C_BUFFER_SIZE]) {
  35. // (1) Printable characters go as-is, except backslash and double quote.
  36. // (2) Characters \r, \n, \t and \0 ... \7 replaced by their simple escape characters (if possible).
  37. // (3) Otherwise, character is encoded using hexadecimal escape sequence (if possible), or octal.
  38. if (c == '\"') {
  39. r[0] = '\\';
  40. r[1] = '\"';
  41. return 2;
  42. } else if (c == '\\') {
  43. r[0] = '\\';
  44. r[1] = '\\';
  45. return 2;
  46. } else if (IsPrintable(c)) {
  47. r[0] = c;
  48. return 1;
  49. } else if (c == '\r') {
  50. r[0] = '\\';
  51. r[1] = 'r';
  52. return 2;
  53. } else if (c == '\n') {
  54. r[0] = '\\';
  55. r[1] = 'n';
  56. return 2;
  57. } else if (c == '\t') {
  58. r[0] = '\\';
  59. r[1] = 't';
  60. return 2;
  61. } else if (c < 8 && !IsOctDigit(next)) {
  62. r[0] = '\\';
  63. r[1] = OctDigit(c);
  64. return 2;
  65. } else if (!IsHexDigit(next)) {
  66. r[0] = '\\';
  67. r[1] = 'x';
  68. r[2] = HexDigit((c & 0xF0) >> 4);
  69. r[3] = HexDigit((c & 0x0F) >> 0);
  70. return 4;
  71. } else {
  72. r[0] = '\\';
  73. r[1] = OctDigit((c & 0700) >> 6);
  74. r[2] = OctDigit((c & 0070) >> 3);
  75. r[3] = OctDigit((c & 0007) >> 0);
  76. return 4;
  77. }
  78. }
  79. void EscapeC(const char* str, size_t len, IOutputStream& output) {
  80. char buffer[ESCAPE_C_BUFFER_SIZE];
  81. size_t i, j;
  82. for (i = 0, j = 0; i < len; ++i) {
  83. size_t rlen = EscapeC(str[i], (i + 1 < len ? str[i + 1] : 0), buffer);
  84. if (rlen > 1) {
  85. output.Write(str + j, i - j);
  86. j = i + 1;
  87. output.Write(buffer, rlen);
  88. }
  89. }
  90. if (j > 0) {
  91. output.Write(str + j, len - j);
  92. } else {
  93. output.Write(str, len);
  94. }
  95. }
  96. TString FloatToStringWithNanInf(double value) {
  97. if (std::isfinite(value)) {
  98. return ::ToString(value);
  99. }
  100. static const TStringBuf nanLiteral = "%nan";
  101. static const TStringBuf infLiteral = "%inf";
  102. static const TStringBuf negativeInfLiteral = "%-inf";
  103. TStringBuf str;
  104. if (std::isnan(value)) {
  105. str = nanLiteral;
  106. } else if (value > 0) {
  107. str = infLiteral;
  108. } else {
  109. str = negativeInfLiteral;
  110. }
  111. return TString(str.data(), str.size());
  112. }
  113. }
  114. ////////////////////////////////////////////////////////////////////////////////
  115. TYsonWriter::TYsonWriter(
  116. IOutputStream* stream,
  117. EYsonFormat format,
  118. EYsonType type,
  119. bool enableRaw)
  120. : Stream(stream)
  121. , Format(format)
  122. , Type(type)
  123. , EnableRaw(enableRaw)
  124. , Depth(0)
  125. , BeforeFirstItem(true)
  126. {
  127. Y_ASSERT(stream);
  128. }
  129. void TYsonWriter::WriteIndent() {
  130. for (int i = 0; i < IndentSize * Depth; ++i) {
  131. Stream->Write(' ');
  132. }
  133. }
  134. bool TYsonWriter::IsTopLevelFragmentContext() const {
  135. return Depth == 0 && (Type == ::NYson::EYsonType::ListFragment || Type == ::NYson::EYsonType::MapFragment);
  136. }
  137. void TYsonWriter::EndNode() {
  138. if (IsTopLevelFragmentContext()) {
  139. ETokenType separatorToken =
  140. Type == ::NYson::EYsonType::ListFragment
  141. ? ListItemSeparatorToken
  142. : KeyedItemSeparatorToken;
  143. Stream->Write(TokenTypeToChar(separatorToken));
  144. if (Format == EYsonFormat::Text || Format == EYsonFormat::Pretty) {
  145. Stream->Write('\n');
  146. }
  147. }
  148. }
  149. void TYsonWriter::BeginCollection(ETokenType beginToken) {
  150. Stream->Write(TokenTypeToChar(beginToken));
  151. ++Depth;
  152. BeforeFirstItem = true;
  153. }
  154. void TYsonWriter::CollectionItem(ETokenType separatorToken) {
  155. if (!IsTopLevelFragmentContext()) {
  156. if (!BeforeFirstItem) {
  157. Stream->Write(TokenTypeToChar(separatorToken));
  158. }
  159. if (Format == EYsonFormat::Pretty) {
  160. Stream->Write('\n');
  161. WriteIndent();
  162. }
  163. }
  164. BeforeFirstItem = false;
  165. }
  166. void TYsonWriter::EndCollection(ETokenType endToken) {
  167. --Depth;
  168. if (Format == EYsonFormat::Pretty && !BeforeFirstItem) {
  169. Stream->Write('\n');
  170. WriteIndent();
  171. }
  172. Stream->Write(TokenTypeToChar(endToken));
  173. BeforeFirstItem = false;
  174. }
  175. void TYsonWriter::WriteStringScalar(const TStringBuf& value) {
  176. if (Format == EYsonFormat::Binary) {
  177. Stream->Write(NDetail::StringMarker);
  178. WriteVarInt32(Stream, static_cast<i32>(value.length()));
  179. Stream->Write(value.begin(), value.length());
  180. } else {
  181. Stream->Write('"');
  182. EscapeC(value.data(), value.length(), *Stream);
  183. Stream->Write('"');
  184. }
  185. }
  186. void TYsonWriter::OnStringScalar(TStringBuf value) {
  187. WriteStringScalar(value);
  188. EndNode();
  189. }
  190. void TYsonWriter::OnInt64Scalar(i64 value) {
  191. if (Format == EYsonFormat::Binary) {
  192. Stream->Write(NDetail::Int64Marker);
  193. WriteVarInt64(Stream, value);
  194. } else {
  195. Stream->Write(::ToString(value));
  196. }
  197. EndNode();
  198. }
  199. void TYsonWriter::OnUint64Scalar(ui64 value) {
  200. if (Format == EYsonFormat::Binary) {
  201. Stream->Write(NDetail::Uint64Marker);
  202. WriteVarUInt64(Stream, value);
  203. } else {
  204. Stream->Write(::ToString(value));
  205. Stream->Write("u");
  206. }
  207. EndNode();
  208. }
  209. void TYsonWriter::OnDoubleScalar(double value) {
  210. if (Format == EYsonFormat::Binary) {
  211. Stream->Write(NDetail::DoubleMarker);
  212. Stream->Write(&value, sizeof(double));
  213. } else {
  214. auto str = FloatToStringWithNanInf(value);
  215. Stream->Write(str);
  216. if (str.find('.') == TString::npos && str.find('e') == TString::npos && std::isfinite(value)) {
  217. Stream->Write(".");
  218. }
  219. }
  220. EndNode();
  221. }
  222. void TYsonWriter::OnBooleanScalar(bool value) {
  223. if (Format == EYsonFormat::Binary) {
  224. Stream->Write(value ? NDetail::TrueMarker : NDetail::FalseMarker);
  225. } else {
  226. Stream->Write(value ? "%true" : "%false");
  227. }
  228. EndNode();
  229. }
  230. void TYsonWriter::OnEntity() {
  231. Stream->Write(TokenTypeToChar(EntityToken));
  232. EndNode();
  233. }
  234. void TYsonWriter::OnBeginList() {
  235. BeginCollection(BeginListToken);
  236. }
  237. void TYsonWriter::OnListItem() {
  238. CollectionItem(ListItemSeparatorToken);
  239. }
  240. void TYsonWriter::OnEndList() {
  241. EndCollection(EndListToken);
  242. EndNode();
  243. }
  244. void TYsonWriter::OnBeginMap() {
  245. BeginCollection(BeginMapToken);
  246. }
  247. void TYsonWriter::OnKeyedItem(TStringBuf key) {
  248. CollectionItem(KeyedItemSeparatorToken);
  249. WriteStringScalar(key);
  250. if (Format == NYson::EYsonFormat::Pretty) {
  251. Stream->Write(' ');
  252. }
  253. Stream->Write(TokenTypeToChar(KeyValueSeparatorToken));
  254. if (Format == NYson::EYsonFormat::Pretty) {
  255. Stream->Write(' ');
  256. }
  257. BeforeFirstItem = false;
  258. }
  259. void TYsonWriter::OnEndMap() {
  260. EndCollection(EndMapToken);
  261. EndNode();
  262. }
  263. void TYsonWriter::OnBeginAttributes() {
  264. BeginCollection(BeginAttributesToken);
  265. }
  266. void TYsonWriter::OnEndAttributes() {
  267. EndCollection(EndAttributesToken);
  268. if (Format == NYson::EYsonFormat::Pretty) {
  269. Stream->Write(' ');
  270. }
  271. }
  272. void TYsonWriter::OnRaw(TStringBuf yson, EYsonType type) {
  273. if (EnableRaw) {
  274. Stream->Write(yson);
  275. BeforeFirstItem = false;
  276. } else {
  277. TYsonConsumerBase::OnRaw(yson, type);
  278. }
  279. }
  280. TYsonWriter::TState TYsonWriter::State() const {
  281. TState state;
  282. state.Depth = Depth;
  283. state.BeforeFirstItem = BeforeFirstItem;
  284. return state;
  285. }
  286. void TYsonWriter::Reset(const TState& state) {
  287. Depth = state.Depth;
  288. BeforeFirstItem = state.BeforeFirstItem;
  289. }
  290. ////////////////////////////////////////////////////////////////////////////////
  291. void ReformatYsonStream(
  292. IInputStream* input,
  293. IOutputStream* output,
  294. EYsonFormat format,
  295. EYsonType type) {
  296. TYsonWriter writer(output, format, type);
  297. TYsonParser parser(&writer, input, type);
  298. parser.Parse();
  299. }
  300. ////////////////////////////////////////////////////////////////////////////////
  301. } // namespace NYson