JsonIO.hh 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481
  1. /**
  2. * Licensed to the Apache Software Foundation (ASF) under one
  3. * or more contributor license agreements. See the NOTICE file
  4. * distributed with this work for additional information
  5. * regarding copyright ownership. The ASF licenses this file
  6. * to you under the Apache License, Version 2.0 (the
  7. * "License"); you may not use this file except in compliance
  8. * with the License. You may obtain a copy of the License at
  9. *
  10. * https://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. #ifndef avro_json_JsonIO_hh__
  19. #define avro_json_JsonIO_hh__
  20. #include <boost/lexical_cast.hpp>
  21. #include <boost/math/special_functions/fpclassify.hpp>
  22. #include <boost/utility.hpp>
  23. #include <locale>
  24. #include <sstream>
  25. #include <stack>
  26. #include <string>
  27. #include "Config.hh"
  28. #include "Stream.hh"
  29. namespace avro {
  30. namespace json {
  /**
   * Map a nibble value to its lowercase hexadecimal digit.
   *
   * @param n value to convert; callers in this file pass 0..15.
   * @return '0'..'9' for n below 10, otherwise 'a' + (n - 10).
   */
  inline char toHex(unsigned int n) {
      if (n < 10) {
          return static_cast<char>(n + '0');
      }
      return static_cast<char>(n + 'a' - 10);
  }
  34. class AVRO_DECL JsonParser : boost::noncopyable {
  35. public:
  36. enum class Token {
  37. Null,
  38. Bool,
  39. Long,
  40. Double,
  41. String,
  42. ArrayStart,
  43. ArrayEnd,
  44. ObjectStart,
  45. ObjectEnd
  46. };
  47. size_t line() const { return line_; }
  48. private:
  49. enum State {
  50. stValue, // Expect a data type
  51. stArray0, // Expect a data type or ']'
  52. stArrayN, // Expect a ',' or ']'
  53. stObject0, // Expect a string or a '}'
  54. stObjectN, // Expect a ',' or '}'
  55. stKey // Expect a ':'
  56. };
  57. std::stack<State> stateStack;
  58. State curState;
  59. bool hasNext;
  60. char nextChar;
  61. bool peeked;
  62. StreamReader in_;
  63. Token curToken;
  64. bool bv;
  65. int64_t lv;
  66. double dv;
  67. std::string sv;
  68. size_t line_;
  69. Token doAdvance();
  70. Token tryLiteral(const char exp[], size_t n, Token tk);
  71. Token tryNumber(char ch);
  72. Token tryString();
  73. static Exception unexpected(unsigned char ch);
  74. char next();
  75. static std::string decodeString(const std::string &s, bool binary);
  76. public:
  77. JsonParser() : curState(stValue), hasNext(false), nextChar(0), peeked(false),
  78. curToken(Token::Null), bv(false), lv(0), dv(0), line_(1) {}
  79. void init(InputStream &is) {
  80. // Clear by swapping with an empty stack
  81. std::stack<State>().swap(stateStack);
  82. curState = stValue;
  83. hasNext = false;
  84. peeked = false;
  85. line_ = 1;
  86. in_.reset(is);
  87. }
  88. Token advance() {
  89. if (!peeked) {
  90. curToken = doAdvance();
  91. } else {
  92. peeked = false;
  93. }
  94. return curToken;
  95. }
  96. Token peek() {
  97. if (!peeked) {
  98. curToken = doAdvance();
  99. peeked = true;
  100. }
  101. return curToken;
  102. }
  103. void expectToken(Token tk);
  104. bool boolValue() const {
  105. return bv;
  106. }
  107. Token cur() const {
  108. return curToken;
  109. }
  110. double doubleValue() const {
  111. return dv;
  112. }
  113. int64_t longValue() const {
  114. return lv;
  115. }
  116. const std::string &rawString() const {
  117. return sv;
  118. }
  119. std::string stringValue() const {
  120. return decodeString(sv, false);
  121. }
  122. std::string bytesValue() const {
  123. return decodeString(sv, true);
  124. }
  125. void drain() {
  126. if (!stateStack.empty() || peeked) {
  127. throw Exception("Invalid state for draining");
  128. }
  129. in_.drain(hasNext);
  130. hasNext = false;
  131. }
  132. /**
  133. * Return UTF-8 encoded string value.
  134. */
  135. static std::string toStringValue(const std::string &sv) {
  136. return decodeString(sv, false);
  137. }
  138. /**
  139. * Return byte-encoded string value. It is an error if the input
  140. * JSON string contained unicode characters more than "\u00ff'.
  141. */
  142. static std::string toBytesValue(const std::string &sv) {
  143. return decodeString(sv, true);
  144. }
  145. static const char *const tokenNames[];
  146. static const char *toString(Token tk) {
  147. return tokenNames[static_cast<size_t>(tk)];
  148. }
  149. };
  150. class AVRO_DECL JsonNullFormatter {
  151. public:
  152. explicit JsonNullFormatter(StreamWriter &) {}
  153. void handleObjectStart() {}
  154. void handleObjectEnd() {}
  155. void handleValueEnd() {}
  156. void handleColon() {}
  157. };
  158. class AVRO_DECL JsonPrettyFormatter {
  159. StreamWriter &out_;
  160. size_t level_;
  161. std::vector<uint8_t> indent_;
  162. static const int CHARS_PER_LEVEL = 2;
  163. void printIndent() {
  164. size_t charsToIndent = level_ * CHARS_PER_LEVEL;
  165. if (indent_.size() < charsToIndent) {
  166. indent_.resize(charsToIndent * 2, ' ');
  167. }
  168. out_.writeBytes(indent_.data(), charsToIndent);
  169. }
  170. public:
  171. explicit JsonPrettyFormatter(StreamWriter &out) : out_(out), level_(0), indent_(10, ' ') {}
  172. void handleObjectStart() {
  173. out_.write('\n');
  174. ++level_;
  175. printIndent();
  176. }
  177. void handleObjectEnd() {
  178. out_.write('\n');
  179. --level_;
  180. printIndent();
  181. }
  182. void handleValueEnd() {
  183. out_.write('\n');
  184. printIndent();
  185. }
  186. void handleColon() {
  187. out_.write(' ');
  188. }
  189. };
  190. template<class F>
  191. class AVRO_DECL JsonGenerator {
  192. StreamWriter out_;
  193. F formatter_;
  194. enum State {
  195. stStart,
  196. stArray0,
  197. stArrayN,
  198. stMap0,
  199. stMapN,
  200. stKey,
  201. };
  202. std::stack<State> stateStack;
  203. State top;
  204. void write(const char *b, const char *p) {
  205. if (b != p) {
  206. out_.writeBytes(reinterpret_cast<const uint8_t *>(b), p - b);
  207. }
  208. }
  209. void escape(char c, const char *b, const char *p) {
  210. write(b, p);
  211. out_.write('\\');
  212. out_.write(c);
  213. }
  214. void escapeCtl(char c) {
  215. escapeUnicode(static_cast<uint8_t>(c));
  216. }
  217. void writeHex(char c) {
  218. out_.write(toHex((static_cast<unsigned char>(c)) / 16));
  219. out_.write(toHex((static_cast<unsigned char>(c)) % 16));
  220. }
  221. void escapeUnicode(uint32_t c) {
  222. out_.write('\\');
  223. out_.write('u');
  224. writeHex((c >> 8) & 0xff);
  225. writeHex(c & 0xff);
  226. }
  227. void doEncodeString(const char *b, size_t len, bool binary) {
  228. const char *e = b + len;
  229. out_.write('"');
  230. for (const char *p = b; p != e; p++) {
  231. if ((*p & 0x80) != 0) {
  232. write(b, p);
  233. if (binary) {
  234. escapeCtl(*p);
  235. } else if ((*p & 0x40) == 0) {
  236. throw Exception("Invalid UTF-8 sequence");
  237. } else {
  238. int more = 1;
  239. uint32_t value;
  240. if ((*p & 0x20) != 0) {
  241. more++;
  242. if ((*p & 0x10) != 0) {
  243. more++;
  244. if ((*p & 0x08) != 0) {
  245. throw Exception("Invalid UTF-8 sequence");
  246. } else {
  247. value = *p & 0x07;
  248. }
  249. } else {
  250. value = *p & 0x0f;
  251. }
  252. } else {
  253. value = *p & 0x1f;
  254. }
  255. for (int i = 0; i < more; ++i) {
  256. if (++p == e || (*p & 0xc0) != 0x80) {
  257. throw Exception("Invalid UTF-8 sequence");
  258. }
  259. value <<= 6;
  260. value |= *p & 0x3f;
  261. }
  262. escapeUnicode(value);
  263. }
  264. } else {
  265. switch (*p) {
  266. case '\\':
  267. case '"':
  268. case '/':
  269. escape(*p, b, p);
  270. break;
  271. case '\b':
  272. escape('b', b, p);
  273. break;
  274. case '\f':
  275. escape('f', b, p);
  276. break;
  277. case '\n':
  278. escape('n', b, p);
  279. break;
  280. case '\r':
  281. escape('r', b, p);
  282. break;
  283. case '\t':
  284. escape('t', b, p);
  285. break;
  286. default:
  287. if (std::iscntrl(*p, std::locale::classic())) {
  288. write(b, p);
  289. escapeCtl(*p);
  290. break;
  291. } else {
  292. continue;
  293. }
  294. }
  295. }
  296. b = p + 1;
  297. }
  298. write(b, e);
  299. out_.write('"');
  300. }
  301. void sep() {
  302. if (top == stArrayN) {
  303. out_.write(',');
  304. formatter_.handleValueEnd();
  305. } else if (top == stArray0) {
  306. top = stArrayN;
  307. }
  308. }
  309. void sep2() {
  310. if (top == stKey) {
  311. top = stMapN;
  312. }
  313. }
  314. public:
  315. JsonGenerator() : formatter_(out_), top(stStart) {}
  316. void init(OutputStream &os) {
  317. out_.reset(os);
  318. }
  319. void flush() {
  320. out_.flush();
  321. }
  322. int64_t byteCount() const {
  323. return out_.byteCount();
  324. }
  325. void encodeNull() {
  326. sep();
  327. out_.writeBytes(reinterpret_cast<const uint8_t *>("null"), 4);
  328. sep2();
  329. }
  330. void encodeBool(bool b) {
  331. sep();
  332. if (b) {
  333. out_.writeBytes(reinterpret_cast<const uint8_t *>("true"), 4);
  334. } else {
  335. out_.writeBytes(reinterpret_cast<const uint8_t *>("false"), 5);
  336. }
  337. sep2();
  338. }
  339. template<typename T>
  340. void encodeNumber(T t) {
  341. sep();
  342. std::ostringstream oss;
  343. oss << boost::lexical_cast<std::string>(t);
  344. const std::string s = oss.str();
  345. out_.writeBytes(reinterpret_cast<const uint8_t *>(s.data()), s.size());
  346. sep2();
  347. }
  348. void encodeNumber(double t) {
  349. sep();
  350. std::ostringstream oss;
  351. if (boost::math::isfinite(t)) {
  352. oss << boost::lexical_cast<std::string>(t);
  353. } else if (boost::math::isnan(t)) {
  354. oss << "NaN";
  355. } else if (t == std::numeric_limits<double>::infinity()) {
  356. oss << "Infinity";
  357. } else {
  358. oss << "-Infinity";
  359. }
  360. const std::string s = oss.str();
  361. out_.writeBytes(reinterpret_cast<const uint8_t *>(s.data()), s.size());
  362. sep2();
  363. }
  364. void encodeString(const std::string &s) {
  365. if (top == stMap0) {
  366. top = stKey;
  367. } else if (top == stMapN) {
  368. out_.write(',');
  369. formatter_.handleValueEnd();
  370. top = stKey;
  371. } else if (top == stKey) {
  372. top = stMapN;
  373. } else {
  374. sep();
  375. }
  376. doEncodeString(s.c_str(), s.size(), false);
  377. if (top == stKey) {
  378. out_.write(':');
  379. formatter_.handleColon();
  380. }
  381. }
  382. void encodeBinary(const uint8_t *bytes, size_t len) {
  383. sep();
  384. doEncodeString(reinterpret_cast<const char *>(bytes), len, true);
  385. sep2();
  386. }
  387. void arrayStart() {
  388. sep();
  389. stateStack.push(top);
  390. top = stArray0;
  391. out_.write('[');
  392. formatter_.handleObjectStart();
  393. }
  394. void arrayEnd() {
  395. top = stateStack.top();
  396. stateStack.pop();
  397. formatter_.handleObjectEnd();
  398. out_.write(']');
  399. sep2();
  400. }
  401. void objectStart() {
  402. sep();
  403. stateStack.push(top);
  404. top = stMap0;
  405. out_.write('{');
  406. formatter_.handleObjectStart();
  407. }
  408. void objectEnd() {
  409. top = stateStack.top();
  410. stateStack.pop();
  411. formatter_.handleObjectEnd();
  412. out_.write('}');
  413. sep2();
  414. }
  415. };
  416. } // namespace json
  417. } // namespace avro
  418. #endif