JsonIO.cc 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426
  1. /**
  2. * Licensed to the Apache Software Foundation (ASF) under one
  3. * or more contributor license agreements. See the NOTICE file
  4. * distributed with this work for additional information
  5. * regarding copyright ownership. The ASF licenses this file
  6. * to you under the Apache License, Version 2.0 (the
  7. * "License"); you may not use this file except in compliance
  8. * with the License. You may obtain a copy of the License at
  9. *
  10. * https://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. #include "JsonIO.hh"
  19. namespace avro {
  20. namespace json {
  21. using std::ostringstream;
  22. using std::string;
  23. const char *const
  24. JsonParser::tokenNames[] = {
  25. "Null",
  26. "Bool",
  27. "Integer",
  28. "Double",
  29. "String",
  30. "Array start",
  31. "Array end",
  32. "Object start",
  33. "Object end",
  34. };
  35. char JsonParser::next() {
  36. char ch = hasNext ? nextChar : ' ';
  37. while (isspace(ch)) {
  38. if (ch == '\n') {
  39. line_++;
  40. }
  41. ch = in_.read();
  42. }
  43. hasNext = false;
  44. return ch;
  45. }
  46. void JsonParser::expectToken(Token tk) {
  47. if (advance() != tk) {
  48. if (tk == Token::Double) {
  49. if (cur() == Token::String
  50. && (sv == "Infinity" || sv == "-Infinity" || sv == "NaN")) {
  51. curToken = Token::Double;
  52. dv = sv == "Infinity" ? std::numeric_limits<double>::infinity() : sv == "-Infinity" ? -std::numeric_limits<double>::infinity() : std::numeric_limits<double>::quiet_NaN();
  53. return;
  54. } else if (cur() == Token::Long) {
  55. dv = double(lv);
  56. return;
  57. }
  58. }
  59. ostringstream oss;
  60. oss << "Incorrect token in the stream. Expected: "
  61. << JsonParser::toString(tk) << ", found "
  62. << JsonParser::toString(cur());
  63. throw Exception(oss.str());
  64. }
  65. }
  66. JsonParser::Token JsonParser::doAdvance() {
  67. char ch = next();
  68. if (ch == ']') {
  69. if (curState == stArray0 || curState == stArrayN) {
  70. curState = stateStack.top();
  71. stateStack.pop();
  72. return Token::ArrayEnd;
  73. } else {
  74. throw unexpected(ch);
  75. }
  76. } else if (ch == '}') {
  77. if (curState == stObject0 || curState == stObjectN) {
  78. curState = stateStack.top();
  79. stateStack.pop();
  80. return Token::ObjectEnd;
  81. } else {
  82. throw unexpected(ch);
  83. }
  84. } else if (ch == ',') {
  85. if (curState != stObjectN && curState != stArrayN) {
  86. throw unexpected(ch);
  87. }
  88. if (curState == stObjectN) {
  89. curState = stObject0;
  90. }
  91. ch = next();
  92. } else if (ch == ':') {
  93. if (curState != stKey) {
  94. throw unexpected(ch);
  95. }
  96. curState = stObjectN;
  97. ch = next();
  98. }
  99. if (curState == stObject0) {
  100. if (ch != '"') {
  101. throw unexpected(ch);
  102. }
  103. curState = stKey;
  104. } else if (curState == stArray0) {
  105. curState = stArrayN;
  106. }
  107. switch (ch) {
  108. case '[':
  109. stateStack.push(curState);
  110. curState = stArray0;
  111. return Token::ArrayStart;
  112. case '{':
  113. stateStack.push(curState);
  114. curState = stObject0;
  115. return Token::ObjectStart;
  116. case '"':
  117. return tryString();
  118. case 't':
  119. bv = true;
  120. return tryLiteral("rue", 3, Token::Bool);
  121. case 'f':
  122. bv = false;
  123. return tryLiteral("alse", 4, Token::Bool);
  124. case 'n':
  125. return tryLiteral("ull", 3, Token::Null);
  126. default:
  127. if (isdigit(ch) || ch == '-') {
  128. return tryNumber(ch);
  129. } else {
  130. throw unexpected(ch);
  131. }
  132. }
  133. }
  134. JsonParser::Token JsonParser::tryNumber(char ch) {
  135. sv.clear();
  136. sv.push_back(ch);
  137. hasNext = false;
  138. int state = (ch == '-') ? 0 : (ch == '0') ? 1 : 2;
  139. for (;;) {
  140. switch (state) {
  141. case 0:
  142. if (in_.hasMore()) {
  143. ch = in_.read();
  144. if (isdigit(ch)) {
  145. state = (ch == '0') ? 1 : 2;
  146. sv.push_back(ch);
  147. continue;
  148. }
  149. hasNext = true;
  150. }
  151. break;
  152. case 1:
  153. if (in_.hasMore()) {
  154. ch = in_.read();
  155. if (ch == '.') {
  156. state = 3;
  157. sv.push_back(ch);
  158. continue;
  159. } else if (ch == 'e' || ch == 'E') {
  160. sv.push_back(ch);
  161. state = 5;
  162. continue;
  163. }
  164. hasNext = true;
  165. }
  166. break;
  167. case 2:
  168. if (in_.hasMore()) {
  169. ch = in_.read();
  170. if (isdigit(ch)) {
  171. sv.push_back(ch);
  172. continue;
  173. } else if (ch == '.') {
  174. state = 3;
  175. sv.push_back(ch);
  176. continue;
  177. } else if (ch == 'e' || ch == 'E') {
  178. sv.push_back(ch);
  179. state = 5;
  180. continue;
  181. }
  182. hasNext = true;
  183. }
  184. break;
  185. case 3:
  186. case 6:
  187. if (in_.hasMore()) {
  188. ch = in_.read();
  189. if (isdigit(ch)) {
  190. sv.push_back(ch);
  191. state++;
  192. continue;
  193. }
  194. hasNext = true;
  195. }
  196. break;
  197. case 4:
  198. if (in_.hasMore()) {
  199. ch = in_.read();
  200. if (isdigit(ch)) {
  201. sv.push_back(ch);
  202. continue;
  203. } else if (ch == 'e' || ch == 'E') {
  204. sv.push_back(ch);
  205. state = 5;
  206. continue;
  207. }
  208. hasNext = true;
  209. }
  210. break;
  211. case 5:
  212. if (in_.hasMore()) {
  213. ch = in_.read();
  214. if (ch == '+' || ch == '-') {
  215. sv.push_back(ch);
  216. state = 6;
  217. continue;
  218. } else if (isdigit(ch)) {
  219. sv.push_back(ch);
  220. state = 7;
  221. continue;
  222. }
  223. hasNext = true;
  224. }
  225. break;
  226. case 7:
  227. if (in_.hasMore()) {
  228. ch = in_.read();
  229. if (isdigit(ch)) {
  230. sv.push_back(ch);
  231. continue;
  232. }
  233. hasNext = true;
  234. }
  235. break;
  236. default:
  237. throw Exception("Unexpected JSON parse state");
  238. }
  239. if (state == 1 || state == 2 || state == 4 || state == 7) {
  240. if (hasNext) {
  241. nextChar = ch;
  242. }
  243. std::istringstream iss(sv);
  244. if (state == 1 || state == 2) {
  245. iss >> lv;
  246. return Token::Long;
  247. } else {
  248. iss >> dv;
  249. return Token::Double;
  250. }
  251. } else {
  252. if (hasNext) {
  253. throw unexpected(ch);
  254. } else {
  255. throw Exception("Unexpected EOF");
  256. }
  257. }
  258. }
  259. }
  260. JsonParser::Token JsonParser::tryString() {
  261. sv.clear();
  262. for (;;) {
  263. char ch = in_.read();
  264. if (ch == '"') {
  265. return Token::String;
  266. } else if (ch == '\\') {
  267. ch = in_.read();
  268. switch (ch) {
  269. case '"':
  270. case '\\':
  271. case '/':
  272. case 'b':
  273. case 'f':
  274. case 'n':
  275. case 'r':
  276. case 't':
  277. sv.push_back('\\');
  278. sv.push_back(ch);
  279. break;
  280. case 'u':
  281. case 'U': {
  282. uint32_t n = 0;
  283. char e[4];
  284. in_.readBytes(reinterpret_cast<uint8_t *>(e), 4);
  285. sv.push_back('\\');
  286. sv.push_back(ch);
  287. for (char c : e) {
  288. n *= 16;
  289. if (isdigit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')) {
  290. sv.push_back(c);
  291. } else {
  292. throw unexpected(c);
  293. }
  294. }
  295. } break;
  296. default:
  297. throw unexpected(ch);
  298. }
  299. } else {
  300. sv.push_back(ch);
  301. }
  302. }
  303. }
  304. string JsonParser::decodeString(const string &s, bool binary) {
  305. string result;
  306. for (string::const_iterator it = s.begin(); it != s.end(); ++it) {
  307. char ch = *it;
  308. if (ch == '\\') {
  309. ch = *++it;
  310. switch (ch) {
  311. case '"':
  312. case '\\':
  313. case '/':
  314. result.push_back(ch);
  315. continue;
  316. case 'b':
  317. result.push_back('\b');
  318. continue;
  319. case 'f':
  320. result.push_back('\f');
  321. continue;
  322. case 'n':
  323. result.push_back('\n');
  324. continue;
  325. case 'r':
  326. result.push_back('\r');
  327. continue;
  328. case 't':
  329. result.push_back('\t');
  330. continue;
  331. case 'u':
  332. case 'U': {
  333. uint32_t n = 0;
  334. char e[4];
  335. for (char &i : e) {
  336. n *= 16;
  337. char c = *++it;
  338. i = c;
  339. if (isdigit(c)) {
  340. n += c - '0';
  341. } else if (c >= 'a' && c <= 'f') {
  342. n += c - 'a' + 10;
  343. } else if (c >= 'A' && c <= 'F') {
  344. n += c - 'A' + 10;
  345. }
  346. }
  347. if (binary) {
  348. if (n > 0xff) {
  349. throw Exception(boost::format(
  350. "Invalid byte for binary: %1%%2%")
  351. % ch % string(e, 4));
  352. } else {
  353. result.push_back(n);
  354. continue;
  355. }
  356. }
  357. if (n < 0x80) {
  358. result.push_back(n);
  359. } else if (n < 0x800) {
  360. result.push_back((n >> 6) | 0xc0);
  361. result.push_back((n & 0x3f) | 0x80);
  362. } else if (n < 0x10000) {
  363. result.push_back((n >> 12) | 0xe0);
  364. result.push_back(((n >> 6) & 0x3f) | 0x80);
  365. result.push_back((n & 0x3f) | 0x80);
  366. } else if (n < 110000) {
  367. result.push_back((n >> 18) | 0xf0);
  368. result.push_back(((n >> 12) & 0x3f) | 0x80);
  369. result.push_back(((n >> 6) & 0x3f) | 0x80);
  370. result.push_back((n & 0x3f) | 0x80);
  371. } else {
  372. throw Exception(boost::format(
  373. "Invalid unicode value: %1%i%2%")
  374. % ch % string(e, 4));
  375. }
  376. }
  377. continue;
  378. default:
  379. throw Exception("Unexpected JSON parse state");
  380. }
  381. } else {
  382. result.push_back(ch);
  383. }
  384. }
  385. return result;
  386. }
  387. Exception JsonParser::unexpected(unsigned char c) {
  388. std::ostringstream oss;
  389. oss << "Unexpected character in json " << toHex(c / 16) << toHex(c % 16);
  390. return Exception(oss.str());
  391. }
  392. JsonParser::Token JsonParser::tryLiteral(const char exp[], size_t n, Token tk) {
  393. char c[100];
  394. in_.readBytes(reinterpret_cast<uint8_t *>(c), n);
  395. for (size_t i = 0; i < n; ++i) {
  396. if (c[i] != exp[i]) {
  397. throw unexpected(c[i]);
  398. }
  399. }
  400. if (in_.hasMore()) {
  401. nextChar = in_.read();
  402. if (isdigit(nextChar) || isalpha(nextChar)) {
  403. throw unexpected(nextChar);
  404. }
  405. hasNext = true;
  406. }
  407. return tk;
  408. }
  409. } // namespace json
  410. } // namespace avro