parser.rl6 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313
  1. #include <library/cpp/json/fast_sax/unescape.h>
  2. #include <library/cpp/json/fast_sax/parser.h>
  3. #include <util/string/cast.h>
  4. #include <util/generic/buffer.h>
  5. #include <util/generic/strbuf.h>
  6. #include <util/generic/ymath.h>
  7. namespace NJson {
  // Describes the string token currently held back by the parser context
  // (TParserCtx::String) and how it has to be handed to the callbacks.
  enum EStoredStr {
      SS_NONE = 0,     // nothing is pending
      SS_NOCOPY = 1,   // pending string is reported through the *NoCopy callbacks
      SS_MUSTCOPY = 2  // pending string is reported through the plain (copying) callbacks
  };
  11. struct TParserCtx {
  12. TJsonCallbacks& Hndl;
  13. TBuffer Buffer;
  14. TStringBuf String;
  15. EStoredStr Stored = SS_NONE;
  16. bool ExpectValue = true;
  17. const char* p0 = nullptr;
  18. const char* p = nullptr;
  19. const char* pe = nullptr;
  20. const char* eof = nullptr;
  21. const char* ts = nullptr;
  22. const char* te = nullptr;
  23. int cs = 0;
  24. int act = 0;
  25. TParserCtx(TJsonCallbacks& h, TStringBuf data)
  26. : Hndl(h)
  27. , p0(data.data())
  28. , p(data.data())
  29. , pe(data.end())
  30. , eof(data.end())
  31. {}
  32. static inline bool GoodPtrs(const char* b, const char* e) {
  33. return b && e && b <= e;
  34. }
  35. bool OnError(TStringBuf reason = TStringBuf(""), bool end = false) const {
  36. size_t off = 0;
  37. TStringBuf token;
  38. if (GoodPtrs(p0, ts)) {
  39. off = ts - p0;
  40. } else if (end && GoodPtrs(p0, pe)) {
  41. off = pe - p0;
  42. }
  43. if (GoodPtrs(ts, te)) {
  44. token = TStringBuf(ts, te);
  45. }
  46. if (!token) {
  47. Hndl.OnError(off, reason);
  48. } else {
  49. Hndl.OnError(off, TString::Join(reason, " at token: '", token, "'"));
  50. }
  51. return false;
  52. }
  53. bool OnVal() {
  54. if (Y_UNLIKELY(!ExpectValue)) {
  55. return false;
  56. }
  57. ExpectValue = false;
  58. return true;
  59. }
  60. bool OnNull() {
  61. return Y_LIKELY(OnVal())
  62. && Hndl.OnNull();
  63. }
  64. bool OnTrue() {
  65. return Y_LIKELY(OnVal())
  66. && Hndl.OnBoolean(true);
  67. }
  68. bool OnFalse() {
  69. return Y_LIKELY(OnVal())
  70. && Hndl.OnBoolean(false);
  71. }
  72. bool OnPInt() {
  73. unsigned long long res = 0;
  74. return Y_LIKELY(OnVal())
  75. && TryFromString<unsigned long long>(TStringBuf(ts, te), res)
  76. && Hndl.OnUInteger(res);
  77. }
  78. bool OnNInt() {
  79. long long res = 0;
  80. return Y_LIKELY(OnVal())
  81. && TryFromString<long long>(TStringBuf(ts, te), res)
  82. && Hndl.OnInteger(res);
  83. }
  84. bool OnFlt() {
  85. double res = 0;
  86. return Y_LIKELY(OnVal())
  87. && TryFromString<double>(TStringBuf(ts, te), res)
  88. && IsFinite(res)
  89. && Hndl.OnDouble(res);
  90. }
  91. bool OnMapOpen() {
  92. bool res = Y_LIKELY(OnVal())
  93. && Hndl.OnOpenMap();
  94. ExpectValue = true;
  95. return res;
  96. }
  97. bool OnArrOpen() {
  98. bool res = Y_LIKELY(OnVal())
  99. && Hndl.OnOpenArray();
  100. ExpectValue = true;
  101. return res;
  102. }
  103. bool OnString(TStringBuf s, EStoredStr t) {
  104. if (Y_LIKELY(OnVal())) {
  105. String = s;
  106. Stored = t;
  107. return true;
  108. } else {
  109. return false;
  110. }
  111. }
  112. bool OnStrU() {
  113. return OnString(TStringBuf(ts, te), SS_NOCOPY);
  114. }
  115. bool OnStrQ() {
  116. return OnString(TStringBuf(ts + 1, te - 1), SS_NOCOPY);
  117. }
  118. bool OnStrE() {
  119. Buffer.Clear();
  120. Buffer.Reserve(2 * (te - ts));
  121. return OnString(UnescapeJsonUnicode(TStringBuf(ts + 1, te - ts - 2), Buffer.data()), SS_MUSTCOPY);
  122. }
  123. bool OnMapClose() {
  124. ExpectValue = false;
  125. return Y_LIKELY(OnAfterVal())
  126. && Hndl.OnCloseMap();
  127. }
  128. bool OnArrClose() {
  129. ExpectValue = false;
  130. return Y_LIKELY(OnAfterVal())
  131. && Hndl.OnCloseArray();
  132. }
  133. bool OnColon() {
  134. if (ExpectValue) {
  135. return false;
  136. }
  137. ExpectValue = true;
  138. const auto stored = Stored;
  139. Stored = SS_NONE;
  140. switch (stored) {
  141. default:
  142. return false;
  143. case SS_NOCOPY:
  144. return Hndl.OnMapKeyNoCopy(String);
  145. case SS_MUSTCOPY:
  146. return Hndl.OnMapKey(String);
  147. }
  148. }
  149. bool OnAfterVal() {
  150. const auto stored = Stored;
  151. Stored = SS_NONE;
  152. switch (stored) {
  153. default:
  154. return true;
  155. case SS_NOCOPY:
  156. return Hndl.OnStringNoCopy(String);
  157. case SS_MUSTCOPY:
  158. return Hndl.OnString(String);
  159. }
  160. }
  161. bool OnComma() {
  162. if (Y_UNLIKELY(ExpectValue)) {
  163. return false;
  164. }
  165. ExpectValue = true;
  166. return OnAfterVal();
  167. }
  168. bool Parse();
  169. };
  #if 0
  // Ragel definition of the `fastjson` scanner. Every token action calls the
  // TParserCtx method of the matching name and jumps to the TOKEN_ERROR label
  // in TParserCtx::Parse() when that method returns false.
  // NOTE: the order of the scanner alternatives in `main` is kept exactly as is.
  %%{
      machine fastjson;

      alphtype char;

      # Token actions.
      action OnNull  { if (Y_UNLIKELY(!OnNull())) goto TOKEN_ERROR; }
      action OnTrue  { if (Y_UNLIKELY(!OnTrue())) goto TOKEN_ERROR; }
      action OnFalse { if (Y_UNLIKELY(!OnFalse())) goto TOKEN_ERROR; }
      action OnPInt  { if (Y_UNLIKELY(!OnPInt())) goto TOKEN_ERROR; }
      action OnNInt  { if (Y_UNLIKELY(!OnNInt())) goto TOKEN_ERROR; }
      action OnFlt   { if (Y_UNLIKELY(!OnFlt())) goto TOKEN_ERROR; }
      action OnStrU  { if (Y_UNLIKELY(!OnStrU())) goto TOKEN_ERROR; }
      action OnStrQ  { if (Y_UNLIKELY(!OnStrQ())) goto TOKEN_ERROR; }
      action OnStrE  { if (Y_UNLIKELY(!OnStrE())) goto TOKEN_ERROR; }
      action OnDictO { if (Y_UNLIKELY(!OnMapOpen())) goto TOKEN_ERROR; }
      action OnDictC { if (Y_UNLIKELY(!OnMapClose())) goto TOKEN_ERROR; }
      action OnArrO  { if (Y_UNLIKELY(!OnArrOpen())) goto TOKEN_ERROR; }
      action OnArrC  { if (Y_UNLIKELY(!OnArrClose())) goto TOKEN_ERROR; }
      action OnComma { if (Y_UNLIKELY(!OnComma())) goto TOKEN_ERROR; }
      action OnColon { if (Y_UNLIKELY(!OnColon())) goto TOKEN_ERROR; }
      action OnError { goto TOKEN_ERROR; }

      # C-style block comment; skipped by the scanner.
      comment1 = "/*" (any* -- "*/") "*/";

      # Numbers. The flt pattern is deliberately loose; OnFlt validates the text.
      pint = digit+;
      nint = '-' digit+;
      flt  = '-'?[0-9.][0-9.eE+\-]+;

      # Characters allowed in bare (unquoted) strings.
      uchar0 = [a-zA-Z_@$] | (0x80 .. 0xFF);
      uchar  = uchar0 | digit | [.\-];

      # Characters allowed inside quoted strings.
      qchar = [^'\\]; #';
      dchar = [^"\\]; #";

      echar = "\\" any;

      qechar = qchar | echar;
      dechar = dchar | echar;

      # Quoted strings without escapes (strq, strd) and with escapes (strqe, strde).
      strq = "'" qchar* "'";
      strd = '"' dchar* '"';

      strqe = "'" qechar* "'";
      strde = '"' dechar* '"';

      strU = uchar0 uchar*;
      strQ = strq | strd;
      strE = strqe | strde;

      # Whitespace: every byte up to 0x20, plus 0x7F.
      ws = (0x00 .. 0x20) | 0x7F;
      sp = ws+;

      main := |*
          'null'  => OnNull;
          'true'  => OnTrue;
          'false' => OnFalse;

          pint => OnPInt;
          nint => OnNInt;
          flt  => OnFlt;

          strU => OnStrU;
          strQ => OnStrQ;
          strE => OnStrE;

          ',' => OnComma;
          ':' => OnColon;

          '{' => OnDictO;
          '}' => OnDictC;
          '[' => OnArrO;
          ']' => OnArrC;

          sp;
          comment1;

          # A number directly followed by a non-delimiter character is an error.
          (flt | pint | nint) (any - (ws | ',' | ':' | '{' | '}' | '[' | ']')) => OnError;

          any => OnError;
      *|;
  }%%
  #endif
  233. bool TParserCtx::Parse() {
  234. try {
  235. %%{
  236. write data noerror nofinal;
  237. write init;
  238. write exec;
  239. }%%
  240. Y_UNUSED(fastjson_en_main);
  241. } catch (const TFromStringException& e) {
  242. return OnError(e.what());
  243. }
  244. return OnAfterVal() && Hndl.OnEnd() || OnError("invalid or truncated", true);
  245. TOKEN_ERROR:
  246. return OnError("invalid syntax");
  247. }
  248. bool ReadJsonFast(TStringBuf data, TJsonCallbacks* h) {
  249. return TParserCtx(*h, data).Parse();
  250. }
  251. }