/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * https://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef avro_json_JsonIO_hh__ #define avro_json_JsonIO_hh__ #include #include #include #include #include #include #include #include "Config.hh" #include "Stream.hh" namespace avro { namespace json { inline char toHex(unsigned int n) { return (n < 10) ? (n + '0') : (n + 'a' - 10); } class AVRO_DECL JsonParser : boost::noncopyable { public: enum class Token { Null, Bool, Long, Double, String, ArrayStart, ArrayEnd, ObjectStart, ObjectEnd }; size_t line() const { return line_; } private: enum State { stValue, // Expect a data type stArray0, // Expect a data type or ']' stArrayN, // Expect a ',' or ']' stObject0, // Expect a string or a '}' stObjectN, // Expect a ',' or '}' stKey // Expect a ':' }; std::stack stateStack; State curState; bool hasNext; char nextChar; bool peeked; StreamReader in_; Token curToken; bool bv; int64_t lv; double dv; std::string sv; size_t line_; Token doAdvance(); Token tryLiteral(const char exp[], size_t n, Token tk); Token tryNumber(char ch); Token tryString(); static Exception unexpected(unsigned char ch); char next(); static std::string decodeString(const std::string &s, bool binary); public: JsonParser() : curState(stValue), hasNext(false), nextChar(0), peeked(false), curToken(Token::Null), bv(false), lv(0), dv(0), line_(1) {} void init(InputStream &is) { // Clear by swapping with an empty stack std::stack().swap(stateStack); curState = stValue; hasNext = false; peeked = false; line_ = 1; in_.reset(is); } Token advance() { if (!peeked) { curToken = doAdvance(); } else { peeked = false; } return curToken; } Token peek() { if (!peeked) { curToken = doAdvance(); peeked = true; } return curToken; } void expectToken(Token tk); bool boolValue() const { return bv; } Token cur() const { return curToken; } double doubleValue() const { return dv; } int64_t longValue() const { return lv; } const std::string &rawString() const { return sv; } std::string stringValue() const { return decodeString(sv, false); } std::string bytesValue() const { return decodeString(sv, true); } void drain() { if (!stateStack.empty() || peeked) { throw Exception("Invalid state for draining"); } in_.drain(hasNext); hasNext = false; } /** * Return UTF-8 encoded string value. */ static std::string toStringValue(const std::string &sv) { return decodeString(sv, false); } /** * Return byte-encoded string value. It is an error if the input * JSON string contained unicode characters more than "\u00ff'. */ static std::string toBytesValue(const std::string &sv) { return decodeString(sv, true); } static const char *const tokenNames[]; static const char *toString(Token tk) { return tokenNames[static_cast(tk)]; } }; class AVRO_DECL JsonNullFormatter { public: explicit JsonNullFormatter(StreamWriter &) {} void handleObjectStart() {} void handleObjectEnd() {} void handleValueEnd() {} void handleColon() {} }; class AVRO_DECL JsonPrettyFormatter { StreamWriter &out_; size_t level_; std::vector indent_; static const int CHARS_PER_LEVEL = 2; void printIndent() { size_t charsToIndent = level_ * CHARS_PER_LEVEL; if (indent_.size() < charsToIndent) { indent_.resize(charsToIndent * 2, ' '); } out_.writeBytes(indent_.data(), charsToIndent); } public: explicit JsonPrettyFormatter(StreamWriter &out) : out_(out), level_(0), indent_(10, ' ') {} void handleObjectStart() { out_.write('\n'); ++level_; printIndent(); } void handleObjectEnd() { out_.write('\n'); --level_; printIndent(); } void handleValueEnd() { out_.write('\n'); printIndent(); } void handleColon() { out_.write(' '); } }; template class AVRO_DECL JsonGenerator { StreamWriter out_; F formatter_; enum State { stStart, stArray0, stArrayN, stMap0, stMapN, stKey, }; std::stack stateStack; State top; void write(const char *b, const char *p) { if (b != p) { out_.writeBytes(reinterpret_cast(b), p - b); } } void escape(char c, const char *b, const char *p) { write(b, p); out_.write('\\'); out_.write(c); } void escapeCtl(char c) { escapeUnicode(static_cast(c)); } void writeHex(char c) { out_.write(toHex((static_cast(c)) / 16)); out_.write(toHex((static_cast(c)) % 16)); } void escapeUnicode(uint32_t c) { out_.write('\\'); out_.write('u'); writeHex((c >> 8) & 0xff); writeHex(c & 0xff); } void doEncodeString(const char *b, size_t len, bool binary) { const char *e = b + len; out_.write('"'); for (const char *p = b; p != e; p++) { if ((*p & 0x80) != 0) { write(b, p); if (binary) { escapeCtl(*p); } else if ((*p & 0x40) == 0) { throw Exception("Invalid UTF-8 sequence"); } else { int more = 1; uint32_t value; if ((*p & 0x20) != 0) { more++; if ((*p & 0x10) != 0) { more++; if ((*p & 0x08) != 0) { throw Exception("Invalid UTF-8 sequence"); } else { value = *p & 0x07; } } else { value = *p & 0x0f; } } else { value = *p & 0x1f; } for (int i = 0; i < more; ++i) { if (++p == e || (*p & 0xc0) != 0x80) { throw Exception("Invalid UTF-8 sequence"); } value <<= 6; value |= *p & 0x3f; } escapeUnicode(value); } } else { switch (*p) { case '\\': case '"': case '/': escape(*p, b, p); break; case '\b': escape('b', b, p); break; case '\f': escape('f', b, p); break; case '\n': escape('n', b, p); break; case '\r': escape('r', b, p); break; case '\t': escape('t', b, p); break; default: if (std::iscntrl(*p, std::locale::classic())) { write(b, p); escapeCtl(*p); break; } else { continue; } } } b = p + 1; } write(b, e); out_.write('"'); } void sep() { if (top == stArrayN) { out_.write(','); formatter_.handleValueEnd(); } else if (top == stArray0) { top = stArrayN; } } void sep2() { if (top == stKey) { top = stMapN; } } public: JsonGenerator() : formatter_(out_), top(stStart) {} void init(OutputStream &os) { out_.reset(os); } void flush() { out_.flush(); } int64_t byteCount() const { return out_.byteCount(); } void encodeNull() { sep(); out_.writeBytes(reinterpret_cast("null"), 4); sep2(); } void encodeBool(bool b) { sep(); if (b) { out_.writeBytes(reinterpret_cast("true"), 4); } else { out_.writeBytes(reinterpret_cast("false"), 5); } sep2(); } template void encodeNumber(T t) { sep(); std::ostringstream oss; oss << boost::lexical_cast(t); const std::string s = oss.str(); out_.writeBytes(reinterpret_cast(s.data()), s.size()); sep2(); } void encodeNumber(double t) { sep(); std::ostringstream oss; if (boost::math::isfinite(t)) { oss << boost::lexical_cast(t); } else if (boost::math::isnan(t)) { oss << "NaN"; } else if (t == std::numeric_limits::infinity()) { oss << "Infinity"; } else { oss << "-Infinity"; } const std::string s = oss.str(); out_.writeBytes(reinterpret_cast(s.data()), s.size()); sep2(); } void encodeString(const std::string &s) { if (top == stMap0) { top = stKey; } else if (top == stMapN) { out_.write(','); formatter_.handleValueEnd(); top = stKey; } else if (top == stKey) { top = stMapN; } else { sep(); } doEncodeString(s.c_str(), s.size(), false); if (top == stKey) { out_.write(':'); formatter_.handleColon(); } } void encodeBinary(const uint8_t *bytes, size_t len) { sep(); doEncodeString(reinterpret_cast(bytes), len, true); sep2(); } void arrayStart() { sep(); stateStack.push(top); top = stArray0; out_.write('['); formatter_.handleObjectStart(); } void arrayEnd() { top = stateStack.top(); stateStack.pop(); formatter_.handleObjectEnd(); out_.write(']'); sep2(); } void objectStart() { sep(); stateStack.push(top); top = stMap0; out_.write('{'); formatter_.handleObjectStart(); } void objectEnd() { top = stateStack.top(); stateStack.pop(); formatter_.handleObjectEnd(); out_.write('}'); sep2(); } }; } // namespace json } // namespace avro #endif