123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481 |
- /**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- #ifndef avro_json_JsonIO_hh__
- #define avro_json_JsonIO_hh__
- #include <boost/lexical_cast.hpp>
- #include <boost/math/special_functions/fpclassify.hpp>
- #include <boost/utility.hpp>
- #include <locale>
- #include <sstream>
- #include <stack>
- #include <string>
- #include "Config.hh"
- #include "Stream.hh"
- namespace avro {
- namespace json {
- inline char toHex(unsigned int n) {
- return (n < 10) ? (n + '0') : (n + 'a' - 10);
- }
- class AVRO_DECL JsonParser : boost::noncopyable {
- public:
- enum class Token {
- Null,
- Bool,
- Long,
- Double,
- String,
- ArrayStart,
- ArrayEnd,
- ObjectStart,
- ObjectEnd
- };
- size_t line() const { return line_; }
- private:
- enum State {
- stValue, // Expect a data type
- stArray0, // Expect a data type or ']'
- stArrayN, // Expect a ',' or ']'
- stObject0, // Expect a string or a '}'
- stObjectN, // Expect a ',' or '}'
- stKey // Expect a ':'
- };
- std::stack<State> stateStack;
- State curState;
- bool hasNext;
- char nextChar;
- bool peeked;
- StreamReader in_;
- Token curToken;
- bool bv;
- int64_t lv;
- double dv;
- std::string sv;
- size_t line_;
- Token doAdvance();
- Token tryLiteral(const char exp[], size_t n, Token tk);
- Token tryNumber(char ch);
- Token tryString();
- static Exception unexpected(unsigned char ch);
- char next();
- static std::string decodeString(const std::string &s, bool binary);
- public:
- JsonParser() : curState(stValue), hasNext(false), nextChar(0), peeked(false),
- curToken(Token::Null), bv(false), lv(0), dv(0), line_(1) {}
- void init(InputStream &is) {
- // Clear by swapping with an empty stack
- std::stack<State>().swap(stateStack);
- curState = stValue;
- hasNext = false;
- peeked = false;
- line_ = 1;
- in_.reset(is);
- }
- Token advance() {
- if (!peeked) {
- curToken = doAdvance();
- } else {
- peeked = false;
- }
- return curToken;
- }
- Token peek() {
- if (!peeked) {
- curToken = doAdvance();
- peeked = true;
- }
- return curToken;
- }
- void expectToken(Token tk);
- bool boolValue() const {
- return bv;
- }
- Token cur() const {
- return curToken;
- }
- double doubleValue() const {
- return dv;
- }
- int64_t longValue() const {
- return lv;
- }
- const std::string &rawString() const {
- return sv;
- }
- std::string stringValue() const {
- return decodeString(sv, false);
- }
- std::string bytesValue() const {
- return decodeString(sv, true);
- }
- void drain() {
- if (!stateStack.empty() || peeked) {
- throw Exception("Invalid state for draining");
- }
- in_.drain(hasNext);
- hasNext = false;
- }
- /**
- * Return UTF-8 encoded string value.
- */
- static std::string toStringValue(const std::string &sv) {
- return decodeString(sv, false);
- }
- /**
- * Return byte-encoded string value. It is an error if the input
- * JSON string contained unicode characters more than "\u00ff'.
- */
- static std::string toBytesValue(const std::string &sv) {
- return decodeString(sv, true);
- }
- static const char *const tokenNames[];
- static const char *toString(Token tk) {
- return tokenNames[static_cast<size_t>(tk)];
- }
- };
- class AVRO_DECL JsonNullFormatter {
- public:
- explicit JsonNullFormatter(StreamWriter &) {}
- void handleObjectStart() {}
- void handleObjectEnd() {}
- void handleValueEnd() {}
- void handleColon() {}
- };
- class AVRO_DECL JsonPrettyFormatter {
- StreamWriter &out_;
- size_t level_;
- std::vector<uint8_t> indent_;
- static const int CHARS_PER_LEVEL = 2;
- void printIndent() {
- size_t charsToIndent = level_ * CHARS_PER_LEVEL;
- if (indent_.size() < charsToIndent) {
- indent_.resize(charsToIndent * 2, ' ');
- }
- out_.writeBytes(indent_.data(), charsToIndent);
- }
- public:
- explicit JsonPrettyFormatter(StreamWriter &out) : out_(out), level_(0), indent_(10, ' ') {}
- void handleObjectStart() {
- out_.write('\n');
- ++level_;
- printIndent();
- }
- void handleObjectEnd() {
- out_.write('\n');
- --level_;
- printIndent();
- }
- void handleValueEnd() {
- out_.write('\n');
- printIndent();
- }
- void handleColon() {
- out_.write(' ');
- }
- };
- template<class F>
- class AVRO_DECL JsonGenerator {
- StreamWriter out_;
- F formatter_;
- enum State {
- stStart,
- stArray0,
- stArrayN,
- stMap0,
- stMapN,
- stKey,
- };
- std::stack<State> stateStack;
- State top;
- void write(const char *b, const char *p) {
- if (b != p) {
- out_.writeBytes(reinterpret_cast<const uint8_t *>(b), p - b);
- }
- }
- void escape(char c, const char *b, const char *p) {
- write(b, p);
- out_.write('\\');
- out_.write(c);
- }
- void escapeCtl(char c) {
- escapeUnicode(static_cast<uint8_t>(c));
- }
- void writeHex(char c) {
- out_.write(toHex((static_cast<unsigned char>(c)) / 16));
- out_.write(toHex((static_cast<unsigned char>(c)) % 16));
- }
- void escapeUnicode(uint32_t c) {
- out_.write('\\');
- out_.write('u');
- writeHex((c >> 8) & 0xff);
- writeHex(c & 0xff);
- }
- void doEncodeString(const char *b, size_t len, bool binary) {
- const char *e = b + len;
- out_.write('"');
- for (const char *p = b; p != e; p++) {
- if ((*p & 0x80) != 0) {
- write(b, p);
- if (binary) {
- escapeCtl(*p);
- } else if ((*p & 0x40) == 0) {
- throw Exception("Invalid UTF-8 sequence");
- } else {
- int more = 1;
- uint32_t value;
- if ((*p & 0x20) != 0) {
- more++;
- if ((*p & 0x10) != 0) {
- more++;
- if ((*p & 0x08) != 0) {
- throw Exception("Invalid UTF-8 sequence");
- } else {
- value = *p & 0x07;
- }
- } else {
- value = *p & 0x0f;
- }
- } else {
- value = *p & 0x1f;
- }
- for (int i = 0; i < more; ++i) {
- if (++p == e || (*p & 0xc0) != 0x80) {
- throw Exception("Invalid UTF-8 sequence");
- }
- value <<= 6;
- value |= *p & 0x3f;
- }
- escapeUnicode(value);
- }
- } else {
- switch (*p) {
- case '\\':
- case '"':
- case '/':
- escape(*p, b, p);
- break;
- case '\b':
- escape('b', b, p);
- break;
- case '\f':
- escape('f', b, p);
- break;
- case '\n':
- escape('n', b, p);
- break;
- case '\r':
- escape('r', b, p);
- break;
- case '\t':
- escape('t', b, p);
- break;
- default:
- if (std::iscntrl(*p, std::locale::classic())) {
- write(b, p);
- escapeCtl(*p);
- break;
- } else {
- continue;
- }
- }
- }
- b = p + 1;
- }
- write(b, e);
- out_.write('"');
- }
- void sep() {
- if (top == stArrayN) {
- out_.write(',');
- formatter_.handleValueEnd();
- } else if (top == stArray0) {
- top = stArrayN;
- }
- }
- void sep2() {
- if (top == stKey) {
- top = stMapN;
- }
- }
- public:
- JsonGenerator() : formatter_(out_), top(stStart) {}
- void init(OutputStream &os) {
- out_.reset(os);
- }
- void flush() {
- out_.flush();
- }
- int64_t byteCount() const {
- return out_.byteCount();
- }
- void encodeNull() {
- sep();
- out_.writeBytes(reinterpret_cast<const uint8_t *>("null"), 4);
- sep2();
- }
- void encodeBool(bool b) {
- sep();
- if (b) {
- out_.writeBytes(reinterpret_cast<const uint8_t *>("true"), 4);
- } else {
- out_.writeBytes(reinterpret_cast<const uint8_t *>("false"), 5);
- }
- sep2();
- }
- template<typename T>
- void encodeNumber(T t) {
- sep();
- std::ostringstream oss;
- oss << boost::lexical_cast<std::string>(t);
- const std::string s = oss.str();
- out_.writeBytes(reinterpret_cast<const uint8_t *>(s.data()), s.size());
- sep2();
- }
- void encodeNumber(double t) {
- sep();
- std::ostringstream oss;
- if (boost::math::isfinite(t)) {
- oss << boost::lexical_cast<std::string>(t);
- } else if (boost::math::isnan(t)) {
- oss << "NaN";
- } else if (t == std::numeric_limits<double>::infinity()) {
- oss << "Infinity";
- } else {
- oss << "-Infinity";
- }
- const std::string s = oss.str();
- out_.writeBytes(reinterpret_cast<const uint8_t *>(s.data()), s.size());
- sep2();
- }
- void encodeString(const std::string &s) {
- if (top == stMap0) {
- top = stKey;
- } else if (top == stMapN) {
- out_.write(',');
- formatter_.handleValueEnd();
- top = stKey;
- } else if (top == stKey) {
- top = stMapN;
- } else {
- sep();
- }
- doEncodeString(s.c_str(), s.size(), false);
- if (top == stKey) {
- out_.write(':');
- formatter_.handleColon();
- }
- }
- void encodeBinary(const uint8_t *bytes, size_t len) {
- sep();
- doEncodeString(reinterpret_cast<const char *>(bytes), len, true);
- sep2();
- }
- void arrayStart() {
- sep();
- stateStack.push(top);
- top = stArray0;
- out_.write('[');
- formatter_.handleObjectStart();
- }
- void arrayEnd() {
- top = stateStack.top();
- stateStack.pop();
- formatter_.handleObjectEnd();
- out_.write(']');
- sep2();
- }
- void objectStart() {
- sep();
- stateStack.push(top);
- top = stMap0;
- out_.write('{');
- formatter_.handleObjectStart();
- }
- void objectEnd() {
- top = stateStack.top();
- stateStack.pop();
- formatter_.handleObjectEnd();
- out_.write('}');
- sep2();
- }
- };
- } // namespace json
- } // namespace avro
- #endif
|