decode.go 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340
  1. // Copyright 2018 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package json
  5. import (
  6. "bytes"
  7. "fmt"
  8. "io"
  9. "regexp"
  10. "unicode/utf8"
  11. "google.golang.org/protobuf/internal/errors"
  12. )
// call records which Decoder method, Read or Peek, was invoked most recently.
// Read consults it to decide whether to hand back the token that Peek already
// parsed instead of parsing a new one.
type call uint8

const (
	// readCall is the zero value, so a newly constructed Decoder behaves as
	// though Read had been the last method called.
	readCall call = iota
	// peekCall marks that Peek was the last method called; the following Read
	// returns the cached token and error.
	peekCall
)
// unexpectedFmt is the message format Read uses when a well-formed token
// appears out of sequence. Its single verb receives the token's raw text.
const unexpectedFmt = "unexpected token %s"
// ErrUnexpectedEOF means that EOF was encountered in the middle of the input.
// Read returns it when the input ends while an object or array is still open,
// or directly after a field name with no following ':'. The message is
// formatted from io.ErrUnexpectedEOF.
var ErrUnexpectedEOF = errors.New("%v", io.ErrUnexpectedEOF)
// Decoder is a token-based JSON decoder. It works over an in-memory byte
// slice and hands out one Token per call to Read or Peek, validating that
// tokens arrive in a legal order.
type Decoder struct {
	// lastCall is last method called, either readCall or peekCall.
	// Initial value is readCall.
	lastCall call

	// lastToken contains the last read token. Read only updates it after the
	// new token has been validated; Peek stores Read's result here so the
	// next Read can return it.
	lastToken Token

	// lastErr contains the last read error. It is written by Peek and
	// returned by the Read that follows a Peek.
	lastErr error

	// openStack is a stack containing ObjectOpen and ArrayOpen values. The
	// top of stack represents the object or the array the current value is
	// directly located in.
	openStack []Kind

	// orig is used in reporting line and column. It is the full input as
	// given to NewDecoder and is never re-sliced.
	orig []byte
	// in contains the unconsumed input. It is always a suffix of orig, so
	// len(orig)-len(in) is the current byte offset.
	in []byte
}
  40. // NewDecoder returns a Decoder to read the given []byte.
  41. func NewDecoder(b []byte) *Decoder {
  42. return &Decoder{orig: b, in: b}
  43. }
  44. // Peek looks ahead and returns the next token kind without advancing a read.
  45. func (d *Decoder) Peek() (Token, error) {
  46. defer func() { d.lastCall = peekCall }()
  47. if d.lastCall == readCall {
  48. d.lastToken, d.lastErr = d.Read()
  49. }
  50. return d.lastToken, d.lastErr
  51. }
  52. // Read returns the next JSON token.
  53. // It will return an error if there is no valid token.
  54. func (d *Decoder) Read() (Token, error) {
  55. const scalar = Null | Bool | Number | String
  56. defer func() { d.lastCall = readCall }()
  57. if d.lastCall == peekCall {
  58. return d.lastToken, d.lastErr
  59. }
  60. tok, err := d.parseNext()
  61. if err != nil {
  62. return Token{}, err
  63. }
  64. switch tok.kind {
  65. case EOF:
  66. if len(d.openStack) != 0 ||
  67. d.lastToken.kind&scalar|ObjectClose|ArrayClose == 0 {
  68. return Token{}, ErrUnexpectedEOF
  69. }
  70. case Null:
  71. if !d.isValueNext() {
  72. return Token{}, d.newSyntaxError(tok.pos, unexpectedFmt, tok.RawString())
  73. }
  74. case Bool, Number:
  75. if !d.isValueNext() {
  76. return Token{}, d.newSyntaxError(tok.pos, unexpectedFmt, tok.RawString())
  77. }
  78. case String:
  79. if d.isValueNext() {
  80. break
  81. }
  82. // This string token should only be for a field name.
  83. if d.lastToken.kind&(ObjectOpen|comma) == 0 {
  84. return Token{}, d.newSyntaxError(tok.pos, unexpectedFmt, tok.RawString())
  85. }
  86. if len(d.in) == 0 {
  87. return Token{}, ErrUnexpectedEOF
  88. }
  89. if c := d.in[0]; c != ':' {
  90. return Token{}, d.newSyntaxError(d.currPos(), `unexpected character %s, missing ":" after field name`, string(c))
  91. }
  92. tok.kind = Name
  93. d.consume(1)
  94. case ObjectOpen, ArrayOpen:
  95. if !d.isValueNext() {
  96. return Token{}, d.newSyntaxError(tok.pos, unexpectedFmt, tok.RawString())
  97. }
  98. d.openStack = append(d.openStack, tok.kind)
  99. case ObjectClose:
  100. if len(d.openStack) == 0 ||
  101. d.lastToken.kind&(Name|comma) != 0 ||
  102. d.openStack[len(d.openStack)-1] != ObjectOpen {
  103. return Token{}, d.newSyntaxError(tok.pos, unexpectedFmt, tok.RawString())
  104. }
  105. d.openStack = d.openStack[:len(d.openStack)-1]
  106. case ArrayClose:
  107. if len(d.openStack) == 0 ||
  108. d.lastToken.kind == comma ||
  109. d.openStack[len(d.openStack)-1] != ArrayOpen {
  110. return Token{}, d.newSyntaxError(tok.pos, unexpectedFmt, tok.RawString())
  111. }
  112. d.openStack = d.openStack[:len(d.openStack)-1]
  113. case comma:
  114. if len(d.openStack) == 0 ||
  115. d.lastToken.kind&(scalar|ObjectClose|ArrayClose) == 0 {
  116. return Token{}, d.newSyntaxError(tok.pos, unexpectedFmt, tok.RawString())
  117. }
  118. }
  119. // Update d.lastToken only after validating token to be in the right sequence.
  120. d.lastToken = tok
  121. if d.lastToken.kind == comma {
  122. return d.Read()
  123. }
  124. return tok, nil
  125. }
// errRegexp extracts the offending text for "invalid value" errors raised by
// parseNext. At the start of the input it matches either a run of 1 to 32
// non-delimiter characters (letters, digits, '-', '+', '.', '_') or, failing
// that, a single character.
var errRegexp = regexp.MustCompile(`^([-+._a-zA-Z0-9]{1,32}|.)`)
  128. // parseNext parses for the next JSON token. It returns a Token object for
  129. // different types, except for Name. It does not handle whether the next token
  130. // is in a valid sequence or not.
  131. func (d *Decoder) parseNext() (Token, error) {
  132. // Trim leading spaces.
  133. d.consume(0)
  134. in := d.in
  135. if len(in) == 0 {
  136. return d.consumeToken(EOF, 0), nil
  137. }
  138. switch in[0] {
  139. case 'n':
  140. if n := matchWithDelim("null", in); n != 0 {
  141. return d.consumeToken(Null, n), nil
  142. }
  143. case 't':
  144. if n := matchWithDelim("true", in); n != 0 {
  145. return d.consumeBoolToken(true, n), nil
  146. }
  147. case 'f':
  148. if n := matchWithDelim("false", in); n != 0 {
  149. return d.consumeBoolToken(false, n), nil
  150. }
  151. case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
  152. if n, ok := parseNumber(in); ok {
  153. return d.consumeToken(Number, n), nil
  154. }
  155. case '"':
  156. s, n, err := d.parseString(in)
  157. if err != nil {
  158. return Token{}, err
  159. }
  160. return d.consumeStringToken(s, n), nil
  161. case '{':
  162. return d.consumeToken(ObjectOpen, 1), nil
  163. case '}':
  164. return d.consumeToken(ObjectClose, 1), nil
  165. case '[':
  166. return d.consumeToken(ArrayOpen, 1), nil
  167. case ']':
  168. return d.consumeToken(ArrayClose, 1), nil
  169. case ',':
  170. return d.consumeToken(comma, 1), nil
  171. }
  172. return Token{}, d.newSyntaxError(d.currPos(), "invalid value %s", errRegexp.Find(in))
  173. }
  174. // newSyntaxError returns an error with line and column information useful for
  175. // syntax errors.
  176. func (d *Decoder) newSyntaxError(pos int, f string, x ...interface{}) error {
  177. e := errors.New(f, x...)
  178. line, column := d.Position(pos)
  179. return errors.New("syntax error (line %d:%d): %v", line, column, e)
  180. }
  181. // Position returns line and column number of given index of the original input.
  182. // It will panic if index is out of range.
  183. func (d *Decoder) Position(idx int) (line int, column int) {
  184. b := d.orig[:idx]
  185. line = bytes.Count(b, []byte("\n")) + 1
  186. if i := bytes.LastIndexByte(b, '\n'); i >= 0 {
  187. b = b[i+1:]
  188. }
  189. column = utf8.RuneCount(b) + 1 // ignore multi-rune characters
  190. return line, column
  191. }
  192. // currPos returns the current index position of d.in from d.orig.
  193. func (d *Decoder) currPos() int {
  194. return len(d.orig) - len(d.in)
  195. }
  196. // matchWithDelim matches s with the input b and verifies that the match
  197. // terminates with a delimiter of some form (e.g., r"[^-+_.a-zA-Z0-9]").
  198. // As a special case, EOF is considered a delimiter. It returns the length of s
  199. // if there is a match, else 0.
  200. func matchWithDelim(s string, b []byte) int {
  201. if !bytes.HasPrefix(b, []byte(s)) {
  202. return 0
  203. }
  204. n := len(s)
  205. if n < len(b) && isNotDelim(b[n]) {
  206. return 0
  207. }
  208. return n
  209. }
  210. // isNotDelim returns true if given byte is a not delimiter character.
  211. func isNotDelim(c byte) bool {
  212. return (c == '-' || c == '+' || c == '.' || c == '_' ||
  213. ('a' <= c && c <= 'z') ||
  214. ('A' <= c && c <= 'Z') ||
  215. ('0' <= c && c <= '9'))
  216. }
  217. // consume consumes n bytes of input and any subsequent whitespace.
  218. func (d *Decoder) consume(n int) {
  219. d.in = d.in[n:]
  220. for len(d.in) > 0 {
  221. switch d.in[0] {
  222. case ' ', '\n', '\r', '\t':
  223. d.in = d.in[1:]
  224. default:
  225. return
  226. }
  227. }
  228. }
  229. // isValueNext returns true if next type should be a JSON value: Null,
  230. // Number, String or Bool.
  231. func (d *Decoder) isValueNext() bool {
  232. if len(d.openStack) == 0 {
  233. return d.lastToken.kind == 0
  234. }
  235. start := d.openStack[len(d.openStack)-1]
  236. switch start {
  237. case ObjectOpen:
  238. return d.lastToken.kind&Name != 0
  239. case ArrayOpen:
  240. return d.lastToken.kind&(ArrayOpen|comma) != 0
  241. }
  242. panic(fmt.Sprintf(
  243. "unreachable logic in Decoder.isValueNext, lastToken.kind: %v, openStack: %v",
  244. d.lastToken.kind, start))
  245. }
  246. // consumeToken constructs a Token for given Kind with raw value derived from
  247. // current d.in and given size, and consumes the given size-length of it.
  248. func (d *Decoder) consumeToken(kind Kind, size int) Token {
  249. tok := Token{
  250. kind: kind,
  251. raw: d.in[:size],
  252. pos: len(d.orig) - len(d.in),
  253. }
  254. d.consume(size)
  255. return tok
  256. }
  257. // consumeBoolToken constructs a Token for a Bool kind with raw value derived from
  258. // current d.in and given size.
  259. func (d *Decoder) consumeBoolToken(b bool, size int) Token {
  260. tok := Token{
  261. kind: Bool,
  262. raw: d.in[:size],
  263. pos: len(d.orig) - len(d.in),
  264. boo: b,
  265. }
  266. d.consume(size)
  267. return tok
  268. }
  269. // consumeStringToken constructs a Token for a String kind with raw value derived
  270. // from current d.in and given size.
  271. func (d *Decoder) consumeStringToken(s string, size int) Token {
  272. tok := Token{
  273. kind: String,
  274. raw: d.in[:size],
  275. pos: len(d.orig) - len(d.in),
  276. str: s,
  277. }
  278. d.consume(size)
  279. return tok
  280. }
  281. // Clone returns a copy of the Decoder for use in reading ahead the next JSON
  282. // object, array or other values without affecting current Decoder.
  283. func (d *Decoder) Clone() *Decoder {
  284. ret := *d
  285. ret.openStack = append([]Kind(nil), ret.openStack...)
  286. return &ret
  287. }