decode_string.go 2.6 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091
  1. // Copyright 2018 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package json
  5. import (
  6. "strconv"
  7. "unicode"
  8. "unicode/utf16"
  9. "unicode/utf8"
  10. "google.golang.org/protobuf/internal/strs"
  11. )
  12. func (d *Decoder) parseString(in []byte) (string, int, error) {
  13. in0 := in
  14. if len(in) == 0 {
  15. return "", 0, ErrUnexpectedEOF
  16. }
  17. if in[0] != '"' {
  18. return "", 0, d.newSyntaxError(d.currPos(), "invalid character %q at start of string", in[0])
  19. }
  20. in = in[1:]
  21. i := indexNeedEscapeInBytes(in)
  22. in, out := in[i:], in[:i:i] // set cap to prevent mutations
  23. for len(in) > 0 {
  24. switch r, n := utf8.DecodeRune(in); {
  25. case r == utf8.RuneError && n == 1:
  26. return "", 0, d.newSyntaxError(d.currPos(), "invalid UTF-8 in string")
  27. case r < ' ':
  28. return "", 0, d.newSyntaxError(d.currPos(), "invalid character %q in string", r)
  29. case r == '"':
  30. in = in[1:]
  31. n := len(in0) - len(in)
  32. return string(out), n, nil
  33. case r == '\\':
  34. if len(in) < 2 {
  35. return "", 0, ErrUnexpectedEOF
  36. }
  37. switch r := in[1]; r {
  38. case '"', '\\', '/':
  39. in, out = in[2:], append(out, r)
  40. case 'b':
  41. in, out = in[2:], append(out, '\b')
  42. case 'f':
  43. in, out = in[2:], append(out, '\f')
  44. case 'n':
  45. in, out = in[2:], append(out, '\n')
  46. case 'r':
  47. in, out = in[2:], append(out, '\r')
  48. case 't':
  49. in, out = in[2:], append(out, '\t')
  50. case 'u':
  51. if len(in) < 6 {
  52. return "", 0, ErrUnexpectedEOF
  53. }
  54. v, err := strconv.ParseUint(string(in[2:6]), 16, 16)
  55. if err != nil {
  56. return "", 0, d.newSyntaxError(d.currPos(), "invalid escape code %q in string", in[:6])
  57. }
  58. in = in[6:]
  59. r := rune(v)
  60. if utf16.IsSurrogate(r) {
  61. if len(in) < 6 {
  62. return "", 0, ErrUnexpectedEOF
  63. }
  64. v, err := strconv.ParseUint(string(in[2:6]), 16, 16)
  65. r = utf16.DecodeRune(r, rune(v))
  66. if in[0] != '\\' || in[1] != 'u' ||
  67. r == unicode.ReplacementChar || err != nil {
  68. return "", 0, d.newSyntaxError(d.currPos(), "invalid escape code %q in string", in[:6])
  69. }
  70. in = in[6:]
  71. }
  72. out = append(out, string(r)...)
  73. default:
  74. return "", 0, d.newSyntaxError(d.currPos(), "invalid escape code %q in string", in[:2])
  75. }
  76. default:
  77. i := indexNeedEscapeInBytes(in[n:])
  78. in, out = in[n+i:], append(out, in[:n+i]...)
  79. }
  80. }
  81. return "", 0, ErrUnexpectedEOF
  82. }
  83. // indexNeedEscapeInBytes returns the index of the character that needs
  84. // escaping. If no characters need escaping, this returns the input length.
  85. func indexNeedEscapeInBytes(b []byte) int { return indexNeedEscapeInString(strs.UnsafeString(b)) }