encode.go 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272
  1. // Copyright 2018 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package text
  5. import (
  6. "math"
  7. "math/bits"
  8. "strconv"
  9. "strings"
  10. "unicode/utf8"
  11. "google.golang.org/protobuf/internal/detrand"
  12. "google.golang.org/protobuf/internal/errors"
  13. )
  14. // encType represents an encoding type.
  15. type encType uint8
  16. const (
  17. _ encType = (1 << iota) / 2
  18. name
  19. scalar
  20. messageOpen
  21. messageClose
  22. )
  23. // Encoder provides methods to write out textproto constructs and values. The user is
  24. // responsible for producing valid sequences of constructs and values.
  25. type Encoder struct {
  26. encoderState
  27. indent string
  28. delims [2]byte
  29. outputASCII bool
  30. }
  31. type encoderState struct {
  32. lastType encType
  33. indents []byte
  34. out []byte
  35. }
  36. // NewEncoder returns an Encoder.
  37. //
  38. // If indent is a non-empty string, it causes every entry in a List or Message
  39. // to be preceded by the indent and trailed by a newline.
  40. //
  41. // If delims is not the zero value, it controls the delimiter characters used
  42. // for messages (e.g., "{}" vs "<>").
  43. //
  44. // If outputASCII is true, strings will be serialized in such a way that
  45. // multi-byte UTF-8 sequences are escaped. This property ensures that the
  46. // overall output is ASCII (as opposed to UTF-8).
  47. func NewEncoder(buf []byte, indent string, delims [2]byte, outputASCII bool) (*Encoder, error) {
  48. e := &Encoder{
  49. encoderState: encoderState{out: buf},
  50. }
  51. if len(indent) > 0 {
  52. if strings.Trim(indent, " \t") != "" {
  53. return nil, errors.New("indent may only be composed of space and tab characters")
  54. }
  55. e.indent = indent
  56. }
  57. switch delims {
  58. case [2]byte{0, 0}:
  59. e.delims = [2]byte{'{', '}'}
  60. case [2]byte{'{', '}'}, [2]byte{'<', '>'}:
  61. e.delims = delims
  62. default:
  63. return nil, errors.New("delimiters may only be \"{}\" or \"<>\"")
  64. }
  65. e.outputASCII = outputASCII
  66. return e, nil
  67. }
  68. // Bytes returns the content of the written bytes.
  69. func (e *Encoder) Bytes() []byte {
  70. return e.out
  71. }
  72. // StartMessage writes out the '{' or '<' symbol.
  73. func (e *Encoder) StartMessage() {
  74. e.prepareNext(messageOpen)
  75. e.out = append(e.out, e.delims[0])
  76. }
  77. // EndMessage writes out the '}' or '>' symbol.
  78. func (e *Encoder) EndMessage() {
  79. e.prepareNext(messageClose)
  80. e.out = append(e.out, e.delims[1])
  81. }
  82. // WriteName writes out the field name and the separator ':'.
  83. func (e *Encoder) WriteName(s string) {
  84. e.prepareNext(name)
  85. e.out = append(e.out, s...)
  86. e.out = append(e.out, ':')
  87. }
  88. // WriteBool writes out the given boolean value.
  89. func (e *Encoder) WriteBool(b bool) {
  90. if b {
  91. e.WriteLiteral("true")
  92. } else {
  93. e.WriteLiteral("false")
  94. }
  95. }
  96. // WriteString writes out the given string value.
  97. func (e *Encoder) WriteString(s string) {
  98. e.prepareNext(scalar)
  99. e.out = appendString(e.out, s, e.outputASCII)
  100. }
  101. func appendString(out []byte, in string, outputASCII bool) []byte {
  102. out = append(out, '"')
  103. i := indexNeedEscapeInString(in)
  104. in, out = in[i:], append(out, in[:i]...)
  105. for len(in) > 0 {
  106. switch r, n := utf8.DecodeRuneInString(in); {
  107. case r == utf8.RuneError && n == 1:
  108. // We do not report invalid UTF-8 because strings in the text format
  109. // are used to represent both the proto string and bytes type.
  110. r = rune(in[0])
  111. fallthrough
  112. case r < ' ' || r == '"' || r == '\\' || r == 0x7f:
  113. out = append(out, '\\')
  114. switch r {
  115. case '"', '\\':
  116. out = append(out, byte(r))
  117. case '\n':
  118. out = append(out, 'n')
  119. case '\r':
  120. out = append(out, 'r')
  121. case '\t':
  122. out = append(out, 't')
  123. default:
  124. out = append(out, 'x')
  125. out = append(out, "00"[1+(bits.Len32(uint32(r))-1)/4:]...)
  126. out = strconv.AppendUint(out, uint64(r), 16)
  127. }
  128. in = in[n:]
  129. case r >= utf8.RuneSelf && (outputASCII || r <= 0x009f):
  130. out = append(out, '\\')
  131. if r <= math.MaxUint16 {
  132. out = append(out, 'u')
  133. out = append(out, "0000"[1+(bits.Len32(uint32(r))-1)/4:]...)
  134. out = strconv.AppendUint(out, uint64(r), 16)
  135. } else {
  136. out = append(out, 'U')
  137. out = append(out, "00000000"[1+(bits.Len32(uint32(r))-1)/4:]...)
  138. out = strconv.AppendUint(out, uint64(r), 16)
  139. }
  140. in = in[n:]
  141. default:
  142. i := indexNeedEscapeInString(in[n:])
  143. in, out = in[n+i:], append(out, in[:n+i]...)
  144. }
  145. }
  146. out = append(out, '"')
  147. return out
  148. }
  149. // indexNeedEscapeInString returns the index of the character that needs
  150. // escaping. If no characters need escaping, this returns the input length.
  151. func indexNeedEscapeInString(s string) int {
  152. for i := 0; i < len(s); i++ {
  153. if c := s[i]; c < ' ' || c == '"' || c == '\'' || c == '\\' || c >= 0x7f {
  154. return i
  155. }
  156. }
  157. return len(s)
  158. }
  159. // WriteFloat writes out the given float value for given bitSize.
  160. func (e *Encoder) WriteFloat(n float64, bitSize int) {
  161. e.prepareNext(scalar)
  162. e.out = appendFloat(e.out, n, bitSize)
  163. }
  164. func appendFloat(out []byte, n float64, bitSize int) []byte {
  165. switch {
  166. case math.IsNaN(n):
  167. return append(out, "nan"...)
  168. case math.IsInf(n, +1):
  169. return append(out, "inf"...)
  170. case math.IsInf(n, -1):
  171. return append(out, "-inf"...)
  172. default:
  173. return strconv.AppendFloat(out, n, 'g', -1, bitSize)
  174. }
  175. }
  176. // WriteInt writes out the given signed integer value.
  177. func (e *Encoder) WriteInt(n int64) {
  178. e.prepareNext(scalar)
  179. e.out = strconv.AppendInt(e.out, n, 10)
  180. }
  181. // WriteUint writes out the given unsigned integer value.
  182. func (e *Encoder) WriteUint(n uint64) {
  183. e.prepareNext(scalar)
  184. e.out = strconv.AppendUint(e.out, n, 10)
  185. }
  186. // WriteLiteral writes out the given string as a literal value without quotes.
  187. // This is used for writing enum literal strings.
  188. func (e *Encoder) WriteLiteral(s string) {
  189. e.prepareNext(scalar)
  190. e.out = append(e.out, s...)
  191. }
  192. // prepareNext adds possible space and indentation for the next value based
  193. // on last encType and indent option. It also updates e.lastType to next.
  194. func (e *Encoder) prepareNext(next encType) {
  195. defer func() {
  196. e.lastType = next
  197. }()
  198. // Single line.
  199. if len(e.indent) == 0 {
  200. // Add space after each field before the next one.
  201. if e.lastType&(scalar|messageClose) != 0 && next == name {
  202. e.out = append(e.out, ' ')
  203. // Add a random extra space to make output unstable.
  204. if detrand.Bool() {
  205. e.out = append(e.out, ' ')
  206. }
  207. }
  208. return
  209. }
  210. // Multi-line.
  211. switch {
  212. case e.lastType == name:
  213. e.out = append(e.out, ' ')
  214. // Add a random extra space after name: to make output unstable.
  215. if detrand.Bool() {
  216. e.out = append(e.out, ' ')
  217. }
  218. case e.lastType == messageOpen && next != messageClose:
  219. e.indents = append(e.indents, e.indent...)
  220. e.out = append(e.out, '\n')
  221. e.out = append(e.out, e.indents...)
  222. case e.lastType&(scalar|messageClose) != 0:
  223. if next == messageClose {
  224. e.indents = e.indents[:len(e.indents)-len(e.indent)]
  225. }
  226. e.out = append(e.out, '\n')
  227. e.out = append(e.out, e.indents...)
  228. }
  229. }
  230. // Snapshot returns the current snapshot for use in Reset.
  231. func (e *Encoder) Snapshot() encoderState {
  232. return e.encoderState
  233. }
  234. // Reset resets the Encoder to the given encoderState from a Snapshot.
  235. func (e *Encoder) Reset(es encoderState) {
  236. e.encoderState = es
  237. }
  238. // AppendString appends the escaped form of the input string to b.
  239. func AppendString(b []byte, s string) []byte {
  240. return appendString(b, s, false)
  241. }