decode_token.go 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373
  1. // Copyright 2018 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package text
  5. import (
  6. "bytes"
  7. "fmt"
  8. "math"
  9. "strconv"
  10. "strings"
  11. "google.golang.org/protobuf/internal/flags"
  12. )
  13. // Kind represents a token kind expressible in the textproto format.
  14. type Kind uint8
  15. // Kind values.
  16. const (
  17. Invalid Kind = iota
  18. EOF
  19. Name // Name indicates the field name.
  20. Scalar // Scalar are scalar values, e.g. "string", 47, ENUM_LITERAL, true.
  21. MessageOpen
  22. MessageClose
  23. ListOpen
  24. ListClose
  25. // comma and semi-colon are only for parsing in between values and should not be exposed.
  26. comma
  27. semicolon
  28. // bof indicates beginning of file, which is the default token
  29. // kind at the beginning of parsing.
  30. bof = Invalid
  31. )
  32. func (t Kind) String() string {
  33. switch t {
  34. case Invalid:
  35. return "<invalid>"
  36. case EOF:
  37. return "eof"
  38. case Scalar:
  39. return "scalar"
  40. case Name:
  41. return "name"
  42. case MessageOpen:
  43. return "{"
  44. case MessageClose:
  45. return "}"
  46. case ListOpen:
  47. return "["
  48. case ListClose:
  49. return "]"
  50. case comma:
  51. return ","
  52. case semicolon:
  53. return ";"
  54. default:
  55. return fmt.Sprintf("<invalid:%v>", uint8(t))
  56. }
  57. }
  58. // NameKind represents different types of field names.
  59. type NameKind uint8
  60. // NameKind values.
  61. const (
  62. IdentName NameKind = iota + 1
  63. TypeName
  64. FieldNumber
  65. )
  66. func (t NameKind) String() string {
  67. switch t {
  68. case IdentName:
  69. return "IdentName"
  70. case TypeName:
  71. return "TypeName"
  72. case FieldNumber:
  73. return "FieldNumber"
  74. default:
  75. return fmt.Sprintf("<invalid:%v>", uint8(t))
  76. }
  77. }
  78. // Bit mask in Token.attrs to indicate if a Name token is followed by the
  79. // separator char ':'. The field name separator char is optional for message
  80. // field or repeated message field, but required for all other types. Decoder
  81. // simply indicates whether a Name token is followed by separator or not. It is
  82. // up to the prototext package to validate.
  83. const hasSeparator = 1 << 7
  84. // Scalar value types.
  85. const (
  86. numberValue = iota + 1
  87. stringValue
  88. literalValue
  89. )
  90. // Bit mask in Token.numAttrs to indicate that the number is a negative.
  91. const isNegative = 1 << 7
// Token provides a parsed token kind and value. Values are provided by the
// different accessor methods.
type Token struct {
	// kind is the Kind of the Token object.
	kind Kind
	// attrs contains metadata for the following Kinds:
	//
	//	Name: hasSeparator bit and one of NameKind.
	//	Scalar: one of numberValue, stringValue, literalValue.
	attrs uint8
	// numAttrs contains metadata for numberValue:
	//   - highest bit is whether negative or positive.
	//   - lower bits indicate one of numDec, numHex, numOct, numFloat.
	numAttrs uint8
	// pos provides the position of the token in the original input.
	pos int
	// raw holds the raw bytes of the serialized token.
	// This is a subslice into the original input.
	raw []byte
	// str contains the parsed string for the following:
	//   - stringValue of Scalar kind
	//   - numberValue of Scalar kind
	//   - TypeName of Name kind
	str string
}
// Kind returns the token kind stored in the token.
func (t Token) Kind() Kind {
	return t.kind
}
// RawString returns the token's raw bytes, as read from the input, converted
// to a string.
func (t Token) RawString() string {
	return string(t.raw)
}
// Pos returns the position of the token in the input.
func (t Token) Pos() int {
	return t.pos
}
  128. // NameKind returns IdentName, TypeName or FieldNumber.
  129. // It panics if type is not Name.
  130. func (t Token) NameKind() NameKind {
  131. if t.kind == Name {
  132. return NameKind(t.attrs &^ hasSeparator)
  133. }
  134. panic(fmt.Sprintf("Token is not a Name type: %s", t.kind))
  135. }
  136. // HasSeparator returns true if the field name is followed by the separator char
  137. // ':', else false. It panics if type is not Name.
  138. func (t Token) HasSeparator() bool {
  139. if t.kind == Name {
  140. return t.attrs&hasSeparator != 0
  141. }
  142. panic(fmt.Sprintf("Token is not a Name type: %s", t.kind))
  143. }
  144. // IdentName returns the value for IdentName type.
  145. func (t Token) IdentName() string {
  146. if t.kind == Name && t.attrs&uint8(IdentName) != 0 {
  147. return string(t.raw)
  148. }
  149. panic(fmt.Sprintf("Token is not an IdentName: %s:%s", t.kind, NameKind(t.attrs&^hasSeparator)))
  150. }
  151. // TypeName returns the value for TypeName type.
  152. func (t Token) TypeName() string {
  153. if t.kind == Name && t.attrs&uint8(TypeName) != 0 {
  154. return t.str
  155. }
  156. panic(fmt.Sprintf("Token is not a TypeName: %s:%s", t.kind, NameKind(t.attrs&^hasSeparator)))
  157. }
  158. // FieldNumber returns the value for FieldNumber type. It returns a
  159. // non-negative int32 value. Caller will still need to validate for the correct
  160. // field number range.
  161. func (t Token) FieldNumber() int32 {
  162. if t.kind != Name || t.attrs&uint8(FieldNumber) == 0 {
  163. panic(fmt.Sprintf("Token is not a FieldNumber: %s:%s", t.kind, NameKind(t.attrs&^hasSeparator)))
  164. }
  165. // Following should not return an error as it had already been called right
  166. // before this Token was constructed.
  167. num, _ := strconv.ParseInt(string(t.raw), 10, 32)
  168. return int32(num)
  169. }
  170. // String returns the string value for a Scalar type.
  171. func (t Token) String() (string, bool) {
  172. if t.kind != Scalar || t.attrs != stringValue {
  173. return "", false
  174. }
  175. return t.str, true
  176. }
  177. // Enum returns the literal value for a Scalar type for use as enum literals.
  178. func (t Token) Enum() (string, bool) {
  179. if t.kind != Scalar || t.attrs != literalValue || (len(t.raw) > 0 && t.raw[0] == '-') {
  180. return "", false
  181. }
  182. return string(t.raw), true
  183. }
  184. // Bool returns the bool value for a Scalar type.
  185. func (t Token) Bool() (bool, bool) {
  186. if t.kind != Scalar {
  187. return false, false
  188. }
  189. switch t.attrs {
  190. case literalValue:
  191. if b, ok := boolLits[string(t.raw)]; ok {
  192. return b, true
  193. }
  194. case numberValue:
  195. // Unsigned integer representation of 0 or 1 is permitted: 00, 0x0, 01,
  196. // 0x1, etc.
  197. n, err := strconv.ParseUint(t.str, 0, 64)
  198. if err == nil {
  199. switch n {
  200. case 0:
  201. return false, true
  202. case 1:
  203. return true, true
  204. }
  205. }
  206. }
  207. return false, false
  208. }
  209. // These exact boolean literals are the ones supported in C++.
  210. var boolLits = map[string]bool{
  211. "t": true,
  212. "true": true,
  213. "True": true,
  214. "f": false,
  215. "false": false,
  216. "False": false,
  217. }
  218. // Uint64 returns the uint64 value for a Scalar type.
  219. func (t Token) Uint64() (uint64, bool) {
  220. if t.kind != Scalar || t.attrs != numberValue ||
  221. t.numAttrs&isNegative > 0 || t.numAttrs&numFloat > 0 {
  222. return 0, false
  223. }
  224. n, err := strconv.ParseUint(t.str, 0, 64)
  225. if err != nil {
  226. return 0, false
  227. }
  228. return n, true
  229. }
  230. // Uint32 returns the uint32 value for a Scalar type.
  231. func (t Token) Uint32() (uint32, bool) {
  232. if t.kind != Scalar || t.attrs != numberValue ||
  233. t.numAttrs&isNegative > 0 || t.numAttrs&numFloat > 0 {
  234. return 0, false
  235. }
  236. n, err := strconv.ParseUint(t.str, 0, 32)
  237. if err != nil {
  238. return 0, false
  239. }
  240. return uint32(n), true
  241. }
  242. // Int64 returns the int64 value for a Scalar type.
  243. func (t Token) Int64() (int64, bool) {
  244. if t.kind != Scalar || t.attrs != numberValue || t.numAttrs&numFloat > 0 {
  245. return 0, false
  246. }
  247. if n, err := strconv.ParseInt(t.str, 0, 64); err == nil {
  248. return n, true
  249. }
  250. // C++ accepts large positive hex numbers as negative values.
  251. // This feature is here for proto1 backwards compatibility purposes.
  252. if flags.ProtoLegacy && (t.numAttrs == numHex) {
  253. if n, err := strconv.ParseUint(t.str, 0, 64); err == nil {
  254. return int64(n), true
  255. }
  256. }
  257. return 0, false
  258. }
  259. // Int32 returns the int32 value for a Scalar type.
  260. func (t Token) Int32() (int32, bool) {
  261. if t.kind != Scalar || t.attrs != numberValue || t.numAttrs&numFloat > 0 {
  262. return 0, false
  263. }
  264. if n, err := strconv.ParseInt(t.str, 0, 32); err == nil {
  265. return int32(n), true
  266. }
  267. // C++ accepts large positive hex numbers as negative values.
  268. // This feature is here for proto1 backwards compatibility purposes.
  269. if flags.ProtoLegacy && (t.numAttrs == numHex) {
  270. if n, err := strconv.ParseUint(t.str, 0, 32); err == nil {
  271. return int32(n), true
  272. }
  273. }
  274. return 0, false
  275. }
  276. // Float64 returns the float64 value for a Scalar type.
  277. func (t Token) Float64() (float64, bool) {
  278. if t.kind != Scalar {
  279. return 0, false
  280. }
  281. switch t.attrs {
  282. case literalValue:
  283. if f, ok := floatLits[strings.ToLower(string(t.raw))]; ok {
  284. return f, true
  285. }
  286. case numberValue:
  287. n, err := strconv.ParseFloat(t.str, 64)
  288. if err == nil {
  289. return n, true
  290. }
  291. nerr := err.(*strconv.NumError)
  292. if nerr.Err == strconv.ErrRange {
  293. return n, true
  294. }
  295. }
  296. return 0, false
  297. }
  298. // Float32 returns the float32 value for a Scalar type.
  299. func (t Token) Float32() (float32, bool) {
  300. if t.kind != Scalar {
  301. return 0, false
  302. }
  303. switch t.attrs {
  304. case literalValue:
  305. if f, ok := floatLits[strings.ToLower(string(t.raw))]; ok {
  306. return float32(f), true
  307. }
  308. case numberValue:
  309. n, err := strconv.ParseFloat(t.str, 64)
  310. if err == nil {
  311. // Overflows are treated as (-)infinity.
  312. return float32(n), true
  313. }
  314. nerr := err.(*strconv.NumError)
  315. if nerr.Err == strconv.ErrRange {
  316. return float32(n), true
  317. }
  318. }
  319. return 0, false
  320. }
  321. // These are the supported float literals which C++ permits case-insensitive
  322. // variants of these.
  323. var floatLits = map[string]float64{
  324. "nan": math.NaN(),
  325. "inf": math.Inf(1),
  326. "infinity": math.Inf(1),
  327. "-inf": math.Inf(-1),
  328. "-infinity": math.Inf(-1),
  329. }
  330. // TokenEquals returns true if given Tokens are equal, else false.
  331. func TokenEquals(x, y Token) bool {
  332. return x.kind == y.kind &&
  333. x.attrs == y.attrs &&
  334. x.numAttrs == y.numAttrs &&
  335. x.pos == y.pos &&
  336. bytes.Equal(x.raw, y.raw) &&
  337. x.str == y.str
  338. }