strings.go 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196
  1. // Copyright 2019 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. // Package strs provides string manipulation functionality specific to protobuf.
  5. package strs
  6. import (
  7. "go/token"
  8. "strings"
  9. "unicode"
  10. "unicode/utf8"
  11. "google.golang.org/protobuf/internal/flags"
  12. "google.golang.org/protobuf/reflect/protoreflect"
  13. )
  14. // EnforceUTF8 reports whether to enforce strict UTF-8 validation.
  15. func EnforceUTF8(fd protoreflect.FieldDescriptor) bool {
  16. if flags.ProtoLegacy || fd.Syntax() == protoreflect.Editions {
  17. if fd, ok := fd.(interface{ EnforceUTF8() bool }); ok {
  18. return fd.EnforceUTF8()
  19. }
  20. }
  21. return fd.Syntax() == protoreflect.Proto3
  22. }
  23. // GoCamelCase camel-cases a protobuf name for use as a Go identifier.
  24. //
  25. // If there is an interior underscore followed by a lower case letter,
  26. // drop the underscore and convert the letter to upper case.
  27. func GoCamelCase(s string) string {
  28. // Invariant: if the next letter is lower case, it must be converted
  29. // to upper case.
  30. // That is, we process a word at a time, where words are marked by _ or
  31. // upper case letter. Digits are treated as words.
  32. var b []byte
  33. for i := 0; i < len(s); i++ {
  34. c := s[i]
  35. switch {
  36. case c == '.' && i+1 < len(s) && isASCIILower(s[i+1]):
  37. // Skip over '.' in ".{{lowercase}}".
  38. case c == '.':
  39. b = append(b, '_') // convert '.' to '_'
  40. case c == '_' && (i == 0 || s[i-1] == '.'):
  41. // Convert initial '_' to ensure we start with a capital letter.
  42. // Do the same for '_' after '.' to match historic behavior.
  43. b = append(b, 'X') // convert '_' to 'X'
  44. case c == '_' && i+1 < len(s) && isASCIILower(s[i+1]):
  45. // Skip over '_' in "_{{lowercase}}".
  46. case isASCIIDigit(c):
  47. b = append(b, c)
  48. default:
  49. // Assume we have a letter now - if not, it's a bogus identifier.
  50. // The next word is a sequence of characters that must start upper case.
  51. if isASCIILower(c) {
  52. c -= 'a' - 'A' // convert lowercase to uppercase
  53. }
  54. b = append(b, c)
  55. // Accept lower case sequence that follows.
  56. for ; i+1 < len(s) && isASCIILower(s[i+1]); i++ {
  57. b = append(b, s[i+1])
  58. }
  59. }
  60. }
  61. return string(b)
  62. }
  63. // GoSanitized converts a string to a valid Go identifier.
  64. func GoSanitized(s string) string {
  65. // Sanitize the input to the set of valid characters,
  66. // which must be '_' or be in the Unicode L or N categories.
  67. s = strings.Map(func(r rune) rune {
  68. if unicode.IsLetter(r) || unicode.IsDigit(r) {
  69. return r
  70. }
  71. return '_'
  72. }, s)
  73. // Prepend '_' in the event of a Go keyword conflict or if
  74. // the identifier is invalid (does not start in the Unicode L category).
  75. r, _ := utf8.DecodeRuneInString(s)
  76. if token.Lookup(s).IsKeyword() || !unicode.IsLetter(r) {
  77. return "_" + s
  78. }
  79. return s
  80. }
  81. // JSONCamelCase converts a snake_case identifier to a camelCase identifier,
  82. // according to the protobuf JSON specification.
  83. func JSONCamelCase(s string) string {
  84. var b []byte
  85. var wasUnderscore bool
  86. for i := 0; i < len(s); i++ { // proto identifiers are always ASCII
  87. c := s[i]
  88. if c != '_' {
  89. if wasUnderscore && isASCIILower(c) {
  90. c -= 'a' - 'A' // convert to uppercase
  91. }
  92. b = append(b, c)
  93. }
  94. wasUnderscore = c == '_'
  95. }
  96. return string(b)
  97. }
  98. // JSONSnakeCase converts a camelCase identifier to a snake_case identifier,
  99. // according to the protobuf JSON specification.
  100. func JSONSnakeCase(s string) string {
  101. var b []byte
  102. for i := 0; i < len(s); i++ { // proto identifiers are always ASCII
  103. c := s[i]
  104. if isASCIIUpper(c) {
  105. b = append(b, '_')
  106. c += 'a' - 'A' // convert to lowercase
  107. }
  108. b = append(b, c)
  109. }
  110. return string(b)
  111. }
  112. // MapEntryName derives the name of the map entry message given the field name.
  113. // See protoc v3.8.0: src/google/protobuf/descriptor.cc:254-276,6057
  114. func MapEntryName(s string) string {
  115. var b []byte
  116. upperNext := true
  117. for _, c := range s {
  118. switch {
  119. case c == '_':
  120. upperNext = true
  121. case upperNext:
  122. b = append(b, byte(unicode.ToUpper(c)))
  123. upperNext = false
  124. default:
  125. b = append(b, byte(c))
  126. }
  127. }
  128. b = append(b, "Entry"...)
  129. return string(b)
  130. }
  131. // EnumValueName derives the camel-cased enum value name.
  132. // See protoc v3.8.0: src/google/protobuf/descriptor.cc:297-313
  133. func EnumValueName(s string) string {
  134. var b []byte
  135. upperNext := true
  136. for _, c := range s {
  137. switch {
  138. case c == '_':
  139. upperNext = true
  140. case upperNext:
  141. b = append(b, byte(unicode.ToUpper(c)))
  142. upperNext = false
  143. default:
  144. b = append(b, byte(unicode.ToLower(c)))
  145. upperNext = false
  146. }
  147. }
  148. return string(b)
  149. }
  150. // TrimEnumPrefix trims the enum name prefix from an enum value name,
  151. // where the prefix is all lowercase without underscores.
  152. // See protoc v3.8.0: src/google/protobuf/descriptor.cc:330-375
  153. func TrimEnumPrefix(s, prefix string) string {
  154. s0 := s // original input
  155. for len(s) > 0 && len(prefix) > 0 {
  156. if s[0] == '_' {
  157. s = s[1:]
  158. continue
  159. }
  160. if unicode.ToLower(rune(s[0])) != rune(prefix[0]) {
  161. return s0 // no prefix match
  162. }
  163. s, prefix = s[1:], prefix[1:]
  164. }
  165. if len(prefix) > 0 {
  166. return s0 // no prefix match
  167. }
  168. s = strings.TrimLeft(s, "_")
  169. if len(s) == 0 {
  170. return s0 // avoid returning empty string
  171. }
  172. return s
  173. }
  174. func isASCIILower(c byte) bool {
  175. return 'a' <= c && c <= 'z'
  176. }
  177. func isASCIIUpper(c byte) bool {
  178. return 'A' <= c && c <= 'Z'
  179. }
  180. func isASCIIDigit(c byte) bool {
  181. return '0' <= c && c <= '9'
  182. }