ascii.h 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284
  1. //
  2. // Copyright 2017 The Abseil Authors.
  3. //
  4. // Licensed under the Apache License, Version 2.0 (the "License");
  5. // you may not use this file except in compliance with the License.
  6. // You may obtain a copy of the License at
  7. //
  8. // https://www.apache.org/licenses/LICENSE-2.0
  9. //
  10. // Unless required by applicable law or agreed to in writing, software
  11. // distributed under the License is distributed on an "AS IS" BASIS,
  12. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. // See the License for the specific language governing permissions and
  14. // limitations under the License.
  15. //
  16. // -----------------------------------------------------------------------------
  17. // File: ascii.h
  18. // -----------------------------------------------------------------------------
  19. //
  20. // This package contains functions operating on characters and strings
  21. // restricted to standard ASCII. These include character classification
  22. // functions analogous to those found in the ANSI C Standard Library <ctype.h>
  23. // header file.
  24. //
  25. // C++ implementations provide <ctype.h> functionality based on their
  26. // C environment locale. In general, reliance on such a locale is not ideal, as
  27. // the locale standard is problematic (and may not return invariant information
  28. // for the same character set, for example). These `ascii_*()` functions are
  29. // hard-wired for standard ASCII, much faster, and guaranteed to behave
  30. // consistently. They will never be overloaded, nor will their function
  31. // signature change.
  32. //
  33. // `ascii_isalnum()`, `ascii_isalpha()`, `ascii_isascii()`, `ascii_isblank()`,
  34. // `ascii_iscntrl()`, `ascii_isdigit()`, `ascii_isgraph()`, `ascii_islower()`,
  35. // `ascii_isprint()`, `ascii_ispunct()`, `ascii_isspace()`, `ascii_isupper()`,
  36. // `ascii_isxdigit()`
  37. // Analogous to the <ctype.h> functions with similar names, these
  38. // functions take an unsigned char and return a bool, based on whether the
  39. // character matches the condition specified.
  40. //
  41. // If the input character has a numerical value greater than 127, these
  42. // functions return `false`.
  43. //
  44. // `ascii_tolower()`, `ascii_toupper()`
  45. // Analogous to the <ctype.h> functions with similar names, these functions
  46. // take an unsigned char and return a char.
  47. //
  48. // If the input character is not an ASCII {lower,upper}-case letter (including
  49. // numerical values greater than 127) then the functions return the same value
  50. // as the input character.
  51. #ifndef ABSL_STRINGS_ASCII_H_
  52. #define ABSL_STRINGS_ASCII_H_
  53. #include <algorithm>
  54. #include <cstddef>
  55. #include <string>
  56. #include <utility>
  57. #include "absl/base/attributes.h"
  58. #include "absl/base/config.h"
  59. #include "absl/base/nullability.h"
  60. #include "absl/strings/internal/resize_uninitialized.h"
  61. #include "absl/strings/string_view.h"
  62. namespace absl {
  63. ABSL_NAMESPACE_BEGIN
  namespace ascii_internal {
  // Implementation details shared by the inline functions in this header.
  // Only declarations appear here; the definitions are not part of this file.

  // Declaration for an array of bitfields holding character information.
  // Indexed by the unsigned char value of a character. The table-driven
  // `ascii_is*()` predicates in this header each test a single bit of the
  // selected entry (0x01, 0x04, 0x08, 0x10, 0x20, 0x40 or 0x80).
  ABSL_DLL extern const unsigned char kPropertyBits[256];
  // Declaration for the array of characters to upper-case characters.
  // `ascii_toupper(c)` returns entry `c` of this table.
  ABSL_DLL extern const char kToUpper[256];
  // Declaration for the array of characters to lower-case characters.
  // `ascii_tolower(c)` returns entry `c` of this table.
  ABSL_DLL extern const char kToLower[256];
  // Buffer-level helper used by `absl::AsciiStrToLower(absl::string_view)`,
  // which passes a destination buffer of `n` characters, the source data
  // pointer, and the source length `n`.
  // NOTE(review): presumably writes the lower-cased form of src[0, n) into
  // dst; the definition is not visible here -- confirm.
  void AsciiStrToLower(absl::Nonnull<char*> dst, absl::Nullable<const char*> src,
                       size_t n);
  // Buffer-level helper used by `absl::AsciiStrToUpper(absl::string_view)`,
  // called with the same argument layout as the lower-case helper above.
  // NOTE(review): presumably writes the upper-cased form of src[0, n) into
  // dst; the definition is not visible here -- confirm.
  void AsciiStrToUpper(absl::Nonnull<char*> dst, absl::Nullable<const char*> src,
                       size_t n);
  }  // namespace ascii_internal
  76. // ascii_isalpha()
  77. //
  78. // Determines whether the given character is an alphabetic character.
  79. inline bool ascii_isalpha(unsigned char c) {
  80. return (ascii_internal::kPropertyBits[c] & 0x01) != 0;
  81. }
  82. // ascii_isalnum()
  83. //
  84. // Determines whether the given character is an alphanumeric character.
  85. inline bool ascii_isalnum(unsigned char c) {
  86. return (ascii_internal::kPropertyBits[c] & 0x04) != 0;
  87. }
  88. // ascii_isspace()
  89. //
  90. // Determines whether the given character is a whitespace character (space,
  91. // tab, vertical tab, formfeed, linefeed, or carriage return).
  92. inline bool ascii_isspace(unsigned char c) {
  93. return (ascii_internal::kPropertyBits[c] & 0x08) != 0;
  94. }
  95. // ascii_ispunct()
  96. //
  97. // Determines whether the given character is a punctuation character.
  98. inline bool ascii_ispunct(unsigned char c) {
  99. return (ascii_internal::kPropertyBits[c] & 0x10) != 0;
  100. }
  101. // ascii_isblank()
  102. //
  103. // Determines whether the given character is a blank character (tab or space).
  104. inline bool ascii_isblank(unsigned char c) {
  105. return (ascii_internal::kPropertyBits[c] & 0x20) != 0;
  106. }
  107. // ascii_iscntrl()
  108. //
  109. // Determines whether the given character is a control character.
  110. inline bool ascii_iscntrl(unsigned char c) {
  111. return (ascii_internal::kPropertyBits[c] & 0x40) != 0;
  112. }
  113. // ascii_isxdigit()
  114. //
  115. // Determines whether the given character can be represented as a hexadecimal
  116. // digit character (i.e. {0-9} or {A-F}).
  117. inline bool ascii_isxdigit(unsigned char c) {
  118. return (ascii_internal::kPropertyBits[c] & 0x80) != 0;
  119. }
  // ascii_isdigit()
  //
  // Reports whether `c` is one of the decimal digit characters '0' through
  // '9'. Usable in constant expressions.
  inline constexpr bool ascii_isdigit(unsigned char c) {
    return !(c < '0' || '9' < c);
  }
  // ascii_isprint()
  //
  // Reports whether `c` is printable, which includes the space character:
  // true exactly for values 0x20 (space) through 0x7e ('~').
  inline constexpr bool ascii_isprint(unsigned char c) {
    return 0x20 <= c && c <= 0x7e;
  }
  // ascii_isgraph()
  //
  // Reports whether `c` has a graphical representation: true exactly for
  // values 0x21 ('!') through 0x7e ('~'), so the space character is excluded.
  inline constexpr bool ascii_isgraph(unsigned char c) {
    return 0x21 <= c && c <= 0x7e;
  }
  // ascii_isupper()
  //
  // Reports whether `c` is an uppercase letter, i.e. lies in 'A' through 'Z'.
  inline constexpr bool ascii_isupper(unsigned char c) {
    return !(c < 'A' || 'Z' < c);
  }
  // ascii_islower()
  //
  // Reports whether `c` is a lowercase letter, i.e. lies in 'a' through 'z'.
  inline constexpr bool ascii_islower(unsigned char c) {
    return !(c < 'a' || 'z' < c);
  }
  // ascii_isascii()
  //
  // Reports whether `c` is ASCII, i.e. its value is below 128 (the high bit
  // of the byte is clear).
  inline constexpr bool ascii_isascii(unsigned char c) {
    return (c & 0x80) == 0;
  }
  155. // ascii_tolower()
  156. //
  157. // Returns an ASCII character, converting to lowercase if uppercase is
  158. // passed. Note that character values > 127 are simply returned.
  159. inline char ascii_tolower(unsigned char c) {
  160. return ascii_internal::kToLower[c];
  161. }
  // Converts the characters in `s` to lowercase, changing the contents of `s`.
  // `s` is annotated `absl::Nonnull`. Only the declaration appears in this
  // header; the rvalue-string overload of AsciiStrToLower() calls this
  // function on its local result.
  void AsciiStrToLower(absl::Nonnull<std::string*> s);
  164. // Creates a lowercase string from a given absl::string_view.
  165. ABSL_MUST_USE_RESULT inline std::string AsciiStrToLower(absl::string_view s) {
  166. std::string result;
  167. strings_internal::STLStringResizeUninitialized(&result, s.size());
  168. ascii_internal::AsciiStrToLower(&result[0], s.data(), s.size());
  169. return result;
  170. }
  171. // Creates a lowercase string from a given std::string&&.
  172. //
  173. // (Template is used to lower priority of this overload.)
  174. template <int&... DoNotSpecify>
  175. ABSL_MUST_USE_RESULT inline std::string AsciiStrToLower(std::string&& s) {
  176. std::string result = std::move(s);
  177. absl::AsciiStrToLower(&result);
  178. return result;
  179. }
  180. // ascii_toupper()
  181. //
  182. // Returns the ASCII character, converting to upper-case if lower-case is
  183. // passed. Note that characters values > 127 are simply returned.
  184. inline char ascii_toupper(unsigned char c) {
  185. return ascii_internal::kToUpper[c];
  186. }
  // Converts the characters in `s` to uppercase, changing the contents of `s`.
  // `s` is annotated `absl::Nonnull`. Only the declaration appears in this
  // header; the rvalue-string overload of AsciiStrToUpper() calls this
  // function on its local result.
  void AsciiStrToUpper(absl::Nonnull<std::string*> s);
  189. // Creates an uppercase string from a given absl::string_view.
  190. ABSL_MUST_USE_RESULT inline std::string AsciiStrToUpper(absl::string_view s) {
  191. std::string result;
  192. strings_internal::STLStringResizeUninitialized(&result, s.size());
  193. ascii_internal::AsciiStrToUpper(&result[0], s.data(), s.size());
  194. return result;
  195. }
  196. // Creates an uppercase string from a given std::string&&.
  197. //
  198. // (Template is used to lower priority of this overload.)
  199. template <int&... DoNotSpecify>
  200. ABSL_MUST_USE_RESULT inline std::string AsciiStrToUpper(std::string&& s) {
  201. std::string result = std::move(s);
  202. absl::AsciiStrToUpper(&result);
  203. return result;
  204. }
  205. // Returns absl::string_view with whitespace stripped from the beginning of the
  206. // given string_view.
  207. ABSL_MUST_USE_RESULT inline absl::string_view StripLeadingAsciiWhitespace(
  208. absl::string_view str) {
  209. auto it = std::find_if_not(str.begin(), str.end(), absl::ascii_isspace);
  210. return str.substr(static_cast<size_t>(it - str.begin()));
  211. }
  212. // Strips in place whitespace from the beginning of the given string.
  213. inline void StripLeadingAsciiWhitespace(absl::Nonnull<std::string*> str) {
  214. auto it = std::find_if_not(str->begin(), str->end(), absl::ascii_isspace);
  215. str->erase(str->begin(), it);
  216. }
  217. // Returns absl::string_view with whitespace stripped from the end of the given
  218. // string_view.
  219. ABSL_MUST_USE_RESULT inline absl::string_view StripTrailingAsciiWhitespace(
  220. absl::string_view str) {
  221. auto it = std::find_if_not(str.rbegin(), str.rend(), absl::ascii_isspace);
  222. return str.substr(0, static_cast<size_t>(str.rend() - it));
  223. }
  224. // Strips in place whitespace from the end of the given string
  225. inline void StripTrailingAsciiWhitespace(absl::Nonnull<std::string*> str) {
  226. auto it = std::find_if_not(str->rbegin(), str->rend(), absl::ascii_isspace);
  227. str->erase(static_cast<size_t>(str->rend() - it));
  228. }
  229. // Returns absl::string_view with whitespace stripped from both ends of the
  230. // given string_view.
  231. ABSL_MUST_USE_RESULT inline absl::string_view StripAsciiWhitespace(
  232. absl::string_view str) {
  233. return StripTrailingAsciiWhitespace(StripLeadingAsciiWhitespace(str));
  234. }
  235. // Strips in place whitespace from both ends of the given string
  236. inline void StripAsciiWhitespace(absl::Nonnull<std::string*> str) {
  237. StripTrailingAsciiWhitespace(str);
  238. StripLeadingAsciiWhitespace(str);
  239. }
  // Removes leading, trailing, and consecutive internal whitespace.
  // Operates in place on `*str`, which is annotated `absl::Nonnull`. Only the
  // declaration appears in this header.
  // NOTE(review): presumably each internal run of whitespace is reduced to a
  // single character; confirm which character is kept against the definition.
  void RemoveExtraAsciiWhitespace(absl::Nonnull<std::string*> str);
  242. ABSL_NAMESPACE_END
  243. } // namespace absl
  244. #endif // ABSL_STRINGS_ASCII_H_