ascii.h 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253
  1. #pragma once
  2. #include <util/system/defaults.h>
  3. #include <util/system/compat.h>
  4. #include <util/generic/string.h>
  // ctype.h-like functions, locale-independent:
  // IsAscii{Upper,Lower,Digit,Alpha,Alnum,Space,Hex,Punct} and
  // AsciiTo{Upper,Lower}
  //
  // The standard functions from <ctype.h> are locale-dependent
  // and cause undefined behavior when called on chars outside the [0..127] range.
  11. namespace NPrivate {
  // Character-class flags. Every flag occupies its own bit, so a single
  // table entry can carry any combination of them.
  enum ECharClass {
      CC_SPACE = 1 << 0,
      CC_UPPER = 1 << 1,
      CC_LOWER = 1 << 2,
      CC_DIGIT = 1 << 3,
      CC_ALPHA = 1 << 4,
      CC_ALNUM = 1 << 5,
      CC_ISHEX = 1 << 6,
      CC_PUNCT = 1 << 7,
  };

  // Lookup tables indexed by byte value; only declared here.
  // ASCII_CLASS entries are tested against the ECharClass flags by the
  // IsAscii* predicates, ASCII_LOWER is read by AsciiToLower.
  extern const unsigned char ASCII_CLASS[256];
  extern const unsigned char ASCII_LOWER[256];
  // Maps a character-like type to the plain value type it stands for.
  // The primary template is the identity mapping.
  template <typename TChar>
  struct TDereference {
      using type = TChar;
  };
  28. #ifndef TSTRING_IS_STD_STRING
  29. template <class String>
  30. struct TDereference<TBasicCharRef<String>> {
  31. using type = typename String::value_type;
  32. };
  33. #endif
  34. template <class T>
  35. using TDereferenced = typename TDereference<T>::type;
  // Tells whether c may be narrowed to unsigned char and used as a table index.
  // Single-byte types are always accepted; wider integral types must hold a
  // value in [0, 127].
  template <class T>
  bool RangeOk(T c) noexcept {
      static_assert(std::is_integral<T>::value, "Integral type character expected");
      const bool singleByte = (sizeof(T) == 1);
      return singleByte || (static_cast<T>(0) <= c && c <= static_cast<T>(127));
  }
  44. #ifndef TSTRING_IS_STD_STRING
  45. template <class String>
  46. bool RangeOk(const TBasicCharRef<String>& c) {
  47. return RangeOk(static_cast<typename String::value_type>(c));
  48. }
  49. #endif
  50. } // namespace NPrivate
  // True when c lies in [0, 127], i.e. no bit outside the low seven is set.
  constexpr bool IsAscii(const int c) noexcept {
      return (c & ~0x7f) == 0;
  }
  54. inline bool IsAsciiSpace(unsigned char c) {
  55. return ::NPrivate::ASCII_CLASS[c] & ::NPrivate::CC_SPACE;
  56. }
  57. inline bool IsAsciiUpper(unsigned char c) {
  58. return ::NPrivate::ASCII_CLASS[c] & ::NPrivate::CC_UPPER;
  59. }
  60. inline bool IsAsciiLower(unsigned char c) {
  61. return ::NPrivate::ASCII_CLASS[c] & ::NPrivate::CC_LOWER;
  62. }
  63. inline bool IsAsciiDigit(unsigned char c) {
  64. return ::NPrivate::ASCII_CLASS[c] & ::NPrivate::CC_DIGIT;
  65. }
  66. inline bool IsAsciiAlpha(unsigned char c) {
  67. return ::NPrivate::ASCII_CLASS[c] & ::NPrivate::CC_ALPHA;
  68. }
  69. inline bool IsAsciiAlnum(unsigned char c) {
  70. return ::NPrivate::ASCII_CLASS[c] & ::NPrivate::CC_ALNUM;
  71. }
  72. inline bool IsAsciiHex(unsigned char c) {
  73. return ::NPrivate::ASCII_CLASS[c] & ::NPrivate::CC_ISHEX;
  74. }
  75. inline bool IsAsciiPunct(unsigned char c) {
  76. return ::NPrivate::ASCII_CLASS[c] & ::NPrivate::CC_PUNCT;
  77. }
  78. // some overloads
  79. template <class T>
  80. inline bool IsAsciiSpace(T c) {
  81. return ::NPrivate::RangeOk(c) && IsAsciiSpace(static_cast<unsigned char>(c));
  82. }
  83. template <class T>
  84. inline bool IsAsciiUpper(T c) {
  85. return ::NPrivate::RangeOk(c) && IsAsciiUpper(static_cast<unsigned char>(c));
  86. }
  87. template <class T>
  88. inline bool IsAsciiLower(T c) {
  89. return ::NPrivate::RangeOk(c) && IsAsciiLower(static_cast<unsigned char>(c));
  90. }
  91. template <class T>
  92. inline bool IsAsciiDigit(T c) {
  93. return ::NPrivate::RangeOk(c) && IsAsciiDigit(static_cast<unsigned char>(c));
  94. }
  95. template <class T>
  96. inline bool IsAsciiAlpha(T c) {
  97. return ::NPrivate::RangeOk(c) && IsAsciiAlpha(static_cast<unsigned char>(c));
  98. }
  99. template <class T>
  100. inline bool IsAsciiAlnum(T c) {
  101. return ::NPrivate::RangeOk(c) && IsAsciiAlnum(static_cast<unsigned char>(c));
  102. }
  103. template <class T>
  104. inline bool IsAsciiHex(T c) {
  105. return ::NPrivate::RangeOk(c) && IsAsciiHex(static_cast<unsigned char>(c));
  106. }
  107. template <class T>
  108. inline bool IsAsciiPunct(T c) {
  109. return ::NPrivate::RangeOk(c) && IsAsciiPunct(static_cast<unsigned char>(c));
  110. }
  111. // some extra helpers
  112. inline ui8 AsciiToLower(ui8 c) noexcept {
  113. return ::NPrivate::ASCII_LOWER[c];
  114. }
  115. inline char AsciiToLower(char c) noexcept {
  116. return (char)AsciiToLower((ui8)c);
  117. }
  118. template <class T>
  119. inline ::NPrivate::TDereferenced<T> AsciiToLower(T c) noexcept {
  120. return (c >= 0 && c <= 127) ? (::NPrivate::TDereferenced<T>)AsciiToLower((ui8)c) : c;
  121. }
  122. template <class T>
  123. inline ::NPrivate::TDereferenced<T> AsciiToUpper(T c) noexcept {
  124. return IsAsciiLower(c) ? (c + ('A' - 'a')) : c;
  125. }
  126. /**
  127. * ASCII case-insensitive string comparison (for proper UTF8 strings
  128. * case-insensitive comparison consider using @c library/cpp/charset).
  129. *
  130. * BUGS: Currently will NOT work properly with strings that contain
  131. * 0-terminator character inside. See IGNIETFERRO-1641 for details.
  132. *
  133. * @return true iff @c s1 ans @c s2 are case-insensitively equal.
  134. */
  135. static inline bool AsciiEqualsIgnoreCase(const char* s1, const char* s2) noexcept {
  136. return ::stricmp(s1, s2) == 0;
  137. }
  138. /**
  139. * ASCII case-insensitive string comparison (for proper UTF8 strings
  140. * case-insensitive comparison consider using @c library/cpp/charset).
  141. *
  142. * BUGS: Currently will NOT work properly with strings that contain
  143. * 0-terminator character inside. See IGNIETFERRO-1641 for details.
  144. *
  145. * @return true iff @c s1 ans @c s2 are case-insensitively equal.
  146. */
  147. static inline bool AsciiEqualsIgnoreCase(const TStringBuf s1, const TStringBuf s2) noexcept {
  148. if (s1.size() != s2.size()) {
  149. return false;
  150. }
  151. if (s1.empty()) {
  152. return true;
  153. }
  154. return ::strnicmp(s1.data(), s2.data(), s1.size()) == 0;
  155. }
  156. /**
  157. * ASCII case-insensitive string comparison (for proper UTF8 strings
  158. * case-insensitive comparison consider using @c library/cpp/charset).
  159. *
  160. * BUGS: Currently will NOT work properly with strings that contain
  161. * 0-terminator character inside. See IGNIETFERRO-1641 for details.
  162. *
  163. * @return 0 if strings are equal, negative if @c s1 < @c s2
  164. * and positive otherwise.
  165. * (same value as @c stricmp does).
  166. */
  167. static inline int AsciiCompareIgnoreCase(const char* s1, const char* s2) noexcept {
  168. return ::stricmp(s1, s2);
  169. }
  /**
   * ASCII case-insensitive ordering of two string buffers (for proper UTF8
   * case-insensitive comparison consider using @c library/cpp/charset).
   *
   * Only declared here; the definition is not part of this header.
   *
   * BUGS: Currently will NOT work properly with strings that contain
   * 0-terminator character inside. See IGNIETFERRO-1641 for details.
   *
   * @param s1 left-hand string.
   * @param s2 right-hand string.
   * @return
   * - zero if the strings are equal
   * - negative if @c s1 < @c s2
   * - positive otherwise,
   * similar to stricmp.
   */
  Y_PURE_FUNCTION int AsciiCompareIgnoreCase(const TStringBuf s1, const TStringBuf s2) noexcept;
  184. /**
  185. * ASCII case-sensitive string comparison (for proper UTF8 strings
  186. * case-sensitive comparison consider using @c library/cpp/charset).
  187. *
  188. * BUGS: Currently will NOT work properly with strings that contain
  189. * 0-terminator character inside. See IGNIETFERRO-1641 for details.
  190. *
  191. * @return true iff @c s2 are case-sensitively prefix of @c s1.
  192. */
  193. static inline bool AsciiHasPrefix(const TStringBuf s1, const TStringBuf s2) noexcept {
  194. return (s1.size() >= s2.size()) && memcmp(s1.data(), s2.data(), s2.size()) == 0;
  195. }
  196. /**
  197. * ASCII case-insensitive string comparison (for proper UTF8 strings
  198. * case-insensitive comparison consider using @c library/cpp/charset).
  199. *
  200. * @return true iff @c s2 are case-insensitively prefix of @c s1.
  201. */
  202. static inline bool AsciiHasPrefixIgnoreCase(const TStringBuf s1, const TStringBuf s2) noexcept {
  203. return (s1.size() >= s2.size()) && ::strnicmp(s1.data(), s2.data(), s2.size()) == 0;
  204. }
  205. /**
  206. * ASCII case-insensitive string comparison (for proper UTF8 strings
  207. * case-insensitive comparison consider using @c library/cpp/charset).
  208. *
  209. * @return true iff @c s2 are case-insensitively suffix of @c s1.
  210. */
  211. static inline bool AsciiHasSuffixIgnoreCase(const TStringBuf s1, const TStringBuf s2) noexcept {
  212. return (s1.size() >= s2.size()) && ::strnicmp((s1.data() + (s1.size() - s2.size())), s2.data(), s2.size()) == 0;
  213. }