|
- #pragma once
- #include <util/system/defaults.h>
- #include <util/system/compat.h>
- #include <util/generic/string.h>
- // ctype.h-like functions, locale-independent:
- // IsAscii{Upper,Lower,Digit,Alpha,Alnum,Space} and
- // AsciiTo{Upper,Lower}
- //
- // standard functions from <ctype.h> are locale dependent,
- // and cause undefined behavior when called on chars outside [0..127] range
- namespace NPrivate {
- enum ECharClass {
- CC_SPACE = 1,
- CC_UPPER = 2,
- CC_LOWER = 4,
- CC_DIGIT = 8,
- CC_ALPHA = 16,
- CC_ALNUM = 32,
- CC_ISHEX = 64,
- CC_PUNCT = 128,
- };
- extern const unsigned char ASCII_CLASS[256];
- extern const unsigned char ASCII_LOWER[256];
- template <class T>
- struct TDereference {
- using type = T;
- };
- #ifndef TSTRING_IS_STD_STRING
- template <class String>
- struct TDereference<TBasicCharRef<String>> {
- using type = typename String::value_type;
- };
- #endif
- template <class T>
- using TDereferenced = typename TDereference<T>::type;
- template <class T>
- bool RangeOk(T c) noexcept {
- static_assert(std::is_integral<T>::value, "Integral type character expected");
- if (sizeof(T) == 1) {
- return true;
- }
- return c >= static_cast<T>(0) && c <= static_cast<T>(127);
- }
- #ifndef TSTRING_IS_STD_STRING
- template <class String>
- bool RangeOk(const TBasicCharRef<String>& c) {
- return RangeOk(static_cast<typename String::value_type>(c));
- }
- #endif
- } // namespace NPrivate
// Returns true iff c has no bits set outside the low seven,
// i.e. c lies in [0, 127].
constexpr bool IsAscii(const int c) noexcept {
    return (c & ~0x7f) == 0;
}
- inline bool IsAsciiSpace(unsigned char c) {
- return ::NPrivate::ASCII_CLASS[c] & ::NPrivate::CC_SPACE;
- }
- inline bool IsAsciiUpper(unsigned char c) {
- return ::NPrivate::ASCII_CLASS[c] & ::NPrivate::CC_UPPER;
- }
- inline bool IsAsciiLower(unsigned char c) {
- return ::NPrivate::ASCII_CLASS[c] & ::NPrivate::CC_LOWER;
- }
- inline bool IsAsciiDigit(unsigned char c) {
- return ::NPrivate::ASCII_CLASS[c] & ::NPrivate::CC_DIGIT;
- }
- inline bool IsAsciiAlpha(unsigned char c) {
- return ::NPrivate::ASCII_CLASS[c] & ::NPrivate::CC_ALPHA;
- }
- inline bool IsAsciiAlnum(unsigned char c) {
- return ::NPrivate::ASCII_CLASS[c] & ::NPrivate::CC_ALNUM;
- }
- inline bool IsAsciiHex(unsigned char c) {
- return ::NPrivate::ASCII_CLASS[c] & ::NPrivate::CC_ISHEX;
- }
- inline bool IsAsciiPunct(unsigned char c) {
- return ::NPrivate::ASCII_CLASS[c] & ::NPrivate::CC_PUNCT;
- }
- // some overloads
- template <class T>
- inline bool IsAsciiSpace(T c) {
- return ::NPrivate::RangeOk(c) && IsAsciiSpace(static_cast<unsigned char>(c));
- }
- template <class T>
- inline bool IsAsciiUpper(T c) {
- return ::NPrivate::RangeOk(c) && IsAsciiUpper(static_cast<unsigned char>(c));
- }
- template <class T>
- inline bool IsAsciiLower(T c) {
- return ::NPrivate::RangeOk(c) && IsAsciiLower(static_cast<unsigned char>(c));
- }
- template <class T>
- inline bool IsAsciiDigit(T c) {
- return ::NPrivate::RangeOk(c) && IsAsciiDigit(static_cast<unsigned char>(c));
- }
- template <class T>
- inline bool IsAsciiAlpha(T c) {
- return ::NPrivate::RangeOk(c) && IsAsciiAlpha(static_cast<unsigned char>(c));
- }
- template <class T>
- inline bool IsAsciiAlnum(T c) {
- return ::NPrivate::RangeOk(c) && IsAsciiAlnum(static_cast<unsigned char>(c));
- }
- template <class T>
- inline bool IsAsciiHex(T c) {
- return ::NPrivate::RangeOk(c) && IsAsciiHex(static_cast<unsigned char>(c));
- }
- template <class T>
- inline bool IsAsciiPunct(T c) {
- return ::NPrivate::RangeOk(c) && IsAsciiPunct(static_cast<unsigned char>(c));
- }
- // some extra helpers
- inline ui8 AsciiToLower(ui8 c) noexcept {
- return ::NPrivate::ASCII_LOWER[c];
- }
- inline char AsciiToLower(char c) noexcept {
- return (char)AsciiToLower((ui8)c);
- }
- template <class T>
- inline ::NPrivate::TDereferenced<T> AsciiToLower(T c) noexcept {
- return (c >= 0 && c <= 127) ? (::NPrivate::TDereferenced<T>)AsciiToLower((ui8)c) : c;
- }
- template <class T>
- inline ::NPrivate::TDereferenced<T> AsciiToUpper(T c) noexcept {
- return IsAsciiLower(c) ? (c + ('A' - 'a')) : c;
- }
- /**
- * ASCII case-insensitive string comparison (for proper UTF8 strings
- * case-insensitive comparison consider using @c library/cpp/charset).
- *
- * BUGS: Currently will NOT work properly with strings that contain
- * 0-terminator character inside. See IGNIETFERRO-1641 for details.
- *
- * @return true iff @c s1 ans @c s2 are case-insensitively equal.
- */
- static inline bool AsciiEqualsIgnoreCase(const char* s1, const char* s2) noexcept {
- return ::stricmp(s1, s2) == 0;
- }
- /**
- * ASCII case-insensitive string comparison (for proper UTF8 strings
- * case-insensitive comparison consider using @c library/cpp/charset).
- *
- * BUGS: Currently will NOT work properly with strings that contain
- * 0-terminator character inside. See IGNIETFERRO-1641 for details.
- *
- * @return true iff @c s1 ans @c s2 are case-insensitively equal.
- */
- static inline bool AsciiEqualsIgnoreCase(const TStringBuf s1, const TStringBuf s2) noexcept {
- if (s1.size() != s2.size()) {
- return false;
- }
- if (s1.empty()) {
- return true;
- }
- return ::strnicmp(s1.data(), s2.data(), s1.size()) == 0;
- }
- /**
- * ASCII case-insensitive string comparison (for proper UTF8 strings
- * case-insensitive comparison consider using @c library/cpp/charset).
- *
- * BUGS: Currently will NOT work properly with strings that contain
- * 0-terminator character inside. See IGNIETFERRO-1641 for details.
- *
- * @return 0 if strings are equal, negative if @c s1 < @c s2
- * and positive otherwise.
- * (same value as @c stricmp does).
- */
- static inline int AsciiCompareIgnoreCase(const char* s1, const char* s2) noexcept {
- return ::stricmp(s1, s2);
- }
- /**
- * ASCII case-insensitive string comparison (for proper UTF8 strings
- * case-insensitive comparison consider using @c library/cpp/charset).
- *
- * BUGS: Currently will NOT work properly with strings that contain
- * 0-terminator character inside. See IGNIETFERRO-1641 for details.
- *
- * @return
- * - zero if strings are equal
- * - negative if @c s1 < @c s2
- * - positive otherwise,
- * similar to stricmp.
- */
- Y_PURE_FUNCTION int AsciiCompareIgnoreCase(const TStringBuf s1, const TStringBuf s2) noexcept;
- /**
- * ASCII case-sensitive string comparison (for proper UTF8 strings
- * case-sensitive comparison consider using @c library/cpp/charset).
- *
- * BUGS: Currently will NOT work properly with strings that contain
- * 0-terminator character inside. See IGNIETFERRO-1641 for details.
- *
- * @return true iff @c s2 are case-sensitively prefix of @c s1.
- */
- static inline bool AsciiHasPrefix(const TStringBuf s1, const TStringBuf s2) noexcept {
- return (s1.size() >= s2.size()) && memcmp(s1.data(), s2.data(), s2.size()) == 0;
- }
- /**
- * ASCII case-insensitive string comparison (for proper UTF8 strings
- * case-insensitive comparison consider using @c library/cpp/charset).
- *
- * @return true iff @c s2 are case-insensitively prefix of @c s1.
- */
- static inline bool AsciiHasPrefixIgnoreCase(const TStringBuf s1, const TStringBuf s2) noexcept {
- return (s1.size() >= s2.size()) && ::strnicmp(s1.data(), s2.data(), s2.size()) == 0;
- }
- /**
- * ASCII case-insensitive string comparison (for proper UTF8 strings
- * case-insensitive comparison consider using @c library/cpp/charset).
- *
- * @return true iff @c s2 are case-insensitively suffix of @c s1.
- */
- static inline bool AsciiHasSuffixIgnoreCase(const TStringBuf s1, const TStringBuf s2) noexcept {
- return (s1.size() >= s2.size()) && ::strnicmp((s1.data() + (s1.size() - s2.size())), s2.data(), s2.size()) == 0;
- }
|