string.cpp 8.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362
  1. #include "string.h"
  2. #include "format.h"
  3. #include <library/cpp/yt/assert/assert.h>
  4. #include <library/cpp/yt/exception/exception.h>
  5. #include <util/generic/hash.h>
  6. #include <util/string/ascii.h>
  7. namespace NYT {
  8. ////////////////////////////////////////////////////////////////////////////////
  9. void UnderscoreCaseToCamelCase(TStringBuilderBase* builder, TStringBuf str)
  10. {
  11. bool first = true;
  12. bool upper = true;
  13. for (char c : str) {
  14. if (c == '_') {
  15. upper = true;
  16. } else {
  17. if (upper) {
  18. if (!std::isalpha(c) && !first) {
  19. builder->AppendChar('_');
  20. }
  21. c = std::toupper(c);
  22. }
  23. builder->AppendChar(c);
  24. upper = false;
  25. }
  26. first = false;
  27. }
  28. }
  29. TString UnderscoreCaseToCamelCase(TStringBuf str)
  30. {
  31. TStringBuilder builder;
  32. UnderscoreCaseToCamelCase(&builder, str);
  33. return builder.Flush();
  34. }
  35. void CamelCaseToUnderscoreCase(TStringBuilderBase* builder, TStringBuf str)
  36. {
  37. bool first = true;
  38. for (char c : str) {
  39. if (std::isupper(c) && std::isalpha(c)) {
  40. if (!first) {
  41. builder->AppendChar('_');
  42. }
  43. c = std::tolower(c);
  44. }
  45. builder->AppendChar(c);
  46. first = false;
  47. }
  48. }
  49. TString CamelCaseToUnderscoreCase(TStringBuf str)
  50. {
  51. TStringBuilder builder;
  52. CamelCaseToUnderscoreCase(&builder, str);
  53. return builder.Flush();
  54. }
  55. ////////////////////////////////////////////////////////////////////////////////
  56. TString TrimLeadingWhitespaces(const TString& str)
  57. {
  58. for (int i = 0; i < static_cast<int>(str.size()); ++i) {
  59. if (str[i] != ' ') {
  60. return str.substr(i);
  61. }
  62. }
  63. return "";
  64. }
  65. TString Trim(const TString& str, const TString& whitespaces)
  66. {
  67. size_t end = str.size();
  68. while (end > 0) {
  69. size_t i = end - 1;
  70. bool isWhitespace = false;
  71. for (auto c : whitespaces) {
  72. if (str[i] == c) {
  73. isWhitespace = true;
  74. break;
  75. }
  76. }
  77. if (!isWhitespace) {
  78. break;
  79. }
  80. --end;
  81. }
  82. if (end == 0) {
  83. return "";
  84. }
  85. size_t begin = str.find_first_not_of(whitespaces);
  86. YT_VERIFY(begin != TString::npos);
  87. YT_VERIFY(begin < end);
  88. return str.substr(begin, end - begin);
  89. }
  90. ////////////////////////////////////////////////////////////////////////////////
  91. namespace {
  92. const ui16 DecimalDigits2[100] = {
  93. 12336, 12592, 12848, 13104, 13360, 13616, 13872, 14128, 14384, 14640,
  94. 12337, 12593, 12849, 13105, 13361, 13617, 13873, 14129, 14385, 14641,
  95. 12338, 12594, 12850, 13106, 13362, 13618, 13874, 14130, 14386, 14642,
  96. 12339, 12595, 12851, 13107, 13363, 13619, 13875, 14131, 14387, 14643,
  97. 12340, 12596, 12852, 13108, 13364, 13620, 13876, 14132, 14388, 14644,
  98. 12341, 12597, 12853, 13109, 13365, 13621, 13877, 14133, 14389, 14645,
  99. 12342, 12598, 12854, 13110, 13366, 13622, 13878, 14134, 14390, 14646,
  100. 12343, 12599, 12855, 13111, 13367, 13623, 13879, 14135, 14391, 14647,
  101. 12344, 12600, 12856, 13112, 13368, 13624, 13880, 14136, 14392, 14648,
  102. 12345, 12601, 12857, 13113, 13369, 13625, 13881, 14137, 14393, 14649
  103. };
  104. template <class T>
  105. char* WriteSignedDecIntToBufferBackwardsImpl(char* ptr, T value, TStringBuf min)
  106. {
  107. if (value == 0) {
  108. --ptr;
  109. *ptr = '0';
  110. return ptr;
  111. }
  112. // The negative value handling code below works incorrectly for min values.
  113. if (value == std::numeric_limits<T>::min()) {
  114. ptr -= min.length();
  115. ::memcpy(ptr, min.begin(), min.length());
  116. return ptr;
  117. }
  118. bool negative = false;
  119. if (value < 0) {
  120. negative = true;
  121. value = -value;
  122. }
  123. while (value >= 10) {
  124. auto rem = value % 100;
  125. auto quot = value / 100;
  126. ptr -= 2;
  127. ::memcpy(ptr, &DecimalDigits2[rem], 2);
  128. value = quot;
  129. }
  130. if (value > 0) {
  131. --ptr;
  132. *ptr = ('0' + value);
  133. }
  134. if (negative) {
  135. --ptr;
  136. *ptr = '-';
  137. }
  138. return ptr;
  139. }
  140. template <class T>
  141. char* WriteUnsignedDecIntToBufferBackwardsImpl(char* ptr, T value)
  142. {
  143. if (value == 0) {
  144. --ptr;
  145. *ptr = '0';
  146. return ptr;
  147. }
  148. while (value >= 10) {
  149. auto rem = value % 100;
  150. auto quot = value / 100;
  151. ptr -= 2;
  152. ::memcpy(ptr, &DecimalDigits2[rem], 2);
  153. value = quot;
  154. }
  155. if (value > 0) {
  156. --ptr;
  157. *ptr = ('0' + value);
  158. }
  159. return ptr;
  160. }
  161. } // namespace
  162. template <>
  163. char* WriteDecIntToBufferBackwards(char* ptr, i32 value)
  164. {
  165. return WriteSignedDecIntToBufferBackwardsImpl(ptr, value, TStringBuf("-2147483647"));
  166. }
  167. template <>
  168. char* WriteDecIntToBufferBackwards(char* ptr, i64 value)
  169. {
  170. return WriteSignedDecIntToBufferBackwardsImpl(ptr, value, TStringBuf("-9223372036854775808"));
  171. }
  172. template <>
  173. char* WriteDecIntToBufferBackwards(char* ptr, ui32 value)
  174. {
  175. return WriteUnsignedDecIntToBufferBackwardsImpl(ptr, value);
  176. }
  177. template <>
  178. char* WriteDecIntToBufferBackwards(char* ptr, ui64 value)
  179. {
  180. return WriteUnsignedDecIntToBufferBackwardsImpl(ptr, value);
  181. }
  182. ////////////////////////////////////////////////////////////////////////////////
  183. namespace {
  184. template <class T>
  185. char* WriteSignedHexIntToBufferBackwardsImpl(char* ptr, T value, bool uppercase, TStringBuf min)
  186. {
  187. if (value == 0) {
  188. --ptr;
  189. *ptr = '0';
  190. return ptr;
  191. }
  192. // The negative value handling code below works incorrectly for min values.
  193. if (value == std::numeric_limits<T>::min()) {
  194. ptr -= min.length();
  195. ::memcpy(ptr, min.begin(), min.length());
  196. return ptr;
  197. }
  198. bool negative = false;
  199. if (value < 0) {
  200. negative = true;
  201. value = -value;
  202. }
  203. while (value != 0) {
  204. auto rem = value & 0xf;
  205. auto quot = value >> 4;
  206. --ptr;
  207. *ptr = uppercase ? IntToHexUppercase[rem] : IntToHexLowercase[rem];
  208. value = quot;
  209. }
  210. if (negative) {
  211. --ptr;
  212. *ptr = '-';
  213. }
  214. return ptr;
  215. }
  216. template <class T>
  217. char* WriteUnsignedHexIntToBufferBackwardsImpl(char* ptr, T value, bool uppercase)
  218. {
  219. if (value == 0) {
  220. --ptr;
  221. *ptr = '0';
  222. return ptr;
  223. }
  224. while (value != 0) {
  225. auto rem = value & 0xf;
  226. auto quot = value >> 4;
  227. --ptr;
  228. *ptr = uppercase ? IntToHexUppercase[rem] : IntToHexLowercase[rem];
  229. value = quot;
  230. }
  231. return ptr;
  232. }
  233. } // namespace
  234. template <>
  235. char* WriteHexIntToBufferBackwards(char* ptr, i32 value, bool uppercase)
  236. {
  237. return WriteSignedHexIntToBufferBackwardsImpl(ptr, value, uppercase, TStringBuf("-80000000"));
  238. }
  239. template <>
  240. char* WriteHexIntToBufferBackwards(char* ptr, i64 value, bool uppercase)
  241. {
  242. return WriteSignedHexIntToBufferBackwardsImpl(ptr, value, uppercase, TStringBuf("-8000000000000000"));
  243. }
  244. template <>
  245. char* WriteHexIntToBufferBackwards(char* ptr, ui32 value, bool uppercase)
  246. {
  247. return WriteUnsignedHexIntToBufferBackwardsImpl(ptr, value, uppercase);
  248. }
  249. template <>
  250. char* WriteHexIntToBufferBackwards(char* ptr, ui64 value, bool uppercase)
  251. {
  252. return WriteUnsignedHexIntToBufferBackwardsImpl(ptr, value, uppercase);
  253. }
  254. ////////////////////////////////////////////////////////////////////////////////
  255. size_t TCaseInsensitiveStringHasher::operator()(TStringBuf arg) const
  256. {
  257. auto compute = [&] (char* buffer) {
  258. for (size_t index = 0; index < arg.length(); ++index) {
  259. buffer[index] = AsciiToLower(arg[index]);
  260. }
  261. return ComputeHash(TStringBuf(buffer, arg.length()));
  262. };
  263. const size_t SmallSize = 256;
  264. if (arg.length() <= SmallSize) {
  265. std::array<char, SmallSize> stackBuffer;
  266. return compute(stackBuffer.data());
  267. } else {
  268. std::unique_ptr<char[]> heapBuffer(new char[arg.length()]);
  269. return compute(heapBuffer.get());
  270. }
  271. }
  272. bool TCaseInsensitiveStringEqualityComparer::operator()(TStringBuf lhs, TStringBuf rhs) const
  273. {
  274. return AsciiEqualsIgnoreCase(lhs, rhs);
  275. }
  276. ////////////////////////////////////////////////////////////////////////////////
  277. bool TryParseBool(TStringBuf value, bool* result)
  278. {
  279. if (value == "true" || value == "1") {
  280. *result = true;
  281. return true;
  282. } else if (value == "false" || value == "0") {
  283. *result = false;
  284. return true;
  285. } else {
  286. return false;
  287. }
  288. }
  289. bool ParseBool(TStringBuf value)
  290. {
  291. bool result;
  292. if (!TryParseBool(value, &result)) {
  293. throw TSimpleException(Format("Error parsing boolean value %Qv",
  294. value));
  295. }
  296. return result;
  297. }
  298. TStringBuf FormatBool(bool value)
  299. {
  300. return value ? TStringBuf("true") : TStringBuf("false");
  301. }
  302. ////////////////////////////////////////////////////////////////////////////////
  303. } // namespace NYT