string.cpp 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360
  1. #include "string.h"
  2. #include "format.h"
  3. #include <library/cpp/yt/assert/assert.h>
  4. #include <util/generic/hash.h>
  5. #include <util/string/ascii.h>
  6. namespace NYT {
  7. ////////////////////////////////////////////////////////////////////////////////
  8. void UnderscoreCaseToCamelCase(TStringBuilderBase* builder, TStringBuf str)
  9. {
  10. bool first = true;
  11. bool upper = true;
  12. for (char c : str) {
  13. if (c == '_') {
  14. upper = true;
  15. } else {
  16. if (upper) {
  17. if (!std::isalpha(c) && !first) {
  18. builder->AppendChar('_');
  19. }
  20. c = std::toupper(c);
  21. }
  22. builder->AppendChar(c);
  23. upper = false;
  24. }
  25. first = false;
  26. }
  27. }
  28. TString UnderscoreCaseToCamelCase(TStringBuf str)
  29. {
  30. TStringBuilder builder;
  31. UnderscoreCaseToCamelCase(&builder, str);
  32. return builder.Flush();
  33. }
  34. void CamelCaseToUnderscoreCase(TStringBuilderBase* builder, TStringBuf str)
  35. {
  36. bool first = true;
  37. for (char c : str) {
  38. if (std::isupper(c) && std::isalpha(c)) {
  39. if (!first) {
  40. builder->AppendChar('_');
  41. }
  42. c = std::tolower(c);
  43. }
  44. builder->AppendChar(c);
  45. first = false;
  46. }
  47. }
  48. TString CamelCaseToUnderscoreCase(TStringBuf str)
  49. {
  50. TStringBuilder builder;
  51. CamelCaseToUnderscoreCase(&builder, str);
  52. return builder.Flush();
  53. }
  54. ////////////////////////////////////////////////////////////////////////////////
  55. TString TrimLeadingWhitespaces(const TString& str)
  56. {
  57. for (int i = 0; i < static_cast<int>(str.size()); ++i) {
  58. if (str[i] != ' ') {
  59. return str.substr(i);
  60. }
  61. }
  62. return "";
  63. }
  64. TString Trim(const TString& str, const TString& whitespaces)
  65. {
  66. size_t end = str.size();
  67. while (end > 0) {
  68. size_t i = end - 1;
  69. bool isWhitespace = false;
  70. for (auto c : whitespaces) {
  71. if (str[i] == c) {
  72. isWhitespace = true;
  73. break;
  74. }
  75. }
  76. if (!isWhitespace) {
  77. break;
  78. }
  79. --end;
  80. }
  81. if (end == 0) {
  82. return "";
  83. }
  84. size_t begin = str.find_first_not_of(whitespaces);
  85. YT_VERIFY(begin != TString::npos);
  86. YT_VERIFY(begin < end);
  87. return str.substr(begin, end - begin);
  88. }
  89. ////////////////////////////////////////////////////////////////////////////////
  90. namespace {
  91. const ui16 DecimalDigits2[100] = {
  92. 12336, 12592, 12848, 13104, 13360, 13616, 13872, 14128, 14384, 14640,
  93. 12337, 12593, 12849, 13105, 13361, 13617, 13873, 14129, 14385, 14641,
  94. 12338, 12594, 12850, 13106, 13362, 13618, 13874, 14130, 14386, 14642,
  95. 12339, 12595, 12851, 13107, 13363, 13619, 13875, 14131, 14387, 14643,
  96. 12340, 12596, 12852, 13108, 13364, 13620, 13876, 14132, 14388, 14644,
  97. 12341, 12597, 12853, 13109, 13365, 13621, 13877, 14133, 14389, 14645,
  98. 12342, 12598, 12854, 13110, 13366, 13622, 13878, 14134, 14390, 14646,
  99. 12343, 12599, 12855, 13111, 13367, 13623, 13879, 14135, 14391, 14647,
  100. 12344, 12600, 12856, 13112, 13368, 13624, 13880, 14136, 14392, 14648,
  101. 12345, 12601, 12857, 13113, 13369, 13625, 13881, 14137, 14393, 14649
  102. };
  103. template <class T>
  104. char* WriteSignedDecIntToBufferBackwardsImpl(char* ptr, T value, TStringBuf min)
  105. {
  106. if (value == 0) {
  107. --ptr;
  108. *ptr = '0';
  109. return ptr;
  110. }
  111. // The negative value handling code below works incorrectly for min values.
  112. if (value == std::numeric_limits<T>::min()) {
  113. ptr -= min.length();
  114. ::memcpy(ptr, min.begin(), min.length());
  115. return ptr;
  116. }
  117. bool negative = false;
  118. if (value < 0) {
  119. negative = true;
  120. value = -value;
  121. }
  122. while (value >= 10) {
  123. auto rem = value % 100;
  124. auto quot = value / 100;
  125. ptr -= 2;
  126. ::memcpy(ptr, &DecimalDigits2[rem], 2);
  127. value = quot;
  128. }
  129. if (value > 0) {
  130. --ptr;
  131. *ptr = ('0' + value);
  132. }
  133. if (negative) {
  134. --ptr;
  135. *ptr = '-';
  136. }
  137. return ptr;
  138. }
  139. template <class T>
  140. char* WriteUnsignedDecIntToBufferBackwardsImpl(char* ptr, T value)
  141. {
  142. if (value == 0) {
  143. --ptr;
  144. *ptr = '0';
  145. return ptr;
  146. }
  147. while (value >= 10) {
  148. auto rem = value % 100;
  149. auto quot = value / 100;
  150. ptr -= 2;
  151. ::memcpy(ptr, &DecimalDigits2[rem], 2);
  152. value = quot;
  153. }
  154. if (value > 0) {
  155. --ptr;
  156. *ptr = ('0' + value);
  157. }
  158. return ptr;
  159. }
  160. } // namespace
  161. template <>
  162. char* WriteDecIntToBufferBackwards(char* ptr, i32 value)
  163. {
  164. return WriteSignedDecIntToBufferBackwardsImpl(ptr, value, TStringBuf("-2147483647"));
  165. }
  166. template <>
  167. char* WriteDecIntToBufferBackwards(char* ptr, i64 value)
  168. {
  169. return WriteSignedDecIntToBufferBackwardsImpl(ptr, value, TStringBuf("-9223372036854775808"));
  170. }
  171. template <>
  172. char* WriteDecIntToBufferBackwards(char* ptr, ui32 value)
  173. {
  174. return WriteUnsignedDecIntToBufferBackwardsImpl(ptr, value);
  175. }
  176. template <>
  177. char* WriteDecIntToBufferBackwards(char* ptr, ui64 value)
  178. {
  179. return WriteUnsignedDecIntToBufferBackwardsImpl(ptr, value);
  180. }
  181. ////////////////////////////////////////////////////////////////////////////////
  182. namespace {
  183. template <class T>
  184. char* WriteSignedHexIntToBufferBackwardsImpl(char* ptr, T value, bool uppercase, TStringBuf min)
  185. {
  186. if (value == 0) {
  187. --ptr;
  188. *ptr = '0';
  189. return ptr;
  190. }
  191. // The negative value handling code below works incorrectly for min values.
  192. if (value == std::numeric_limits<T>::min()) {
  193. ptr -= min.length();
  194. ::memcpy(ptr, min.begin(), min.length());
  195. return ptr;
  196. }
  197. bool negative = false;
  198. if (value < 0) {
  199. negative = true;
  200. value = -value;
  201. }
  202. while (value != 0) {
  203. auto rem = value & 0xf;
  204. auto quot = value >> 4;
  205. --ptr;
  206. *ptr = uppercase ? IntToHexUppercase[rem] : IntToHexLowercase[rem];
  207. value = quot;
  208. }
  209. if (negative) {
  210. --ptr;
  211. *ptr = '-';
  212. }
  213. return ptr;
  214. }
  215. template <class T>
  216. char* WriteUnsignedHexIntToBufferBackwardsImpl(char* ptr, T value, bool uppercase)
  217. {
  218. if (value == 0) {
  219. --ptr;
  220. *ptr = '0';
  221. return ptr;
  222. }
  223. while (value != 0) {
  224. auto rem = value & 0xf;
  225. auto quot = value >> 4;
  226. --ptr;
  227. *ptr = uppercase ? IntToHexUppercase[rem] : IntToHexLowercase[rem];
  228. value = quot;
  229. }
  230. return ptr;
  231. }
  232. } // namespace
  233. template <>
  234. char* WriteHexIntToBufferBackwards(char* ptr, i32 value, bool uppercase)
  235. {
  236. return WriteSignedHexIntToBufferBackwardsImpl(ptr, value, uppercase, TStringBuf("-80000000"));
  237. }
  238. template <>
  239. char* WriteHexIntToBufferBackwards(char* ptr, i64 value, bool uppercase)
  240. {
  241. return WriteSignedHexIntToBufferBackwardsImpl(ptr, value, uppercase, TStringBuf("-8000000000000000"));
  242. }
  243. template <>
  244. char* WriteHexIntToBufferBackwards(char* ptr, ui32 value, bool uppercase)
  245. {
  246. return WriteUnsignedHexIntToBufferBackwardsImpl(ptr, value, uppercase);
  247. }
  248. template <>
  249. char* WriteHexIntToBufferBackwards(char* ptr, ui64 value, bool uppercase)
  250. {
  251. return WriteUnsignedHexIntToBufferBackwardsImpl(ptr, value, uppercase);
  252. }
  253. ////////////////////////////////////////////////////////////////////////////////
  254. size_t TCaseInsensitiveStringHasher::operator()(TStringBuf arg) const
  255. {
  256. auto compute = [&] (char* buffer) {
  257. for (size_t index = 0; index < arg.length(); ++index) {
  258. buffer[index] = AsciiToLower(arg[index]);
  259. }
  260. return ComputeHash(TStringBuf(buffer, arg.length()));
  261. };
  262. const size_t SmallSize = 256;
  263. if (arg.length() <= SmallSize) {
  264. std::array<char, SmallSize> stackBuffer;
  265. return compute(stackBuffer.data());
  266. } else {
  267. std::unique_ptr<char[]> heapBuffer(new char[arg.length()]);
  268. return compute(heapBuffer.get());
  269. }
  270. }
  271. bool TCaseInsensitiveStringEqualityComparer::operator()(TStringBuf lhs, TStringBuf rhs) const
  272. {
  273. return AsciiEqualsIgnoreCase(lhs, rhs);
  274. }
  275. ////////////////////////////////////////////////////////////////////////////////
  276. bool TryParseBool(TStringBuf value, bool* result)
  277. {
  278. if (value == "true" || value == "1") {
  279. *result = true;
  280. return true;
  281. } else if (value == "false" || value == "0") {
  282. *result = false;
  283. return true;
  284. } else {
  285. return false;
  286. }
  287. }
  288. bool ParseBool(TStringBuf value)
  289. {
  290. bool result;
  291. if (!TryParseBool(value, &result)) {
  292. throw TSimpleException(Format("Error parsing boolean value %Qv",
  293. value));
  294. }
  295. return result;
  296. }
  297. TStringBuf FormatBool(bool value)
  298. {
  299. return value ? TStringBuf("true") : TStringBuf("false");
  300. }
  301. ////////////////////////////////////////////////////////////////////////////////
  302. } // namespace NYT