str_cat.h 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533
  1. //
  2. // Copyright 2017 The Abseil Authors.
  3. //
  4. // Licensed under the Apache License, Version 2.0 (the "License");
  5. // you may not use this file except in compliance with the License.
  6. // You may obtain a copy of the License at
  7. //
  8. // https://www.apache.org/licenses/LICENSE-2.0
  9. //
  10. // Unless required by applicable law or agreed to in writing, software
  11. // distributed under the License is distributed on an "AS IS" BASIS,
  12. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. // See the License for the specific language governing permissions and
  14. // limitations under the License.
  15. //
  16. // -----------------------------------------------------------------------------
  17. // File: str_cat.h
  18. // -----------------------------------------------------------------------------
  19. //
  20. // This package contains functions for efficiently concatenating and appending
  21. // strings: `StrCat()` and `StrAppend()`. Most of the work within these routines
  22. // is actually handled through use of a special AlphaNum type, which was
  23. // designed to be used as a parameter type that efficiently manages conversion
  24. // to strings and avoids copies in the above operations.
  25. //
  26. // Any routine accepting either a string or a number may accept `AlphaNum`.
  27. // The basic idea is that by accepting a `const AlphaNum &` as an argument
  28. // to your function, your callers will automagically convert bools, integers,
  29. // and floating point values to strings for you.
  30. //
  31. // NOTE: Use of `AlphaNum` outside of the //y_absl/strings package is unsupported
  32. // except for the specific case of function parameters of type `AlphaNum` or
  33. // `const AlphaNum &`. In particular, instantiating `AlphaNum` directly as a
  34. // stack variable is not supported.
  35. //
  36. // Conversion from 8-bit values is not accepted because, if it were, then an
  37. // attempt to pass ':' instead of ":" might result in a 58 ending up in your
  38. // result.
  39. //
  40. // Bools convert to "0" or "1". Pointers to types other than `char *` are not
  41. // valid inputs. No output is generated for null `char *` pointers.
  42. //
  43. // Floating point numbers are formatted with six-digit precision, which is
  44. // the default for "std::cout <<" or printf "%g" (the same as "%.6g").
  45. //
  46. // You can convert to hexadecimal output rather than decimal output using the
  47. // `Hex` type contained here. To do so, pass `Hex(my_int)` as a parameter to
  48. // `StrCat()` or `StrAppend()`. You may specify a minimum hex field width using
  49. // a `PadSpec` enum.
  50. //
  51. // User-defined types can be formatted with the `AbslStringify()` customization
  52. // point. The API relies on detecting an overload in the user-defined type's
  53. // namespace of a free (non-member) `AbslStringify()` function, defined
  54. // (typically declared as a friend and implemented in-line) with the
  55. // following signature:
  56. //
  57. // class MyClass { ... };
  58. //
  59. // template <typename Sink>
  60. // void AbslStringify(Sink& sink, const MyClass& value);
  61. //
  62. // An `AbslStringify()` overload for a type should only be declared in the same
  63. // file and namespace as said type.
  64. //
  65. // Note that `AbslStringify()` also supports use with `y_absl::StrFormat()` and
  66. // `y_absl::Substitute()`.
  67. //
  68. // Example:
  69. //
  70. // struct Point {
  71. // // To add formatting support to `Point`, we simply need to add a free
  72. // // (non-member) function `AbslStringify()`. This method specifies how
  73. // // Point should be printed when y_absl::StrCat() is called on it. You can add
  74. // // such a free function using a friend declaration within the body of the
  75. // // class. The sink parameter is a templated type to avoid requiring
  76. // // dependencies.
  77. // template <typename Sink> friend void AbslStringify(Sink&
  78. // sink, const Point& p) {
  79. // y_absl::Format(&sink, "(%v, %v)", p.x, p.y);
  80. // }
  81. //
  82. // int x;
  83. // int y;
  84. // };
  85. // -----------------------------------------------------------------------------
  86. #ifndef Y_ABSL_STRINGS_STR_CAT_H_
  87. #define Y_ABSL_STRINGS_STR_CAT_H_
  88. #include <algorithm>
  89. #include <array>
  90. #include <cstdint>
  91. #include <cstring>
  92. #include <util/generic/string.h>
  93. #include <type_traits>
  94. #include <utility>
  95. #include <vector>
  96. #include "y_absl/base/attributes.h"
  97. #include "y_absl/base/port.h"
  98. #include "y_absl/strings/internal/has_absl_stringify.h"
  99. #include "y_absl/strings/internal/stringify_sink.h"
  100. #include "y_absl/strings/numbers.h"
  101. #include "y_absl/strings/string_view.h"
  102. namespace y_absl {
  103. Y_ABSL_NAMESPACE_BEGIN
  namespace strings_internal {

  // AlphaNumBuffer carries already-formatted characters by value, so that a
  // string can be handed to StrCat without any memory allocation. It is a plain
  // aggregate: a fixed-capacity character array plus the number of characters
  // in that array that are actually in use.
  //
  // Please don't use outside of y_absl, yet.
  template <size_t max_size>
  struct AlphaNumBuffer {
    // Storage for the characters; only the first `size` entries are meaningful.
    std::array<char, max_size> data;
    // Count of valid characters at the front of `data`.
    size_t size;
  };

  }  // namespace strings_internal
  114. // Enum that specifies the minimum field width of a `Hex` or `Dec` conversion
  115. // and the fill character to use. A `kZeroPad2` value, for example,
  116. // would produce hexadecimal strings such as "0a","0f" and a `kSpacePad5` value
  117. // would produce hexadecimal strings such as "    a","    f".
  enum PadSpec : uint8_t {
    // No padding requested.
    kNoPad = 1,

    // Zero-filled widths: the enumerator's value equals the requested width.
    kZeroPad2 = 2,
    kZeroPad3 = 3,
    kZeroPad4 = 4,
    kZeroPad5 = 5,
    kZeroPad6 = 6,
    kZeroPad7 = 7,
    kZeroPad8 = 8,
    kZeroPad9 = 9,
    kZeroPad10 = 10,
    kZeroPad11 = 11,
    kZeroPad12 = 12,
    kZeroPad13 = 13,
    kZeroPad14 = 14,
    kZeroPad15 = 15,
    kZeroPad16 = 16,
    kZeroPad17 = 17,
    kZeroPad18 = 18,
    kZeroPad19 = 19,
    kZeroPad20 = 20,

    // Space-filled widths: each one is the matching zero-filled enumerator
    // shifted up by 64, so `spec >= kSpacePad2` identifies space padding.
    kSpacePad2 = kZeroPad2 + 64,
    kSpacePad3 = kZeroPad3 + 64,
    kSpacePad4 = kZeroPad4 + 64,
    kSpacePad5 = kZeroPad5 + 64,
    kSpacePad6 = kZeroPad6 + 64,
    kSpacePad7 = kZeroPad7 + 64,
    kSpacePad8 = kZeroPad8 + 64,
    kSpacePad9 = kZeroPad9 + 64,
    kSpacePad10 = kZeroPad10 + 64,
    kSpacePad11 = kZeroPad11 + 64,
    kSpacePad12 = kZeroPad12 + 64,
    kSpacePad13 = kZeroPad13 + 64,
    kSpacePad14 = kZeroPad14 + 64,
    kSpacePad15 = kZeroPad15 + 64,
    kSpacePad16 = kZeroPad16 + 64,
    kSpacePad17 = kZeroPad17 + 64,
    kSpacePad18 = kZeroPad18 + 64,
    kSpacePad19 = kZeroPad19 + 64,
    kSpacePad20 = kZeroPad20 + 64,
  };
  159. // -----------------------------------------------------------------------------
  160. // Hex
  161. // -----------------------------------------------------------------------------
  162. //
  163. // `Hex` stores a set of hexadecimal string conversion parameters for use
  164. // within `AlphaNum` string conversions.
  165. struct Hex {
  166. uint64_t value;
  167. uint8_t width;
  168. char fill;
  169. template <typename Int>
  170. explicit Hex(
  171. Int v, PadSpec spec = y_absl::kNoPad,
  172. typename std::enable_if<sizeof(Int) == 1 &&
  173. !std::is_pointer<Int>::value>::type* = nullptr)
  174. : Hex(spec, static_cast<uint8_t>(v)) {}
  175. template <typename Int>
  176. explicit Hex(
  177. Int v, PadSpec spec = y_absl::kNoPad,
  178. typename std::enable_if<sizeof(Int) == 2 &&
  179. !std::is_pointer<Int>::value>::type* = nullptr)
  180. : Hex(spec, static_cast<uint16_t>(v)) {}
  181. template <typename Int>
  182. explicit Hex(
  183. Int v, PadSpec spec = y_absl::kNoPad,
  184. typename std::enable_if<sizeof(Int) == 4 &&
  185. !std::is_pointer<Int>::value>::type* = nullptr)
  186. : Hex(spec, static_cast<uint32_t>(v)) {}
  187. template <typename Int>
  188. explicit Hex(
  189. Int v, PadSpec spec = y_absl::kNoPad,
  190. typename std::enable_if<sizeof(Int) == 8 &&
  191. !std::is_pointer<Int>::value>::type* = nullptr)
  192. : Hex(spec, static_cast<uint64_t>(v)) {}
  193. template <typename Pointee>
  194. explicit Hex(Pointee* v, PadSpec spec = y_absl::kNoPad)
  195. : Hex(spec, reinterpret_cast<uintptr_t>(v)) {}
  196. template <typename S>
  197. friend void AbslStringify(S& sink, Hex hex) {
  198. static_assert(
  199. numbers_internal::kFastToBufferSize >= 32,
  200. "This function only works when output buffer >= 32 bytes long");
  201. char buffer[numbers_internal::kFastToBufferSize];
  202. char* const end = &buffer[numbers_internal::kFastToBufferSize];
  203. auto real_width =
  204. y_absl::numbers_internal::FastHexToBufferZeroPad16(hex.value, end - 16);
  205. if (real_width >= hex.width) {
  206. sink.Append(y_absl::string_view(end - real_width, real_width));
  207. } else {
  208. // Pad first 16 chars because FastHexToBufferZeroPad16 pads only to 16 and
  209. // max pad width can be up to 20.
  210. std::memset(end - 32, hex.fill, 16);
  211. // Patch up everything else up to the real_width.
  212. std::memset(end - real_width - 16, hex.fill, 16);
  213. sink.Append(y_absl::string_view(end - hex.width, hex.width));
  214. }
  215. }
  216. private:
  217. Hex(PadSpec spec, uint64_t v)
  218. : value(v),
  219. width(spec == y_absl::kNoPad
  220. ? 1
  221. : spec >= y_absl::kSpacePad2 ? spec - y_absl::kSpacePad2 + 2
  222. : spec - y_absl::kZeroPad2 + 2),
  223. fill(spec >= y_absl::kSpacePad2 ? ' ' : '0') {}
  224. };
  225. // -----------------------------------------------------------------------------
  226. // Dec
  227. // -----------------------------------------------------------------------------
  228. //
  229. // `Dec` stores a set of decimal string conversion parameters for use
  230. // within `AlphaNum` string conversions. Dec is slower than the default
  231. // integer conversion, so use it only if you need padding.
  232. struct Dec {
  233. uint64_t value;
  234. uint8_t width;
  235. char fill;
  236. bool neg;
  237. template <typename Int>
  238. explicit Dec(Int v, PadSpec spec = y_absl::kNoPad,
  239. typename std::enable_if<(sizeof(Int) <= 8)>::type* = nullptr)
  240. : value(v >= 0 ? static_cast<uint64_t>(v)
  241. : uint64_t{0} - static_cast<uint64_t>(v)),
  242. width(spec == y_absl::kNoPad
  243. ? 1
  244. : spec >= y_absl::kSpacePad2 ? spec - y_absl::kSpacePad2 + 2
  245. : spec - y_absl::kZeroPad2 + 2),
  246. fill(spec >= y_absl::kSpacePad2 ? ' ' : '0'),
  247. neg(v < 0) {}
  248. template <typename S>
  249. friend void AbslStringify(S& sink, Dec dec) {
  250. assert(dec.width <= numbers_internal::kFastToBufferSize);
  251. char buffer[numbers_internal::kFastToBufferSize];
  252. char* const end = &buffer[numbers_internal::kFastToBufferSize];
  253. char* const minfill = end - dec.width;
  254. char* writer = end;
  255. uint64_t val = dec.value;
  256. while (val > 9) {
  257. *--writer = '0' + (val % 10);
  258. val /= 10;
  259. }
  260. *--writer = '0' + static_cast<char>(val);
  261. if (dec.neg) *--writer = '-';
  262. ptrdiff_t fillers = writer - minfill;
  263. if (fillers > 0) {
  264. // Tricky: if the fill character is ' ', then it's <fill><+/-><digits>
  265. // But...: if the fill character is '0', then it's <+/-><fill><digits>
  266. bool add_sign_again = false;
  267. if (dec.neg && dec.fill == '0') { // If filling with '0',
  268. ++writer; // ignore the sign we just added
  269. add_sign_again = true; // and re-add the sign later.
  270. }
  271. writer -= fillers;
  272. std::fill_n(writer, fillers, dec.fill);
  273. if (add_sign_again) *--writer = '-';
  274. }
  275. sink.Append(y_absl::string_view(writer, static_cast<size_t>(end - writer)));
  276. }
  277. };
  278. // -----------------------------------------------------------------------------
  279. // AlphaNum
  280. // -----------------------------------------------------------------------------
  281. //
  282. // The `AlphaNum` class acts as the main parameter type for `StrCat()` and
  283. // `StrAppend()`, providing efficient conversion of numeric, boolean, decimal,
  284. // and hexadecimal values (through the `Dec` and `Hex` types) into strings.
  285. // `AlphaNum` should only be used as a function parameter. Do not instantiate
  286. // `AlphaNum` directly as a stack variable.
  287. class AlphaNum {
  288. public:
  289. // No bool ctor -- bools convert to an integral type.
  290. // A bool ctor would also convert incoming pointers (bletch).
  291. AlphaNum(int x) // NOLINT(runtime/explicit)
  292. : piece_(digits_, static_cast<size_t>(
  293. numbers_internal::FastIntToBuffer(x, digits_) -
  294. &digits_[0])) {}
  295. AlphaNum(unsigned int x) // NOLINT(runtime/explicit)
  296. : piece_(digits_, static_cast<size_t>(
  297. numbers_internal::FastIntToBuffer(x, digits_) -
  298. &digits_[0])) {}
  299. AlphaNum(long x) // NOLINT(*)
  300. : piece_(digits_, static_cast<size_t>(
  301. numbers_internal::FastIntToBuffer(x, digits_) -
  302. &digits_[0])) {}
  303. AlphaNum(unsigned long x) // NOLINT(*)
  304. : piece_(digits_, static_cast<size_t>(
  305. numbers_internal::FastIntToBuffer(x, digits_) -
  306. &digits_[0])) {}
  307. AlphaNum(long long x) // NOLINT(*)
  308. : piece_(digits_, static_cast<size_t>(
  309. numbers_internal::FastIntToBuffer(x, digits_) -
  310. &digits_[0])) {}
  311. AlphaNum(unsigned long long x) // NOLINT(*)
  312. : piece_(digits_, static_cast<size_t>(
  313. numbers_internal::FastIntToBuffer(x, digits_) -
  314. &digits_[0])) {}
  315. AlphaNum(float f) // NOLINT(runtime/explicit)
  316. : piece_(digits_, numbers_internal::SixDigitsToBuffer(f, digits_)) {}
  317. AlphaNum(double f) // NOLINT(runtime/explicit)
  318. : piece_(digits_, numbers_internal::SixDigitsToBuffer(f, digits_)) {}
  319. template <size_t size>
  320. AlphaNum( // NOLINT(runtime/explicit)
  321. const strings_internal::AlphaNumBuffer<size>& buf
  322. Y_ABSL_ATTRIBUTE_LIFETIME_BOUND)
  323. : piece_(&buf.data[0], buf.size) {}
  324. AlphaNum(const char* c_str // NOLINT(runtime/explicit)
  325. Y_ABSL_ATTRIBUTE_LIFETIME_BOUND)
  326. : piece_(NullSafeStringView(c_str)) {}
  327. AlphaNum(y_absl::string_view pc // NOLINT(runtime/explicit)
  328. Y_ABSL_ATTRIBUTE_LIFETIME_BOUND)
  329. : piece_(pc) {}
  330. template <typename T, typename = typename std::enable_if<
  331. strings_internal::HasAbslStringify<T>::value>::type>
  332. AlphaNum( // NOLINT(runtime/explicit)
  333. const T& v Y_ABSL_ATTRIBUTE_LIFETIME_BOUND,
  334. strings_internal::StringifySink&& sink Y_ABSL_ATTRIBUTE_LIFETIME_BOUND = {})
  335. : piece_(strings_internal::ExtractStringification(sink, v)) {}
  336. template <typename Allocator>
  337. AlphaNum( // NOLINT(runtime/explicit)
  338. const std::basic_string<char, std::char_traits<char>, Allocator>& str
  339. Y_ABSL_ATTRIBUTE_LIFETIME_BOUND)
  340. : piece_(str) {}
  341. AlphaNum(const TString& str)
  342. : piece_(str.data(), str.size()) {}
  343. // Use string literals ":" instead of character literals ':'.
  344. AlphaNum(char c) = delete; // NOLINT(runtime/explicit)
  345. AlphaNum(const AlphaNum&) = delete;
  346. AlphaNum& operator=(const AlphaNum&) = delete;
  347. y_absl::string_view::size_type size() const { return piece_.size(); }
  348. const char* data() const { return piece_.data(); }
  349. y_absl::string_view Piece() const { return piece_; }
  350. // Match unscoped enums. Use integral promotion so that a `char`-backed
  351. // enum becomes a wider integral type AlphaNum will accept.
  352. template <typename T,
  353. typename = typename std::enable_if<
  354. std::is_enum<T>{} && std::is_convertible<T, int>{} &&
  355. !strings_internal::HasAbslStringify<T>::value>::type>
  356. AlphaNum(T e) // NOLINT(runtime/explicit)
  357. : AlphaNum(+e) {}
  358. // This overload matches scoped enums. We must explicitly cast to the
  359. // underlying type, but use integral promotion for the same reason as above.
  360. template <typename T,
  361. typename std::enable_if<
  362. std::is_enum<T>{} && !std::is_convertible<T, int>{} &&
  363. !strings_internal::HasAbslStringify<T>::value,
  364. char*>::type = nullptr>
  365. AlphaNum(T e) // NOLINT(runtime/explicit)
  366. : AlphaNum(+static_cast<typename std::underlying_type<T>::type>(e)) {}
  367. // vector<bool>::reference and const_reference require special help to
  368. // convert to `AlphaNum` because it requires two user defined conversions.
  369. template <
  370. typename T,
  371. typename std::enable_if<
  372. std::is_class<T>::value &&
  373. (std::is_same<T, std::vector<bool>::reference>::value ||
  374. std::is_same<T, std::vector<bool>::const_reference>::value)>::type* =
  375. nullptr>
  376. AlphaNum(T e) : AlphaNum(static_cast<bool>(e)) {} // NOLINT(runtime/explicit)
  377. private:
  378. y_absl::string_view piece_;
  379. char digits_[numbers_internal::kFastToBufferSize];
  380. };
  381. // -----------------------------------------------------------------------------
  382. // StrCat()
  383. // -----------------------------------------------------------------------------
  384. //
  385. // Merges given strings or numbers, using no delimiter(s), returning the merged
  386. // result as a string.
  387. //
  388. // `StrCat()` is designed to be the fastest possible way to construct a string
  389. // out of a mix of raw C strings, string_views, strings, bool values,
  390. // and numeric values.
  391. //
  392. // Don't use `StrCat()` for user-visible strings. The localization process
  393. // works poorly on strings built up out of fragments.
  394. //
  395. // For clarity and performance, don't use `StrCat()` when appending to a
  396. // string. Use `StrAppend()` instead. In particular, avoid using any of these
  397. // (anti-)patterns:
  398. //
  399. // str.append(StrCat(...))
  400. // str += StrCat(...)
  401. // str = StrCat(str, ...)
  402. //
  403. // The last case is the worst, with a potential to change a loop
  404. // from a linear time operation with O(1) dynamic allocations into a
  405. // quadratic time operation with O(n) dynamic allocations.
  406. //
  407. // See `StrAppend()` below for more information.
  408. namespace strings_internal {
  409. // Do not call directly - this is not part of the public API.
  410. TString CatPieces(std::initializer_list<y_absl::string_view> pieces);
  411. void AppendPieces(TString* dest,
  412. std::initializer_list<y_absl::string_view> pieces);
  413. } // namespace strings_internal
  414. Y_ABSL_MUST_USE_RESULT inline TString StrCat() { return TString(); }
  415. Y_ABSL_MUST_USE_RESULT inline TString StrCat(const AlphaNum& a) {
  416. return TString(a.data(), a.size());
  417. }
  418. Y_ABSL_MUST_USE_RESULT TString StrCat(const AlphaNum& a, const AlphaNum& b);
  419. Y_ABSL_MUST_USE_RESULT TString StrCat(const AlphaNum& a, const AlphaNum& b,
  420. const AlphaNum& c);
  421. Y_ABSL_MUST_USE_RESULT TString StrCat(const AlphaNum& a, const AlphaNum& b,
  422. const AlphaNum& c, const AlphaNum& d);
  423. // Support 5 or more arguments
  424. template <typename... AV>
  425. Y_ABSL_MUST_USE_RESULT inline TString StrCat(
  426. const AlphaNum& a, const AlphaNum& b, const AlphaNum& c, const AlphaNum& d,
  427. const AlphaNum& e, const AV&... args) {
  428. return strings_internal::CatPieces(
  429. {a.Piece(), b.Piece(), c.Piece(), d.Piece(), e.Piece(),
  430. static_cast<const AlphaNum&>(args).Piece()...});
  431. }
  432. // -----------------------------------------------------------------------------
  433. // StrAppend()
  434. // -----------------------------------------------------------------------------
  435. //
  436. // Appends a string or set of strings to an existing string, in a similar
  437. // fashion to `StrCat()`.
  438. //
  439. // WARNING: `StrAppend(&str, a, b, c, ...)` requires that none of the
  440. // a, b, c, parameters be a reference into str. For speed, `StrAppend()` does
  441. // not try to check each of its input arguments to be sure that they are not
  442. // a subset of the string being appended to. That is, while this will work:
  443. //
  444. // TString s = "foo";
  445. // s += s;
  446. //
  447. // This output is undefined:
  448. //
  449. // TString s = "foo";
  450. // StrAppend(&s, s);
  451. //
  452. // This output is undefined as well, since `y_absl::string_view` does not own its
  453. // data:
  454. //
  455. // TString s = "foobar";
  456. // y_absl::string_view p = s;
  457. // StrAppend(&s, p);
  458. inline void StrAppend(TString*) {}
  459. void StrAppend(TString* dest, const AlphaNum& a);
  460. void StrAppend(TString* dest, const AlphaNum& a, const AlphaNum& b);
  461. void StrAppend(TString* dest, const AlphaNum& a, const AlphaNum& b,
  462. const AlphaNum& c);
  463. void StrAppend(TString* dest, const AlphaNum& a, const AlphaNum& b,
  464. const AlphaNum& c, const AlphaNum& d);
  465. // Support 5 or more arguments
  466. template <typename... AV>
  467. inline void StrAppend(TString* dest, const AlphaNum& a, const AlphaNum& b,
  468. const AlphaNum& c, const AlphaNum& d, const AlphaNum& e,
  469. const AV&... args) {
  470. strings_internal::AppendPieces(
  471. dest, {a.Piece(), b.Piece(), c.Piece(), d.Piece(), e.Piece(),
  472. static_cast<const AlphaNum&>(args).Piece()...});
  473. }
  474. // Helper function for the future StrCat default floating-point format, %.6g
  475. // This is fast.
  476. inline strings_internal::AlphaNumBuffer<
  477. numbers_internal::kSixDigitsToBufferSize>
  478. SixDigits(double d) {
  479. strings_internal::AlphaNumBuffer<numbers_internal::kSixDigitsToBufferSize>
  480. result;
  481. result.size = numbers_internal::SixDigitsToBuffer(d, &result.data[0]);
  482. return result;
  483. }
  484. Y_ABSL_NAMESPACE_END
  485. } // namespace y_absl
  486. #endif // Y_ABSL_STRINGS_STR_CAT_H_