str_join_internal.h 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338
  1. //
  2. // Copyright 2017 The Abseil Authors.
  3. //
  4. // Licensed under the Apache License, Version 2.0 (the "License");
  5. // you may not use this file except in compliance with the License.
  6. // You may obtain a copy of the License at
  7. //
  8. // https://www.apache.org/licenses/LICENSE-2.0
  9. //
  10. // Unless required by applicable law or agreed to in writing, software
  11. // distributed under the License is distributed on an "AS IS" BASIS,
  12. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. // See the License for the specific language governing permissions and
  14. // limitations under the License.
  15. //
  16. // This file declares INTERNAL parts of the Join API that are inlined/templated
  17. // or otherwise need to be available at compile time. The main abstractions
  18. // defined in this file are:
  19. //
  20. // - A handful of default Formatters
  21. // - JoinAlgorithm() overloads
  22. // - JoinRange() overloads
  23. // - JoinTuple()
  24. //
  25. // DO NOT INCLUDE THIS FILE DIRECTLY. Use this file by including
  26. // y_absl/strings/str_join.h
  27. //
  28. // IWYU pragma: private, include "y_absl/strings/str_join.h"
  29. #ifndef Y_ABSL_STRINGS_INTERNAL_STR_JOIN_INTERNAL_H_
  30. #define Y_ABSL_STRINGS_INTERNAL_STR_JOIN_INTERNAL_H_
  31. #include <cstdint>
  32. #include <cstring>
  33. #include <initializer_list>
  34. #include <iterator>
  35. #include <limits>
  36. #include <memory>
  37. #include <util/generic/string.h>
  38. #include <tuple>
  39. #include <type_traits>
  40. #include <utility>
  41. #include "y_absl/base/config.h"
  42. #include "y_absl/base/internal/raw_logging.h"
  43. #include "y_absl/strings/internal/ostringstream.h"
  44. #include "y_absl/strings/internal/resize_uninitialized.h"
  45. #include "y_absl/strings/str_cat.h"
  46. #include "y_absl/strings/string_view.h"
  47. namespace y_absl {
  48. Y_ABSL_NAMESPACE_BEGIN
  49. namespace strings_internal {
  50. //
  51. // Formatter objects
  52. //
  53. // The following are implementation classes for standard Formatter objects. The
  54. // factory functions that users will call to create and use these formatters are
  // defined and documented in y_absl/strings/str_join.h.
  56. //
  57. // The default formatter. Converts alpha-numeric types to strings.
  58. struct AlphaNumFormatterImpl {
  59. // This template is needed in order to support passing in a dereferenced
  60. // vector<bool>::iterator
  61. template <typename T>
  62. void operator()(TString* out, const T& t) const {
  63. StrAppend(out, AlphaNum(t));
  64. }
  65. void operator()(TString* out, const AlphaNum& t) const {
  66. StrAppend(out, t);
  67. }
  68. };
  69. // A type that's used to overload the JoinAlgorithm() function (defined below)
  70. // for ranges that do not require additional formatting (e.g., a range of
  71. // strings).
  72. struct NoFormatter : public AlphaNumFormatterImpl {};
  73. // Formats types to strings using the << operator.
  74. class StreamFormatterImpl {
  75. public:
  76. // The method isn't const because it mutates state. Making it const will
  77. // render StreamFormatterImpl thread-hostile.
  78. template <typename T>
  79. void operator()(TString* out, const T& t) {
  80. // The stream is created lazily to avoid paying the relatively high cost
  81. // of its construction when joining an empty range.
  82. if (strm_) {
  83. strm_->clear(); // clear the bad, fail and eof bits in case they were set
  84. strm_->str(out);
  85. } else {
  86. strm_.reset(new strings_internal::OStringStream(out));
  87. }
  88. *strm_ << t;
  89. }
  90. private:
  91. std::unique_ptr<strings_internal::OStringStream> strm_;
  92. };
  93. // Formats a std::pair<>. The 'first' member is formatted using f1_ and the
  94. // 'second' member is formatted using f2_. sep_ is the separator.
  95. template <typename F1, typename F2>
  96. class PairFormatterImpl {
  97. public:
  98. PairFormatterImpl(F1 f1, y_absl::string_view sep, F2 f2)
  99. : f1_(std::move(f1)), sep_(sep), f2_(std::move(f2)) {}
  100. template <typename T>
  101. void operator()(TString* out, const T& p) {
  102. f1_(out, p.first);
  103. out->append(sep_);
  104. f2_(out, p.second);
  105. }
  106. template <typename T>
  107. void operator()(TString* out, const T& p) const {
  108. f1_(out, p.first);
  109. out->append(sep_);
  110. f2_(out, p.second);
  111. }
  112. private:
  113. F1 f1_;
  114. TString sep_;
  115. F2 f2_;
  116. };
  117. // Wraps another formatter and dereferences the argument to operator() then
  118. // passes the dereferenced argument to the wrapped formatter. This can be
  119. // useful, for example, to join a std::vector<int*>.
  120. template <typename Formatter>
  121. class DereferenceFormatterImpl {
  122. public:
  123. DereferenceFormatterImpl() : f_() {}
  124. explicit DereferenceFormatterImpl(Formatter&& f)
  125. : f_(std::forward<Formatter>(f)) {}
  126. template <typename T>
  127. void operator()(TString* out, const T& t) {
  128. f_(out, *t);
  129. }
  130. template <typename T>
  131. void operator()(TString* out, const T& t) const {
  132. f_(out, *t);
  133. }
  134. private:
  135. Formatter f_;
  136. };
  137. // DefaultFormatter<T> is a traits class that selects a default Formatter to use
  138. // for the given type T. The ::Type member names the Formatter to use. This is
  139. // used by the strings::Join() functions that do NOT take a Formatter argument,
  140. // in which case a default Formatter must be chosen.
  141. //
  142. // AlphaNumFormatterImpl is the default in the base template, followed by
  143. // specializations for other types.
  144. template <typename ValueType>
  145. struct DefaultFormatter {
  146. typedef AlphaNumFormatterImpl Type;
  147. };
  148. template <>
  149. struct DefaultFormatter<const char*> {
  150. typedef AlphaNumFormatterImpl Type;
  151. };
  152. template <>
  153. struct DefaultFormatter<char*> {
  154. typedef AlphaNumFormatterImpl Type;
  155. };
  156. template <>
  157. struct DefaultFormatter<TString> {
  158. typedef NoFormatter Type;
  159. };
  160. template <>
  161. struct DefaultFormatter<y_absl::string_view> {
  162. typedef NoFormatter Type;
  163. };
  164. template <typename ValueType>
  165. struct DefaultFormatter<ValueType*> {
  166. typedef DereferenceFormatterImpl<typename DefaultFormatter<ValueType>::Type>
  167. Type;
  168. };
  169. template <typename ValueType>
  170. struct DefaultFormatter<std::unique_ptr<ValueType>>
  171. : public DefaultFormatter<ValueType*> {};
  172. //
  173. // JoinAlgorithm() functions
  174. //
  175. // The main joining algorithm. This simply joins the elements in the given
  176. // iterator range, each separated by the given separator, into an output string,
  177. // and formats each element using the provided Formatter object.
  178. template <typename Iterator, typename Formatter>
  179. TString JoinAlgorithm(Iterator start, Iterator end, y_absl::string_view s,
  180. Formatter&& f) {
  181. TString result;
  182. y_absl::string_view sep("");
  183. for (Iterator it = start; it != end; ++it) {
  184. result.append(sep.data(), sep.size());
  185. f(&result, *it);
  186. sep = s;
  187. }
  188. return result;
  189. }
  190. // A joining algorithm that's optimized for a forward iterator range of
  191. // string-like objects that do not need any additional formatting. This is to
  192. // optimize the common case of joining, say, a std::vector<string> or a
  193. // std::vector<y_absl::string_view>.
  194. //
  195. // This is an overload of the previous JoinAlgorithm() function. Here the
  196. // Formatter argument is of type NoFormatter. Since NoFormatter is an internal
  197. // type, this overload is only invoked when strings::Join() is called with a
  198. // range of string-like objects (e.g., TString, y_absl::string_view), and an
  199. // explicit Formatter argument was NOT specified.
  200. //
  201. // The optimization is that the needed space will be reserved in the output
  202. // string to avoid the need to resize while appending. To do this, the iterator
  203. // range will be traversed twice: once to calculate the total needed size, and
  204. // then again to copy the elements and delimiters to the output string.
  205. template <typename Iterator,
  206. typename = typename std::enable_if<std::is_convertible<
  207. typename std::iterator_traits<Iterator>::iterator_category,
  208. std::forward_iterator_tag>::value>::type>
  209. TString JoinAlgorithm(Iterator start, Iterator end, y_absl::string_view s,
  210. NoFormatter) {
  211. TString result;
  212. if (start != end) {
  213. // Sums size
  214. auto&& start_value = *start;
  215. // Use uint64_t to prevent size_t overflow. We assume it is not possible for
  216. // in memory strings to overflow a uint64_t.
  217. uint64_t result_size = start_value.size();
  218. for (Iterator it = start; ++it != end;) {
  219. result_size += s.size();
  220. result_size += (*it).size();
  221. }
  222. if (result_size > 0) {
  223. constexpr uint64_t kMaxSize =
  224. uint64_t{(std::numeric_limits<size_t>::max)()};
  225. Y_ABSL_INTERNAL_CHECK(result_size <= kMaxSize, "size_t overflow");
  226. STLStringResizeUninitialized(&result, static_cast<size_t>(result_size));
  227. // Joins strings
  228. char* result_buf = &*result.begin();
  229. memcpy(result_buf, start_value.data(), start_value.size());
  230. result_buf += start_value.size();
  231. for (Iterator it = start; ++it != end;) {
  232. memcpy(result_buf, s.data(), s.size());
  233. result_buf += s.size();
  234. auto&& value = *it;
  235. memcpy(result_buf, value.data(), value.size());
  236. result_buf += value.size();
  237. }
  238. }
  239. }
  240. return result;
  241. }
  242. // JoinTupleLoop implements a loop over the elements of a std::tuple, which
  243. // are heterogeneous. The primary template matches the tuple interior case. It
  244. // continues the iteration after appending a separator (for nonzero indices)
  245. // and formatting an element of the tuple. The specialization for the I=N case
  246. // matches the end-of-tuple, and terminates the iteration.
  247. template <size_t I, size_t N>
  248. struct JoinTupleLoop {
  249. template <typename Tup, typename Formatter>
  250. void operator()(TString* out, const Tup& tup, y_absl::string_view sep,
  251. Formatter&& fmt) {
  252. if (I > 0) out->append(sep.data(), sep.size());
  253. fmt(out, std::get<I>(tup));
  254. JoinTupleLoop<I + 1, N>()(out, tup, sep, fmt);
  255. }
  256. };
  257. template <size_t N>
  258. struct JoinTupleLoop<N, N> {
  259. template <typename Tup, typename Formatter>
  260. void operator()(TString*, const Tup&, y_absl::string_view, Formatter&&) {}
  261. };
  262. template <typename... T, typename Formatter>
  263. TString JoinAlgorithm(const std::tuple<T...>& tup, y_absl::string_view sep,
  264. Formatter&& fmt) {
  265. TString result;
  266. JoinTupleLoop<0, sizeof...(T)>()(&result, tup, sep, fmt);
  267. return result;
  268. }
  269. template <typename Iterator>
  270. TString JoinRange(Iterator first, Iterator last,
  271. y_absl::string_view separator) {
  272. // No formatter was explicitly given, so a default must be chosen.
  273. typedef typename std::iterator_traits<Iterator>::value_type ValueType;
  274. typedef typename DefaultFormatter<ValueType>::Type Formatter;
  275. return JoinAlgorithm(first, last, separator, Formatter());
  276. }
  277. template <typename Range, typename Formatter>
  278. TString JoinRange(const Range& range, y_absl::string_view separator,
  279. Formatter&& fmt) {
  280. using std::begin;
  281. using std::end;
  282. return JoinAlgorithm(begin(range), end(range), separator, fmt);
  283. }
  284. template <typename Range>
  285. TString JoinRange(const Range& range, y_absl::string_view separator) {
  286. using std::begin;
  287. using std::end;
  288. return JoinRange(begin(range), end(range), separator);
  289. }
  290. template <typename Tuple, std::size_t... I>
  291. TString JoinTuple(const Tuple& value, y_absl::string_view separator,
  292. std::index_sequence<I...>) {
  293. return JoinRange(
  294. std::initializer_list<y_absl::string_view>{
  295. static_cast<const AlphaNum&>(std::get<I>(value)).Piece()...},
  296. separator);
  297. }
  298. } // namespace strings_internal
  299. Y_ABSL_NAMESPACE_END
  300. } // namespace y_absl
  301. #endif // Y_ABSL_STRINGS_INTERNAL_STR_JOIN_INTERNAL_H_