str_split_internal.h 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485
  1. // Copyright 2017 The Abseil Authors.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // https://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. //
  15. // This file declares INTERNAL parts of the Split API that are inline/templated
  16. // or otherwise need to be available at compile time. The main abstractions
  17. // defined in here are
  18. //
  19. // - ConvertibleToStringView
  20. // - SplitIterator<>
  21. // - Splitter<>
  22. //
  23. // DO NOT INCLUDE THIS FILE DIRECTLY. Use this file by including
  24. // absl/strings/str_split.h.
  25. //
  26. // IWYU pragma: private, include "absl/strings/str_split.h"
  27. #ifndef ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_
  28. #define ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_
  29. #include <array>
  30. #include <cstddef>
  31. #include <initializer_list>
  32. #include <iterator>
  33. #include <tuple>
  34. #include <type_traits>
  35. #include <utility>
  36. #include <vector>
  37. #include "absl/base/macros.h"
  38. #include "absl/base/port.h"
  39. #include "absl/meta/type_traits.h"
  40. #include "absl/strings/string_view.h"
  41. #ifdef _GLIBCXX_DEBUG
  42. #include "absl/strings/internal/stl_type_traits.h"
  43. #endif // _GLIBCXX_DEBUG
  44. namespace absl {
  45. ABSL_NAMESPACE_BEGIN
  46. namespace strings_internal {
  47. // This class is implicitly constructible from everything that absl::string_view
  48. // is implicitly constructible from, except for rvalue strings. This means it
  49. // can be used as a function parameter in places where passing a temporary
  50. // string might cause memory lifetime issues.
  51. class ConvertibleToStringView {
  52. public:
  53. ConvertibleToStringView(const char* s) // NOLINT(runtime/explicit)
  54. : value_(s) {}
  55. ConvertibleToStringView(char* s) : value_(s) {} // NOLINT(runtime/explicit)
  56. ConvertibleToStringView(absl::string_view s) // NOLINT(runtime/explicit)
  57. : value_(s) {}
  58. ConvertibleToStringView(const std::string& s) // NOLINT(runtime/explicit)
  59. : value_(s) {}
  60. // Disable conversion from rvalue strings.
  61. ConvertibleToStringView(std::string&& s) = delete;
  62. ConvertibleToStringView(const std::string&& s) = delete;
  63. absl::string_view value() const { return value_; }
  64. private:
  65. absl::string_view value_;
  66. };
  67. // An iterator that enumerates the parts of a string from a Splitter. The text
  68. // to be split, the Delimiter, and the Predicate are all taken from the given
  69. // Splitter object. Iterators may only be compared if they refer to the same
  70. // Splitter instance.
  71. //
  72. // This class is NOT part of the public splitting API.
  73. template <typename Splitter>
  74. class SplitIterator {
  75. public:
  76. using iterator_category = std::input_iterator_tag;
  77. using value_type = absl::string_view;
  78. using difference_type = ptrdiff_t;
  79. using pointer = const value_type*;
  80. using reference = const value_type&;
  81. enum State { kInitState, kLastState, kEndState };
  82. SplitIterator(State state, const Splitter* splitter)
  83. : pos_(0),
  84. state_(state),
  85. splitter_(splitter),
  86. delimiter_(splitter->delimiter()),
  87. predicate_(splitter->predicate()) {
  88. // Hack to maintain backward compatibility. This one block makes it so an
  89. // empty absl::string_view whose .data() happens to be nullptr behaves
  90. // *differently* from an otherwise empty absl::string_view whose .data() is
  91. // not nullptr. This is an undesirable difference in general, but this
  92. // behavior is maintained to avoid breaking existing code that happens to
  93. // depend on this old behavior/bug. Perhaps it will be fixed one day. The
  94. // difference in behavior is as follows:
  95. // Split(absl::string_view(""), '-'); // {""}
  96. // Split(absl::string_view(), '-'); // {}
  97. if (splitter_->text().data() == nullptr) {
  98. state_ = kEndState;
  99. pos_ = splitter_->text().size();
  100. return;
  101. }
  102. if (state_ == kEndState) {
  103. pos_ = splitter_->text().size();
  104. } else {
  105. ++(*this);
  106. }
  107. }
  108. bool at_end() const { return state_ == kEndState; }
  109. reference operator*() const { return curr_; }
  110. pointer operator->() const { return &curr_; }
  111. SplitIterator& operator++() {
  112. do {
  113. if (state_ == kLastState) {
  114. state_ = kEndState;
  115. return *this;
  116. }
  117. const absl::string_view text = splitter_->text();
  118. const absl::string_view d = delimiter_.Find(text, pos_);
  119. if (d.data() == text.data() + text.size()) state_ = kLastState;
  120. curr_ = text.substr(pos_,
  121. static_cast<size_t>(d.data() - (text.data() + pos_)));
  122. pos_ += curr_.size() + d.size();
  123. } while (!predicate_(curr_));
  124. return *this;
  125. }
  126. SplitIterator operator++(int) {
  127. SplitIterator old(*this);
  128. ++(*this);
  129. return old;
  130. }
  131. friend bool operator==(const SplitIterator& a, const SplitIterator& b) {
  132. return a.state_ == b.state_ && a.pos_ == b.pos_;
  133. }
  134. friend bool operator!=(const SplitIterator& a, const SplitIterator& b) {
  135. return !(a == b);
  136. }
  137. private:
  138. size_t pos_;
  139. State state_;
  140. absl::string_view curr_;
  141. const Splitter* splitter_;
  142. typename Splitter::DelimiterType delimiter_;
  143. typename Splitter::PredicateType predicate_;
  144. };
  145. // HasMappedType<T>::value is true iff there exists a type T::mapped_type.
  146. template <typename T, typename = void>
  147. struct HasMappedType : std::false_type {};
  148. template <typename T>
  149. struct HasMappedType<T, absl::void_t<typename T::mapped_type>>
  150. : std::true_type {};
  151. // HasValueType<T>::value is true iff there exists a type T::value_type.
  152. template <typename T, typename = void>
  153. struct HasValueType : std::false_type {};
  154. template <typename T>
  155. struct HasValueType<T, absl::void_t<typename T::value_type>> : std::true_type {
  156. };
  157. // HasConstIterator<T>::value is true iff there exists a type T::const_iterator.
  158. template <typename T, typename = void>
  159. struct HasConstIterator : std::false_type {};
  160. template <typename T>
  161. struct HasConstIterator<T, absl::void_t<typename T::const_iterator>>
  162. : std::true_type {};
  163. // HasEmplace<T>::value is true iff there exists a method T::emplace().
  164. template <typename T, typename = void>
  165. struct HasEmplace : std::false_type {};
  166. template <typename T>
  167. struct HasEmplace<T, absl::void_t<decltype(std::declval<T>().emplace())>>
  168. : std::true_type {};
  // IsInitializerList<T>::value is true exactly when T is some
  // std::initializer_list<U>.
  //
  // The answer is obtained through overload resolution on a T* argument: the
  // template overload is viable only for a pointer to std::initializer_list<U>,
  // and the C-style variadic overload catches every other pointer type. The
  // dispatch functions are declared but never defined; they are only named
  // inside decltype.
  template <typename T>
  std::true_type IsInitializerListDispatch(std::initializer_list<T>*);

  std::false_type IsInitializerListDispatch(...);  // fallback: not a list

  template <typename T>
  struct IsInitializerList
      : decltype(IsInitializerListDispatch(std::declval<T*>())) {};
  177. // A SplitterIsConvertibleTo<C>::type alias exists iff the specified condition
  178. // is true for type 'C'.
  179. //
  180. // Restricts conversion to container-like types (by testing for the presence of
  181. // a const_iterator member type) and also to disable conversion to an
  182. // std::initializer_list (which also has a const_iterator). Otherwise, code
  183. // compiled in C++11 will get an error due to ambiguous conversion paths (in
  184. // C++11 std::vector<T>::operator= is overloaded to take either a std::vector<T>
  185. // or an std::initializer_list<T>).
  186. template <typename C, bool has_value_type, bool has_mapped_type>
  187. struct SplitterIsConvertibleToImpl : std::false_type {};
  188. template <typename C>
  189. struct SplitterIsConvertibleToImpl<C, true, false>
  190. : std::is_constructible<typename C::value_type, absl::string_view> {};
  191. template <typename C>
  192. struct SplitterIsConvertibleToImpl<C, true, true>
  193. : absl::conjunction<
  194. std::is_constructible<typename C::key_type, absl::string_view>,
  195. std::is_constructible<typename C::mapped_type, absl::string_view>> {};
  196. template <typename C>
  197. struct SplitterIsConvertibleTo
  198. : SplitterIsConvertibleToImpl<
  199. C,
  200. #ifdef _GLIBCXX_DEBUG
  201. !IsStrictlyBaseOfAndConvertibleToSTLContainer<C>::value &&
  202. #endif // _GLIBCXX_DEBUG
  203. !IsInitializerList<
  204. typename std::remove_reference<C>::type>::value &&
  205. HasValueType<C>::value && HasConstIterator<C>::value,
  206. HasMappedType<C>::value> {
  207. };
  208. template <typename StringType, typename Container, typename = void>
  209. struct ShouldUseLifetimeBound : std::false_type {};
  210. template <typename StringType, typename Container>
  211. struct ShouldUseLifetimeBound<
  212. StringType, Container,
  213. std::enable_if_t<
  214. std::is_same<StringType, std::string>::value &&
  215. std::is_same<typename Container::value_type, absl::string_view>::value>>
  216. : std::true_type {};
  217. template <typename StringType, typename First, typename Second>
  218. using ShouldUseLifetimeBoundForPair = std::integral_constant<
  219. bool, std::is_same<StringType, std::string>::value &&
  220. (std::is_same<First, absl::string_view>::value ||
  221. std::is_same<Second, absl::string_view>::value)>;
  222. // This class implements the range that is returned by absl::StrSplit(). This
  223. // class has templated conversion operators that allow it to be implicitly
  224. // converted to a variety of types that the caller may have specified on the
  225. // left-hand side of an assignment.
  226. //
  227. // The main interface for interacting with this class is through its implicit
  228. // conversion operators. However, this class may also be used like a container
  229. // in that it has .begin() and .end() member functions. It may also be used
  230. // within a range-for loop.
  231. //
  232. // Output containers can be collections of any type that is constructible from
  233. // an absl::string_view.
  234. //
  235. // An Predicate functor may be supplied. This predicate will be used to filter
  236. // the split strings: only strings for which the predicate returns true will be
  237. // kept. A Predicate object is any unary functor that takes an absl::string_view
  238. // and returns bool.
  239. //
  240. // The StringType parameter can be either string_view or string, depending on
  241. // whether the Splitter refers to a string stored elsewhere, or if the string
  242. // resides inside the Splitter itself.
  243. template <typename Delimiter, typename Predicate, typename StringType>
  244. class Splitter {
  245. public:
  246. using DelimiterType = Delimiter;
  247. using PredicateType = Predicate;
  248. using const_iterator = strings_internal::SplitIterator<Splitter>;
  249. using value_type = typename std::iterator_traits<const_iterator>::value_type;
  250. Splitter(StringType input_text, Delimiter d, Predicate p)
  251. : text_(std::move(input_text)),
  252. delimiter_(std::move(d)),
  253. predicate_(std::move(p)) {}
  254. absl::string_view text() const { return text_; }
  255. const Delimiter& delimiter() const { return delimiter_; }
  256. const Predicate& predicate() const { return predicate_; }
  257. // Range functions that iterate the split substrings as absl::string_view
  258. // objects. These methods enable a Splitter to be used in a range-based for
  259. // loop.
  260. const_iterator begin() const { return {const_iterator::kInitState, this}; }
  261. const_iterator end() const { return {const_iterator::kEndState, this}; }
  262. // An implicit conversion operator that is restricted to only those containers
  263. // that the splitter is convertible to.
  264. template <
  265. typename Container,
  266. std::enable_if_t<ShouldUseLifetimeBound<StringType, Container>::value &&
  267. SplitterIsConvertibleTo<Container>::value,
  268. std::nullptr_t> = nullptr>
  269. // NOLINTNEXTLINE(google-explicit-constructor)
  270. operator Container() const ABSL_ATTRIBUTE_LIFETIME_BOUND {
  271. return ConvertToContainer<Container, typename Container::value_type,
  272. HasMappedType<Container>::value>()(*this);
  273. }
  274. template <
  275. typename Container,
  276. std::enable_if_t<!ShouldUseLifetimeBound<StringType, Container>::value &&
  277. SplitterIsConvertibleTo<Container>::value,
  278. std::nullptr_t> = nullptr>
  279. // NOLINTNEXTLINE(google-explicit-constructor)
  280. operator Container() const {
  281. return ConvertToContainer<Container, typename Container::value_type,
  282. HasMappedType<Container>::value>()(*this);
  283. }
  284. // Returns a pair with its .first and .second members set to the first two
  285. // strings returned by the begin() iterator. Either/both of .first and .second
  286. // will be constructed with empty strings if the iterator doesn't have a
  287. // corresponding value.
  288. template <typename First, typename Second,
  289. std::enable_if_t<
  290. ShouldUseLifetimeBoundForPair<StringType, First, Second>::value,
  291. std::nullptr_t> = nullptr>
  292. // NOLINTNEXTLINE(google-explicit-constructor)
  293. operator std::pair<First, Second>() const ABSL_ATTRIBUTE_LIFETIME_BOUND {
  294. return ConvertToPair<First, Second>();
  295. }
  296. template <typename First, typename Second,
  297. std::enable_if_t<!ShouldUseLifetimeBoundForPair<StringType, First,
  298. Second>::value,
  299. std::nullptr_t> = nullptr>
  300. // NOLINTNEXTLINE(google-explicit-constructor)
  301. operator std::pair<First, Second>() const {
  302. return ConvertToPair<First, Second>();
  303. }
  304. private:
  305. template <typename First, typename Second>
  306. std::pair<First, Second> ConvertToPair() const {
  307. absl::string_view first, second;
  308. auto it = begin();
  309. if (it != end()) {
  310. first = *it;
  311. if (++it != end()) {
  312. second = *it;
  313. }
  314. }
  315. return {First(first), Second(second)};
  316. }
  317. // ConvertToContainer is a functor converting a Splitter to the requested
  318. // Container of ValueType. It is specialized below to optimize splitting to
  319. // certain combinations of Container and ValueType.
  320. //
  321. // This base template handles the generic case of storing the split results in
  322. // the requested non-map-like container and converting the split substrings to
  323. // the requested type.
  324. template <typename Container, typename ValueType, bool is_map = false>
  325. struct ConvertToContainer {
  326. Container operator()(const Splitter& splitter) const {
  327. Container c;
  328. auto it = std::inserter(c, c.end());
  329. for (const auto& sp : splitter) {
  330. *it++ = ValueType(sp);
  331. }
  332. return c;
  333. }
  334. };
  335. // Partial specialization for a std::vector<absl::string_view>.
  336. //
  337. // Optimized for the common case of splitting to a
  338. // std::vector<absl::string_view>. In this case we first split the results to
  339. // a small array of absl::string_view on the stack, to reduce reallocations.
  340. template <typename A>
  341. struct ConvertToContainer<std::vector<absl::string_view, A>,
  342. absl::string_view, false> {
  343. std::vector<absl::string_view, A> operator()(
  344. const Splitter& splitter) const {
  345. struct raw_view {
  346. const char* data;
  347. size_t size;
  348. operator absl::string_view() const { // NOLINT(runtime/explicit)
  349. return {data, size};
  350. }
  351. };
  352. std::vector<absl::string_view, A> v;
  353. std::array<raw_view, 16> ar;
  354. for (auto it = splitter.begin(); !it.at_end();) {
  355. size_t index = 0;
  356. do {
  357. ar[index].data = it->data();
  358. ar[index].size = it->size();
  359. ++it;
  360. } while (++index != ar.size() && !it.at_end());
  361. // We static_cast index to a signed type to work around overzealous
  362. // compiler warnings about signedness.
  363. v.insert(v.end(), ar.begin(),
  364. ar.begin() + static_cast<ptrdiff_t>(index));
  365. }
  366. return v;
  367. }
  368. };
  369. // Partial specialization for a std::vector<std::string>.
  370. //
  371. // Optimized for the common case of splitting to a std::vector<std::string>.
  372. // In this case we first split the results to a std::vector<absl::string_view>
  373. // so the returned std::vector<std::string> can have space reserved to avoid
  374. // std::string moves.
  375. template <typename A>
  376. struct ConvertToContainer<std::vector<std::string, A>, std::string, false> {
  377. std::vector<std::string, A> operator()(const Splitter& splitter) const {
  378. const std::vector<absl::string_view> v = splitter;
  379. return std::vector<std::string, A>(v.begin(), v.end());
  380. }
  381. };
  382. // Partial specialization for containers of pairs (e.g., maps).
  383. //
  384. // The algorithm is to insert a new pair into the map for each even-numbered
  385. // item, with the even-numbered item as the key with a default-constructed
  386. // value. Each odd-numbered item will then be assigned to the last pair's
  387. // value.
  388. template <typename Container, typename First, typename Second>
  389. struct ConvertToContainer<Container, std::pair<const First, Second>, true> {
  390. using iterator = typename Container::iterator;
  391. Container operator()(const Splitter& splitter) const {
  392. Container m;
  393. iterator it;
  394. bool insert = true;
  395. for (const absl::string_view sv : splitter) {
  396. if (insert) {
  397. it = InsertOrEmplace(&m, sv);
  398. } else {
  399. it->second = Second(sv);
  400. }
  401. insert = !insert;
  402. }
  403. return m;
  404. }
  405. // Inserts the key and an empty value into the map, returning an iterator to
  406. // the inserted item. We use emplace() if available, otherwise insert().
  407. template <typename M>
  408. static absl::enable_if_t<HasEmplace<M>::value, iterator> InsertOrEmplace(
  409. M* m, absl::string_view key) {
  410. // Use piecewise_construct to support old versions of gcc in which pair
  411. // constructor can't otherwise construct string from string_view.
  412. return ToIter(m->emplace(std::piecewise_construct, std::make_tuple(key),
  413. std::tuple<>()));
  414. }
  415. template <typename M>
  416. static absl::enable_if_t<!HasEmplace<M>::value, iterator> InsertOrEmplace(
  417. M* m, absl::string_view key) {
  418. return ToIter(m->insert(std::make_pair(First(key), Second(""))));
  419. }
  420. static iterator ToIter(std::pair<iterator, bool> pair) {
  421. return pair.first;
  422. }
  423. static iterator ToIter(iterator iter) { return iter; }
  424. };
  425. StringType text_;
  426. Delimiter delimiter_;
  427. Predicate predicate_;
  428. };
  429. } // namespace strings_internal
  430. ABSL_NAMESPACE_END
  431. } // namespace absl
  432. #endif // ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_