str_split.cc 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142
  1. // Copyright 2017 The Abseil Authors.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // https://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. #include "y_absl/strings/str_split.h"
  15. #include <algorithm>
  16. #include <cassert>
  17. #include <cstdint>
  18. #include <cstdlib>
  19. #include <cstring>
  20. #include <iterator>
  21. #include <limits>
  22. #include <memory>
  23. #include "y_absl/base/internal/raw_logging.h"
  24. #include "y_absl/strings/ascii.h"
  25. namespace y_absl {
  26. Y_ABSL_NAMESPACE_BEGIN
  27. namespace {
  28. // This GenericFind() template function encapsulates the finding algorithm
  29. // shared between the ByString and ByAnyChar delimiters. The FindPolicy
  30. // template parameter allows each delimiter to customize the actual find
  31. // function to use and the length of the found delimiter. For example, the
  32. // Literal delimiter will ultimately use y_absl::string_view::find(), and the
  33. // AnyOf delimiter will use y_absl::string_view::find_first_of().
  34. template <typename FindPolicy>
  35. y_absl::string_view GenericFind(y_absl::string_view text,
  36. y_absl::string_view delimiter, size_t pos,
  37. FindPolicy find_policy) {
  38. if (delimiter.empty() && text.length() > 0) {
  39. // Special case for empty string delimiters: always return a zero-length
  40. // y_absl::string_view referring to the item at position 1 past pos.
  41. return y_absl::string_view(text.data() + pos + 1, 0);
  42. }
  43. size_t found_pos = y_absl::string_view::npos;
  44. y_absl::string_view found(text.data() + text.size(),
  45. 0); // By default, not found
  46. found_pos = find_policy.Find(text, delimiter, pos);
  47. if (found_pos != y_absl::string_view::npos) {
  48. found = y_absl::string_view(text.data() + found_pos,
  49. find_policy.Length(delimiter));
  50. }
  51. return found;
  52. }
  53. // Finds using y_absl::string_view::find(), therefore the length of the found
  54. // delimiter is delimiter.length().
  55. struct LiteralPolicy {
  56. static size_t Find(y_absl::string_view text, y_absl::string_view delimiter,
  57. size_t pos) {
  58. return text.find(delimiter, pos);
  59. }
  60. static size_t Length(y_absl::string_view delimiter) {
  61. return delimiter.length();
  62. }
  63. };
  64. // Finds using y_absl::string_view::find_first_of(), therefore the length of the
  65. // found delimiter is 1.
  66. struct AnyOfPolicy {
  67. static size_t Find(y_absl::string_view text, y_absl::string_view delimiter,
  68. size_t pos) {
  69. return text.find_first_of(delimiter, pos);
  70. }
  71. static size_t Length(y_absl::string_view /* delimiter */) { return 1; }
  72. };
  73. } // namespace
  74. //
  75. // ByString
  76. //
  77. ByString::ByString(y_absl::string_view sp) : delimiter_(sp) {}
  78. y_absl::string_view ByString::Find(y_absl::string_view text, size_t pos) const {
  79. if (delimiter_.length() == 1) {
  80. // Much faster to call find on a single character than on an
  81. // y_absl::string_view.
  82. size_t found_pos = text.find(delimiter_[0], pos);
  83. if (found_pos == y_absl::string_view::npos)
  84. return y_absl::string_view(text.data() + text.size(), 0);
  85. return text.substr(found_pos, 1);
  86. }
  87. return GenericFind(text, delimiter_, pos, LiteralPolicy());
  88. }
  89. //
  90. // ByChar
  91. //
  92. y_absl::string_view ByChar::Find(y_absl::string_view text, size_t pos) const {
  93. size_t found_pos = text.find(c_, pos);
  94. if (found_pos == y_absl::string_view::npos)
  95. return y_absl::string_view(text.data() + text.size(), 0);
  96. return text.substr(found_pos, 1);
  97. }
  98. //
  99. // ByAnyChar
  100. //
  101. ByAnyChar::ByAnyChar(y_absl::string_view sp) : delimiters_(sp) {}
  102. y_absl::string_view ByAnyChar::Find(y_absl::string_view text, size_t pos) const {
  103. return GenericFind(text, delimiters_, pos, AnyOfPolicy());
  104. }
  105. //
  106. // ByLength
  107. //
  108. ByLength::ByLength(ptrdiff_t length) : length_(length) {
  109. Y_ABSL_RAW_CHECK(length > 0, "");
  110. }
  111. y_absl::string_view ByLength::Find(y_absl::string_view text, size_t pos) const {
  112. pos = std::min(pos, text.size()); // truncate `pos`
  113. y_absl::string_view substr = text.substr(pos);
  114. // If the string is shorter than the chunk size we say we
  115. // "can't find the delimiter" so this will be the last chunk.
  116. if (substr.length() <= static_cast<size_t>(length_))
  117. return y_absl::string_view(text.data() + text.size(), 0);
  118. return y_absl::string_view(substr.data() + length_, 0);
  119. }
  120. Y_ABSL_NAMESPACE_END
  121. } // namespace y_absl