parser.h 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269
  1. // Copyright 2020 The Abseil Authors.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // https://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. #ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_
  15. #define ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_
  16. #include <stddef.h>
  17. #include <stdlib.h>
  18. #include <cassert>
  19. #include <cstring>
  20. #include <initializer_list>
  21. #include <memory>
  22. #include <string>
  23. #include <utility>
  24. #include <vector>
  25. #include "absl/base/config.h"
  26. #include "absl/base/optimization.h"
  27. #include "absl/strings/internal/str_format/checker.h"
  28. #include "absl/strings/internal/str_format/constexpr_parser.h"
  29. #include "absl/strings/internal/str_format/extension.h"
  30. #include "absl/strings/string_view.h"
  31. namespace absl {
  32. ABSL_NAMESPACE_BEGIN
  33. namespace str_format_internal {
  34. std::string LengthModToString(LengthMod v);
  35. const char* ConsumeUnboundConversionNoInline(const char* p, const char* end,
  36. UnboundConversion* conv,
  37. int* next_arg);
  38. // Parse the format string provided in 'src' and pass the identified items into
  39. // 'consumer'.
  40. // Text runs will be passed by calling
  41. // Consumer::Append(string_view);
  42. // ConversionItems will be passed by calling
  43. // Consumer::ConvertOne(UnboundConversion, string_view);
  44. // In the case of ConvertOne, the string_view that is passed is the
  45. // portion of the format string corresponding to the conversion, not including
  46. // the leading %. On success, it returns true. On failure, it stops and returns
  47. // false.
  48. template <typename Consumer>
  49. bool ParseFormatString(string_view src, Consumer consumer) {
  50. int next_arg = 0;
  51. const char* p = src.data();
  52. const char* const end = p + src.size();
  53. while (p != end) {
  54. const char* percent =
  55. static_cast<const char*>(memchr(p, '%', static_cast<size_t>(end - p)));
  56. if (!percent) {
  57. // We found the last substring.
  58. return consumer.Append(string_view(p, static_cast<size_t>(end - p)));
  59. }
  60. // We found a percent, so push the text run then process the percent.
  61. if (ABSL_PREDICT_FALSE(!consumer.Append(
  62. string_view(p, static_cast<size_t>(percent - p))))) {
  63. return false;
  64. }
  65. if (ABSL_PREDICT_FALSE(percent + 1 >= end)) return false;
  66. auto tag = GetTagForChar(percent[1]);
  67. if (tag.is_conv()) {
  68. if (ABSL_PREDICT_FALSE(next_arg < 0)) {
  69. // This indicates an error in the format string.
  70. // The only way to get `next_arg < 0` here is to have a positional
  71. // argument first which sets next_arg to -1 and then a non-positional
  72. // argument.
  73. return false;
  74. }
  75. p = percent + 2;
  76. // Keep this case separate from the one below.
  77. // ConvertOne is more efficient when the compiler can see that the `basic`
  78. // flag is set.
  79. UnboundConversion conv;
  80. conv.conv = tag.as_conv();
  81. conv.arg_position = ++next_arg;
  82. if (ABSL_PREDICT_FALSE(
  83. !consumer.ConvertOne(conv, string_view(percent + 1, 1)))) {
  84. return false;
  85. }
  86. } else if (percent[1] != '%') {
  87. UnboundConversion conv;
  88. p = ConsumeUnboundConversionNoInline(percent + 1, end, &conv, &next_arg);
  89. if (ABSL_PREDICT_FALSE(p == nullptr)) return false;
  90. if (ABSL_PREDICT_FALSE(!consumer.ConvertOne(
  91. conv, string_view(percent + 1,
  92. static_cast<size_t>(p - (percent + 1)))))) {
  93. return false;
  94. }
  95. } else {
  96. if (ABSL_PREDICT_FALSE(!consumer.Append("%"))) return false;
  97. p = percent + 2;
  98. continue;
  99. }
  100. }
  101. return true;
  102. }
  103. // Always returns true, or fails to compile in a constexpr context if s does not
  104. // point to a constexpr char array.
  105. constexpr bool EnsureConstexpr(string_view s) {
  106. return s.empty() || s[0] == s[0];
  107. }
  108. class ParsedFormatBase {
  109. public:
  110. explicit ParsedFormatBase(
  111. string_view format, bool allow_ignored,
  112. std::initializer_list<FormatConversionCharSet> convs);
  113. ParsedFormatBase(const ParsedFormatBase& other) { *this = other; }
  114. ParsedFormatBase(ParsedFormatBase&& other) { *this = std::move(other); }
  115. ParsedFormatBase& operator=(const ParsedFormatBase& other) {
  116. if (this == &other) return *this;
  117. has_error_ = other.has_error_;
  118. items_ = other.items_;
  119. size_t text_size = items_.empty() ? 0 : items_.back().text_end;
  120. data_.reset(new char[text_size]);
  121. memcpy(data_.get(), other.data_.get(), text_size);
  122. return *this;
  123. }
  124. ParsedFormatBase& operator=(ParsedFormatBase&& other) {
  125. if (this == &other) return *this;
  126. has_error_ = other.has_error_;
  127. data_ = std::move(other.data_);
  128. items_ = std::move(other.items_);
  129. // Reset the vector to make sure the invariants hold.
  130. other.items_.clear();
  131. return *this;
  132. }
  133. template <typename Consumer>
  134. bool ProcessFormat(Consumer consumer) const {
  135. const char* const base = data_.get();
  136. string_view text(base, 0);
  137. for (const auto& item : items_) {
  138. const char* const end = text.data() + text.size();
  139. text =
  140. string_view(end, static_cast<size_t>((base + item.text_end) - end));
  141. if (item.is_conversion) {
  142. if (!consumer.ConvertOne(item.conv, text)) return false;
  143. } else {
  144. if (!consumer.Append(text)) return false;
  145. }
  146. }
  147. return !has_error_;
  148. }
  149. bool has_error() const { return has_error_; }
  150. private:
  151. // Returns whether the conversions match and if !allow_ignored it verifies
  152. // that all conversions are used by the format.
  153. bool MatchesConversions(
  154. bool allow_ignored,
  155. std::initializer_list<FormatConversionCharSet> convs) const;
  156. struct ParsedFormatConsumer;
  157. struct ConversionItem {
  158. bool is_conversion;
  159. // Points to the past-the-end location of this element in the data_ array.
  160. size_t text_end;
  161. UnboundConversion conv;
  162. };
  163. bool has_error_;
  164. std::unique_ptr<char[]> data_;
  165. std::vector<ConversionItem> items_;
  166. };
  167. // A value type representing a preparsed format. These can be created, copied
  168. // around, and reused to speed up formatting loops.
  169. // The user must specify through the template arguments the conversion
  170. // characters used in the format. This will be checked at compile time.
  171. //
  172. // This class uses Conv enum values to specify each argument.
  173. // This allows for more flexibility as you can specify multiple possible
  174. // conversion characters for each argument.
  175. // ParsedFormat<char...> is a simplified alias for when the user only
  176. // needs to specify a single conversion character for each argument.
  177. //
  178. // Example:
  179. // // Extended format supports multiple characters per argument:
  180. // using MyFormat = ExtendedParsedFormat<Conv::d | Conv::x>;
  181. // MyFormat GetFormat(bool use_hex) {
  182. // if (use_hex) return MyFormat("foo %x bar");
  183. // return MyFormat("foo %d bar");
  184. // }
  185. // // 'format' can be used with any value that supports 'd' and 'x',
  186. // // like `int`.
  187. // auto format = GetFormat(use_hex);
  188. // value = StringF(format, i);
  189. //
  190. // This class also supports runtime format checking with the ::New() and
  191. // ::NewAllowIgnored() factory functions.
  192. // This is the only API that allows the user to pass a runtime specified format
  193. // string. These factory functions will return NULL if the format does not match
  194. // the conversions requested by the user.
  195. template <FormatConversionCharSet... C>
  196. class ExtendedParsedFormat : public str_format_internal::ParsedFormatBase {
  197. public:
  198. explicit ExtendedParsedFormat(string_view format)
  199. #ifdef ABSL_INTERNAL_ENABLE_FORMAT_CHECKER
  200. __attribute__((
  201. enable_if(str_format_internal::EnsureConstexpr(format),
  202. "Format string is not constexpr."),
  203. enable_if(str_format_internal::ValidFormatImpl<C...>(format),
  204. "Format specified does not match the template arguments.")))
  205. #endif // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER
  206. : ExtendedParsedFormat(format, false) {
  207. }
  208. // ExtendedParsedFormat factory function.
  209. // The user still has to specify the conversion characters, but they will not
  210. // be checked at compile time. Instead, it will be checked at runtime.
  211. // This delays the checking to runtime, but allows the user to pass
  212. // dynamically sourced formats.
  213. // It returns NULL if the format does not match the conversion characters.
  214. // The user is responsible for checking the return value before using it.
  215. //
  216. // The 'New' variant will check that all the specified arguments are being
  217. // consumed by the format and return NULL if any argument is being ignored.
  218. // The 'NewAllowIgnored' variant will not verify this and will allow formats
  219. // that ignore arguments.
  220. static std::unique_ptr<ExtendedParsedFormat> New(string_view format) {
  221. return New(format, false);
  222. }
  223. static std::unique_ptr<ExtendedParsedFormat> NewAllowIgnored(
  224. string_view format) {
  225. return New(format, true);
  226. }
  227. private:
  228. static std::unique_ptr<ExtendedParsedFormat> New(string_view format,
  229. bool allow_ignored) {
  230. std::unique_ptr<ExtendedParsedFormat> conv(
  231. new ExtendedParsedFormat(format, allow_ignored));
  232. if (conv->has_error()) return nullptr;
  233. return conv;
  234. }
  235. ExtendedParsedFormat(string_view s, bool allow_ignored)
  236. : ParsedFormatBase(s, allow_ignored, {C...}) {}
  237. };
  238. } // namespace str_format_internal
  239. ABSL_NAMESPACE_END
  240. } // namespace absl
  241. #endif // ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_