write_escaped.h 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226
  1. // -*- C++ -*-
  2. //===----------------------------------------------------------------------===//
  3. //
  4. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  5. // See https://llvm.org/LICENSE.txt for license information.
  6. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  7. //
  8. //===----------------------------------------------------------------------===//
  9. #ifndef _LIBCPP___FORMAT_WRITE_ESCAPED_H
  10. #define _LIBCPP___FORMAT_WRITE_ESCAPED_H
  11. #include <__algorithm/ranges_copy.h>
  12. #include <__algorithm/ranges_for_each.h>
  13. #include <__charconv/to_chars_integral.h>
  14. #include <__charconv/to_chars_result.h>
  15. #include <__chrono/statically_widen.h>
  16. #include <__format/escaped_output_table.h>
  17. #include <__format/formatter_output.h>
  18. #include <__format/parser_std_format_spec.h>
  19. #include <__format/unicode.h>
  20. #include <__iterator/back_insert_iterator.h>
  21. #include <__memory/addressof.h>
  22. #include <__system_error/errc.h>
  23. #include <__type_traits/make_unsigned.h>
  24. #include <__utility/move.h>
  25. #include <string_view>
  26. #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
  27. # pragma GCC system_header
  28. #endif
  29. _LIBCPP_PUSH_MACROS
  30. #include <__undef_macros>
  31. _LIBCPP_BEGIN_NAMESPACE_STD
  32. namespace __formatter {
  33. #if _LIBCPP_STD_VER >= 20
  34. /// Writes a string using format's width estimation algorithm.
  35. ///
  36. /// \note When \c _LIBCPP_HAS_NO_UNICODE is defined the function assumes the
  37. /// input is ASCII.
  38. template <class _CharT>
  39. _LIBCPP_HIDE_FROM_ABI auto
  40. __write_string(basic_string_view<_CharT> __str,
  41. output_iterator<const _CharT&> auto __out_it,
  42. __format_spec::__parsed_specifications<_CharT> __specs) -> decltype(__out_it) {
  43. if (!__specs.__has_precision())
  44. return __formatter::__write_string_no_precision(__str, std::move(__out_it), __specs);
  45. int __size = __formatter::__truncate(__str, __specs.__precision_);
  46. return __formatter::__write(__str.begin(), __str.end(), std::move(__out_it), __specs, __size);
  47. }
  48. #endif // _LIBCPP_STD_VER >= 20
  49. #if _LIBCPP_STD_VER >= 23
  50. struct __nul_terminator {};
  51. template <class _CharT>
  52. _LIBCPP_HIDE_FROM_ABI bool operator==(const _CharT* __cstr, __nul_terminator) {
  53. return *__cstr == _CharT('\0');
  54. }
  55. template <class _CharT>
  56. _LIBCPP_HIDE_FROM_ABI void
  57. __write_escaped_code_unit(basic_string<_CharT>& __str, char32_t __value, const _CharT* __prefix) {
  58. back_insert_iterator __out_it{__str};
  59. std::ranges::copy(__prefix, __nul_terminator{}, __out_it);
  60. char __buffer[8];
  61. to_chars_result __r = std::to_chars(std::begin(__buffer), std::end(__buffer), __value, 16);
  62. _LIBCPP_ASSERT_INTERNAL(__r.ec == errc(0), "Internal buffer too small");
  63. std::ranges::copy(std::begin(__buffer), __r.ptr, __out_it);
  64. __str += _CharT('}');
  65. }
  66. // [format.string.escaped]/2.2.1.2
  67. // ...
  68. // then the sequence \u{hex-digit-sequence} is appended to E, where
  69. // hex-digit-sequence is the shortest hexadecimal representation of C using
  70. // lower-case hexadecimal digits.
  71. template <class _CharT>
  72. _LIBCPP_HIDE_FROM_ABI void __write_well_formed_escaped_code_unit(basic_string<_CharT>& __str, char32_t __value) {
  73. __formatter::__write_escaped_code_unit(__str, __value, _LIBCPP_STATICALLY_WIDEN(_CharT, "\\u{"));
  74. }
  75. // [format.string.escaped]/2.2.3
  76. // Otherwise (X is a sequence of ill-formed code units), each code unit U is
  77. // appended to E in order as the sequence \x{hex-digit-sequence}, where
  78. // hex-digit-sequence is the shortest hexadecimal representation of U using
  79. // lower-case hexadecimal digits.
  80. template <class _CharT>
  81. _LIBCPP_HIDE_FROM_ABI void __write_escape_ill_formed_code_unit(basic_string<_CharT>& __str, char32_t __value) {
  82. __formatter::__write_escaped_code_unit(__str, __value, _LIBCPP_STATICALLY_WIDEN(_CharT, "\\x{"));
  83. }
  84. template <class _CharT>
  85. [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool __is_escaped_sequence_written(basic_string<_CharT>& __str, char32_t __value) {
  86. # ifdef _LIBCPP_HAS_NO_UNICODE
  87. // For ASCII assume everything above 127 is printable.
  88. if (__value > 127)
  89. return false;
  90. # endif
  91. if (!__escaped_output_table::__needs_escape(__value))
  92. return false;
  93. __formatter::__write_well_formed_escaped_code_unit(__str, __value);
  94. return true;
  95. }
  96. template <class _CharT>
  97. [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr char32_t __to_char32(_CharT __value) {
  98. return static_cast<make_unsigned_t<_CharT>>(__value);
  99. }
  100. enum class __escape_quotation_mark { __apostrophe, __double_quote };
  101. // [format.string.escaped]/2
  102. template <class _CharT>
  103. [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool
  104. __is_escaped_sequence_written(basic_string<_CharT>& __str, char32_t __value, __escape_quotation_mark __mark) {
  105. // 2.2.1.1 - Mapped character in [tab:format.escape.sequences]
  106. switch (__value) {
  107. case _CharT('\t'):
  108. __str += _LIBCPP_STATICALLY_WIDEN(_CharT, "\\t");
  109. return true;
  110. case _CharT('\n'):
  111. __str += _LIBCPP_STATICALLY_WIDEN(_CharT, "\\n");
  112. return true;
  113. case _CharT('\r'):
  114. __str += _LIBCPP_STATICALLY_WIDEN(_CharT, "\\r");
  115. return true;
  116. case _CharT('\''):
  117. if (__mark == __escape_quotation_mark::__apostrophe)
  118. __str += _LIBCPP_STATICALLY_WIDEN(_CharT, R"(\')");
  119. else
  120. __str += __value;
  121. return true;
  122. case _CharT('"'):
  123. if (__mark == __escape_quotation_mark::__double_quote)
  124. __str += _LIBCPP_STATICALLY_WIDEN(_CharT, R"(\")");
  125. else
  126. __str += __value;
  127. return true;
  128. case _CharT('\\'):
  129. __str += _LIBCPP_STATICALLY_WIDEN(_CharT, R"(\\)");
  130. return true;
  131. // 2.2.1.2 - Space
  132. case _CharT(' '):
  133. __str += __value;
  134. return true;
  135. }
  136. // 2.2.2
  137. // Otherwise, if X is a shift sequence, the effect on E and further
  138. // decoding of S is unspecified.
  139. // For now shift sequences are ignored and treated as Unicode. Other parts
  140. // of the format library do the same. It's unknown how ostream treats them.
  141. // TODO FMT determine what to do with shift sequences.
  142. // 2.2.1.2.1 and 2.2.1.2.2 - Escape
  143. return __formatter::__is_escaped_sequence_written(__str, __formatter::__to_char32(__value));
  144. }
  145. template <class _CharT>
  146. _LIBCPP_HIDE_FROM_ABI void
  147. __escape(basic_string<_CharT>& __str, basic_string_view<_CharT> __values, __escape_quotation_mark __mark) {
  148. __unicode::__code_point_view<_CharT> __view{__values.begin(), __values.end()};
  149. while (!__view.__at_end()) {
  150. auto __first = __view.__position();
  151. typename __unicode::__consume_result __result = __view.__consume();
  152. if (__result.__status == __unicode::__consume_result::__ok) {
  153. if (!__formatter::__is_escaped_sequence_written(__str, __result.__code_point, __mark))
  154. // 2.2.1.3 - Add the character
  155. ranges::copy(__first, __view.__position(), std::back_insert_iterator(__str));
  156. } else {
  157. // 2.2.3 sequence of ill-formed code units
  158. ranges::for_each(__first, __view.__position(), [&](_CharT __value) {
  159. __formatter::__write_escape_ill_formed_code_unit(__str, __formatter::__to_char32(__value));
  160. });
  161. }
  162. }
  163. }
  164. template <class _CharT>
  165. _LIBCPP_HIDE_FROM_ABI auto
  166. __format_escaped_char(_CharT __value,
  167. output_iterator<const _CharT&> auto __out_it,
  168. __format_spec::__parsed_specifications<_CharT> __specs) -> decltype(__out_it) {
  169. basic_string<_CharT> __str;
  170. __str += _CharT('\'');
  171. __formatter::__escape(__str, basic_string_view{std::addressof(__value), 1}, __escape_quotation_mark::__apostrophe);
  172. __str += _CharT('\'');
  173. return __formatter::__write(__str.data(), __str.data() + __str.size(), std::move(__out_it), __specs, __str.size());
  174. }
  175. template <class _CharT>
  176. _LIBCPP_HIDE_FROM_ABI auto
  177. __format_escaped_string(basic_string_view<_CharT> __values,
  178. output_iterator<const _CharT&> auto __out_it,
  179. __format_spec::__parsed_specifications<_CharT> __specs) -> decltype(__out_it) {
  180. basic_string<_CharT> __str;
  181. __str += _CharT('"');
  182. __formatter::__escape(__str, __values, __escape_quotation_mark::__double_quote);
  183. __str += _CharT('"');
  184. return __formatter::__write_string(basic_string_view{__str}, std::move(__out_it), __specs);
  185. }
  186. #endif // _LIBCPP_STD_VER >= 23
  187. } // namespace __formatter
  188. _LIBCPP_END_NAMESPACE_STD
  189. _LIBCPP_POP_MACROS
  190. #endif // _LIBCPP___FORMAT_WRITE_ESCAPED_H