format_analyser.h 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152
  1. #pragma once
  2. #include "format_arg.h"
  3. #include <util/generic/strbuf.h>
  4. #include <algorithm>
  5. #include <array>
  6. #include <string_view>
  7. namespace NYT::NDetail {
  8. ////////////////////////////////////////////////////////////////////////////////
  9. struct TFormatAnalyser
  10. {
  11. public:
  12. using TMarkerLocation = std::tuple<int, int>;
  13. // NB(arkady-e1ppa): Location is considered invalid (e.g. not filled)
  14. // if get<0> == get<1> == 0.
  15. template <class... TArgs>
  16. using TMarkerLocations = std::array<TMarkerLocation, sizeof...(TArgs)>;
  17. // NB(arkady-e1ppa): We can't cover all of them since that would require
  18. // dynamic storage for their coordinates and we do not have
  19. // constexpr context large enough to deallocate dynamic memory at the
  20. // correct time. Thus we store first 5 position and scanning afterwards
  21. // is pessimized. |-1| is for no position at all.
  22. // |-2| is used to imply runtime format.
  23. using TEscapeLocations = std::array<int, 5>;
  24. // TODO(arkady-e1ppa): Until clang-19 consteval functions
  25. // defined out of line produce symbols in rare cases
  26. // causing linker to crash.
  27. template <class... TArgs>
  28. static consteval auto AnalyzeFormat(std::string_view fmt)
  29. {
  30. return DoAnalyzeFormat<TArgs...>(fmt);
  31. }
  32. private:
  33. static consteval bool Contains(std::string_view sv, char symbol)
  34. {
  35. return sv.find(symbol) != std::string_view::npos;
  36. }
  37. struct TSpecifiers
  38. {
  39. std::string_view Conversion;
  40. std::string_view Flags;
  41. };
  42. template <class TArg>
  43. static consteval auto GetSpecifiers()
  44. {
  45. return TSpecifiers{
  46. .Conversion = std::string_view{
  47. std::data(TFormatArg<TArg>::ConversionSpecifiers),
  48. std::size(TFormatArg<TArg>::ConversionSpecifiers)},
  49. .Flags = std::string_view{
  50. std::data(TFormatArg<TArg>::FlagSpecifiers),
  51. std::size(TFormatArg<TArg>::FlagSpecifiers)},
  52. };
  53. }
  54. static constexpr char IntroductorySymbol = '%';
  55. template <class... TArgs>
  56. static consteval auto DoAnalyzeFormat(std::string_view format)
  57. {
  58. std::array<TSpecifiers, sizeof...(TArgs)> specifiers{GetSpecifiers<TArgs>()...};
  59. TMarkerLocations<TArgs...> markerLocations = {};
  60. TEscapeLocations escapeLocations = {};
  61. std::ranges::fill(escapeLocations, -1);
  62. int escapesCount = 0;
  63. int markerCount = 0;
  64. int currentMarkerStart = -1;
  65. for (int index = 0; index < std::ssize(format); ++index) {
  66. auto symbol = format[index];
  67. // Parse verbatim text.
  68. if (currentMarkerStart == -1) {
  69. if (symbol == IntroductorySymbol) {
  70. // Marker maybe begins.
  71. currentMarkerStart = index;
  72. }
  73. continue;
  74. }
  75. // NB: We check for %% first since
  76. // in order to verify if symbol is a specifier
  77. // we need markerCount to be within range of our
  78. // specifier array.
  79. if (symbol == IntroductorySymbol) {
  80. if (currentMarkerStart + 1 != index) {
  81. // '%a% detected'
  82. throw "You may not terminate flag sequence other than %% with \'%\' symbol";
  83. }
  84. // '%%' detected --- skip
  85. if (escapesCount < std::ssize(escapeLocations)) {
  86. escapeLocations[escapesCount] = currentMarkerStart;
  87. ++escapesCount;
  88. }
  89. currentMarkerStart = -1;
  90. continue;
  91. }
  92. // We are inside of marker.
  93. if (markerCount == std::ssize(markerLocations)) {
  94. // Too many markers
  95. throw "Number of arguments supplied to format is smaller than the number of flag sequences";
  96. }
  97. if (Contains(specifiers[markerCount].Conversion, symbol)) {
  98. // Marker has finished.
  99. markerLocations[markerCount]
  100. = std::tuple{currentMarkerStart, index + 1};
  101. currentMarkerStart = -1;
  102. ++markerCount;
  103. continue;
  104. }
  105. if (!Contains(specifiers[markerCount].Flags, symbol)) {
  106. throw "Symbol is not a valid flag specifier; See FlagSpecifiers";
  107. }
  108. }
  109. if (currentMarkerStart != -1) {
  110. // Runaway marker.
  111. throw "Unterminated flag sequence detected; Use \'%%\' to type plain %";
  112. }
  113. if (markerCount < std::ssize(markerLocations)) {
  114. // Missing markers.
  115. throw "Number of arguments supplied to format is greater than the number of flag sequences";
  116. }
  117. // TODO(arkady-e1ppa): Consider per-type verification
  118. // of markers.
  119. return std::tuple(markerLocations, escapeLocations);
  120. }
  121. };
  122. ////////////////////////////////////////////////////////////////////////////////
  123. } // namespace NYT::NDetail