Markup.cpp 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205
  1. //===- lib/DebugInfo/Symbolize/Markup.cpp ------------------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. ///
  9. /// \file
  10. /// This file defines the log symbolizer markup data model and parser.
  11. ///
  12. //===----------------------------------------------------------------------===//
  13. #include "llvm/DebugInfo/Symbolize/Markup.h"
  14. #include "llvm/ADT/STLExtras.h"
  15. #include "llvm/ADT/StringExtras.h"
  16. namespace llvm {
  17. namespace symbolize {
  // Regular expression source for the SGR control codes that are recognized in
  // text outside of markup elements. It accepts exactly:
  //   "\033[0m"
  //   "\033[1m"
  //   "\033[30m" through "\033[37m"
  static constexpr char SGRSyntaxStr[] = "\033\\[([0-1]|3[0-7])m";
  23. MarkupParser::MarkupParser(StringSet<> MultilineTags)
  24. : MultilineTags(std::move(MultilineTags)), SGRSyntax(SGRSyntaxStr) {}
  25. static StringRef takeTo(StringRef Str, StringRef::iterator Pos) {
  26. return Str.take_front(Pos - Str.begin());
  27. }
  28. static void advanceTo(StringRef &Str, StringRef::iterator Pos) {
  29. Str = Str.drop_front(Pos - Str.begin());
  30. }
  31. void MarkupParser::parseLine(StringRef Line) {
  32. Buffer.clear();
  33. NextIdx = 0;
  34. FinishedMultiline.clear();
  35. this->Line = Line;
  36. }
  37. std::optional<MarkupNode> MarkupParser::nextNode() {
  38. // Pull something out of the buffer if possible.
  39. if (!Buffer.empty()) {
  40. if (NextIdx < Buffer.size())
  41. return std::move(Buffer[NextIdx++]);
  42. NextIdx = 0;
  43. Buffer.clear();
  44. }
  45. // The buffer is empty, so parse the next bit of the line.
  46. if (Line.empty())
  47. return std::nullopt;
  48. if (!InProgressMultiline.empty()) {
  49. if (std::optional<StringRef> MultilineEnd = parseMultiLineEnd(Line)) {
  50. llvm::append_range(InProgressMultiline, *MultilineEnd);
  51. assert(FinishedMultiline.empty() &&
  52. "At most one multi-line element can be finished at a time.");
  53. FinishedMultiline.swap(InProgressMultiline);
  54. // Parse the multi-line element as if it were contiguous.
  55. advanceTo(Line, MultilineEnd->end());
  56. return *parseElement(FinishedMultiline);
  57. }
  58. // The whole line is part of the multi-line element.
  59. llvm::append_range(InProgressMultiline, Line);
  60. Line = Line.drop_front(Line.size());
  61. return std::nullopt;
  62. }
  63. // Find the first valid markup element, if any.
  64. if (std::optional<MarkupNode> Element = parseElement(Line)) {
  65. parseTextOutsideMarkup(takeTo(Line, Element->Text.begin()));
  66. Buffer.push_back(std::move(*Element));
  67. advanceTo(Line, Element->Text.end());
  68. return nextNode();
  69. }
  70. // Since there were no valid elements remaining, see if the line opens a
  71. // multi-line element.
  72. if (std::optional<StringRef> MultilineBegin = parseMultiLineBegin(Line)) {
  73. // Emit any text before the element.
  74. parseTextOutsideMarkup(takeTo(Line, MultilineBegin->begin()));
  75. // Begin recording the multi-line element.
  76. llvm::append_range(InProgressMultiline, *MultilineBegin);
  77. Line = Line.drop_front(Line.size());
  78. return nextNode();
  79. }
  80. // The line doesn't contain any more markup elements, so emit it as text.
  81. parseTextOutsideMarkup(Line);
  82. Line = Line.drop_front(Line.size());
  83. return nextNode();
  84. }
  85. void MarkupParser::flush() {
  86. Buffer.clear();
  87. NextIdx = 0;
  88. Line = {};
  89. if (InProgressMultiline.empty())
  90. return;
  91. FinishedMultiline.swap(InProgressMultiline);
  92. parseTextOutsideMarkup(FinishedMultiline);
  93. }
  94. // Finds and returns the next valid markup element in the given line. Returns
  95. // std::nullopt if the line contains no valid elements.
  96. std::optional<MarkupNode> MarkupParser::parseElement(StringRef Line) {
  97. while (true) {
  98. // Find next element using begin and end markers.
  99. size_t BeginPos = Line.find("{{{");
  100. if (BeginPos == StringRef::npos)
  101. return std::nullopt;
  102. size_t EndPos = Line.find("}}}", BeginPos + 3);
  103. if (EndPos == StringRef::npos)
  104. return std::nullopt;
  105. EndPos += 3;
  106. MarkupNode Element;
  107. Element.Text = Line.slice(BeginPos, EndPos);
  108. Line = Line.substr(EndPos);
  109. // Parse tag.
  110. StringRef Content = Element.Text.drop_front(3).drop_back(3);
  111. StringRef FieldsContent;
  112. std::tie(Element.Tag, FieldsContent) = Content.split(':');
  113. if (Element.Tag.empty())
  114. continue;
  115. // Parse fields.
  116. if (!FieldsContent.empty())
  117. FieldsContent.split(Element.Fields, ":");
  118. else if (Content.back() == ':')
  119. Element.Fields.push_back(FieldsContent);
  120. return Element;
  121. }
  122. }
  123. static MarkupNode textNode(StringRef Text) {
  124. MarkupNode Node;
  125. Node.Text = Text;
  126. return Node;
  127. }
  128. // Parses a region of text known to be outside any markup elements. Such text
  129. // may still contain SGR control codes, so the region is further subdivided into
  130. // control codes and true text regions.
  131. void MarkupParser::parseTextOutsideMarkup(StringRef Text) {
  132. if (Text.empty())
  133. return;
  134. SmallVector<StringRef> Matches;
  135. while (SGRSyntax.match(Text, &Matches)) {
  136. // Emit any text before the SGR element.
  137. if (Matches.begin()->begin() != Text.begin())
  138. Buffer.push_back(textNode(takeTo(Text, Matches.begin()->begin())));
  139. Buffer.push_back(textNode(*Matches.begin()));
  140. advanceTo(Text, Matches.begin()->end());
  141. }
  142. if (!Text.empty())
  143. Buffer.push_back(textNode(Text));
  144. }
  145. // Given that a line doesn't contain any valid markup, see if it ends with the
  146. // start of a multi-line element. If so, returns the beginning.
  147. std::optional<StringRef> MarkupParser::parseMultiLineBegin(StringRef Line) {
  148. // A multi-line begin marker must be the last one on the line.
  149. size_t BeginPos = Line.rfind("{{{");
  150. if (BeginPos == StringRef::npos)
  151. return std::nullopt;
  152. size_t BeginTagPos = BeginPos + 3;
  153. // If there are any end markers afterwards, the begin marker cannot belong to
  154. // a multi-line element.
  155. size_t EndPos = Line.find("}}}", BeginTagPos);
  156. if (EndPos != StringRef::npos)
  157. return std::nullopt;
  158. // Check whether the tag is registered multi-line.
  159. size_t EndTagPos = Line.find(':', BeginTagPos);
  160. if (EndTagPos == StringRef::npos)
  161. return std::nullopt;
  162. StringRef Tag = Line.slice(BeginTagPos, EndTagPos);
  163. if (!MultilineTags.contains(Tag))
  164. return std::nullopt;
  165. return Line.substr(BeginPos);
  166. }
  167. // See if the line begins with the ending of an in-progress multi-line element.
  168. // If so, return the ending.
  169. std::optional<StringRef> MarkupParser::parseMultiLineEnd(StringRef Line) {
  170. size_t EndPos = Line.find("}}}");
  171. if (EndPos == StringRef::npos)
  172. return std::nullopt;
  173. return Line.take_front(EndPos + 3);
  174. }
  175. } // end namespace symbolize
  176. } // end namespace llvm