Markup.h 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132
  1. #pragma once
  2. #ifdef __GNUC__
  3. #pragma GCC diagnostic push
  4. #pragma GCC diagnostic ignored "-Wunused-parameter"
  5. #endif
  6. //===- Markup.h -------------------------------------------------*- C++ -*-===//
  7. //
  8. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  9. // See https://llvm.org/LICENSE.txt for license information.
  10. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  11. //
  12. //===----------------------------------------------------------------------===//
  13. ///
  14. /// \file
  15. /// This file declares the log symbolizer markup data model and parser.
  16. ///
  17. /// See https://llvm.org/docs/SymbolizerMarkupFormat.html
  18. ///
  19. //===----------------------------------------------------------------------===//
  20. #ifndef LLVM_DEBUGINFO_SYMBOLIZE_MARKUP_H
  21. #define LLVM_DEBUGINFO_SYMBOLIZE_MARKUP_H
  22. #include "llvm/ADT/SmallVector.h"
  23. #include "llvm/ADT/StringRef.h"
  24. #include "llvm/ADT/StringSet.h"
  25. #include "llvm/Support/Regex.h"
  26. namespace llvm {
  27. namespace symbolize {
  28. /// A node of symbolizer markup.
  29. ///
  30. /// If only the Text field is set, this represents a region of text outside a
  31. /// markup element. ANSI SGR control codes are also reported this way; if
  32. /// detected, then the control code will be the entirety of the Text field, and
  33. /// any surrounding text will be reported as preceding and following nodes.
  34. struct MarkupNode {
  35. /// The full text of this node in the input.
  36. StringRef Text;
  37. /// If this represents an element, the tag. Otherwise, empty.
  38. StringRef Tag;
  39. /// If this represents an element with fields, a list of the field contents.
  40. /// Otherwise, empty.
  41. SmallVector<StringRef> Fields;
  42. bool operator==(const MarkupNode &Other) const {
  43. return Text == Other.Text && Tag == Other.Tag && Fields == Other.Fields;
  44. }
  45. bool operator!=(const MarkupNode &Other) const { return !(*this == Other); }
  46. };
  47. /// Parses a log containing symbolizer markup into a sequence of nodes.
  48. class MarkupParser {
  49. public:
  50. MarkupParser(StringSet<> MultilineTags = {});
  51. /// Parses an individual \p Line of input.
  52. ///
  53. /// Nodes from the previous parseLine() call that haven't yet been extracted
  54. /// by nextNode() are discarded. The nodes returned by nextNode() may
  55. /// reference the input string, so it must be retained by the caller until the
  56. /// last use.
  57. ///
  58. /// Note that some elements may span multiple lines. If a line ends with the
  59. /// start of one of these elements, then no nodes will be produced until the
  60. /// either the end or something that cannot be part of an element is
  61. /// encountered. This may only occur after multiple calls to parseLine(),
  62. /// corresponding to the lines of the multi-line element.
  63. void parseLine(StringRef Line);
  64. /// Inform the parser of that the input stream has ended.
  65. ///
  66. /// This allows the parser to finish any deferred processing (e.g., an
  67. /// in-progress multi-line element) and may cause nextNode() to return
  68. /// additional nodes.
  69. void flush();
  70. /// Returns the next node in the input sequence.
  71. ///
  72. /// Calling nextNode() may invalidate the contents of the node returned by the
  73. /// previous call.
  74. ///
  75. /// \returns the next markup node or std::nullopt if none remain.
  76. std::optional<MarkupNode> nextNode();
  77. bool isSGR(const MarkupNode &Node) const {
  78. return SGRSyntax.match(Node.Text);
  79. }
  80. private:
  81. std::optional<MarkupNode> parseElement(StringRef Line);
  82. void parseTextOutsideMarkup(StringRef Text);
  83. std::optional<StringRef> parseMultiLineBegin(StringRef Line);
  84. std::optional<StringRef> parseMultiLineEnd(StringRef Line);
  85. // Tags of elements that can span multiple lines.
  86. const StringSet<> MultilineTags;
  87. // Contents of a multi-line element that has finished being parsed. Retained
  88. // to keep returned StringRefs for the contents valid.
  89. std::string FinishedMultiline;
  90. // Contents of a multi-line element that is still in the process of receiving
  91. // lines.
  92. std::string InProgressMultiline;
  93. // The line currently being parsed.
  94. StringRef Line;
  95. // Buffer for nodes parsed from the current line.
  96. SmallVector<MarkupNode> Buffer;
  97. // Next buffer index to return.
  98. size_t NextIdx;
  99. // Regular expression matching supported ANSI SGR escape sequences.
  100. const Regex SGRSyntax;
  101. };
  102. } // end namespace symbolize
  103. } // end namespace llvm
  104. #endif // LLVM_DEBUGINFO_SYMBOLIZE_MARKUP_H
  105. #ifdef __GNUC__
  106. #pragma GCC diagnostic pop
  107. #endif