grammar_notation.py 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262
  1. """
  2. pygments.lexers.grammar_notation
  3. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  4. Lexers for grammar notations like BNF.
  5. :copyright: Copyright 2006-2024 by the Pygments team, see AUTHORS.
  6. :license: BSD, see LICENSE for details.
  7. """
  8. from pygments.lexer import RegexLexer, bygroups, include, this, using, words
  9. from pygments.token import Comment, Keyword, Literal, Name, Number, \
  10. Operator, Punctuation, String, Text, Whitespace
  11. __all__ = ['BnfLexer', 'AbnfLexer', 'JsgfLexer', 'PegLexer']
  12. class BnfLexer(RegexLexer):
  13. """
  14. This lexer is for grammar notations which are similar to
  15. original BNF.
  16. In order to maximize a number of targets of this lexer,
  17. let's decide some designs:
  18. * We don't distinguish `Terminal Symbol`.
  19. * We do assume that `NonTerminal Symbol` are always enclosed
  20. with arrow brackets.
  21. * We do assume that `NonTerminal Symbol` may include
  22. any printable characters except arrow brackets and ASCII 0x20.
  23. This assumption is for `RBNF <http://www.rfc-base.org/txt/rfc-5511.txt>`_.
  24. * We do assume that target notation doesn't support comment.
  25. * We don't distinguish any operators and punctuation except
  26. `::=`.
  27. Though these decision making might cause too minimal highlighting
  28. and you might be disappointed, but it is reasonable for us.
  29. """
  30. name = 'BNF'
  31. aliases = ['bnf']
  32. filenames = ['*.bnf']
  33. mimetypes = ['text/x-bnf']
  34. url = 'https://en.wikipedia.org/wiki/Backus%E2%80%93Naur_form'
  35. version_added = '2.1'
  36. tokens = {
  37. 'root': [
  38. (r'(<)([ -;=?-~]+)(>)',
  39. bygroups(Punctuation, Name.Class, Punctuation)),
  40. # an only operator
  41. (r'::=', Operator),
  42. # fallback
  43. (r'[^<>:]+', Text), # for performance
  44. (r'.', Text),
  45. ],
  46. }
  47. class AbnfLexer(RegexLexer):
  48. """
  49. Lexer for IETF 7405 ABNF.
  50. (Updates `5234 <http://www.ietf.org/rfc/rfc5234.txt>`_) grammars.
  51. """
  52. name = 'ABNF'
  53. url = 'http://www.ietf.org/rfc/rfc7405.txt'
  54. aliases = ['abnf']
  55. filenames = ['*.abnf']
  56. mimetypes = ['text/x-abnf']
  57. version_added = '2.1'
  58. _core_rules = (
  59. 'ALPHA', 'BIT', 'CHAR', 'CR', 'CRLF', 'CTL', 'DIGIT',
  60. 'DQUOTE', 'HEXDIG', 'HTAB', 'LF', 'LWSP', 'OCTET',
  61. 'SP', 'VCHAR', 'WSP')
  62. tokens = {
  63. 'root': [
  64. # comment
  65. (r';.*$', Comment.Single),
  66. # quoted
  67. # double quote itself in this state, it is as '%x22'.
  68. (r'(%[si])?"[^"]*"', Literal),
  69. # binary (but i have never seen...)
  70. (r'%b[01]+\-[01]+\b', Literal), # range
  71. (r'%b[01]+(\.[01]+)*\b', Literal), # concat
  72. # decimal
  73. (r'%d[0-9]+\-[0-9]+\b', Literal), # range
  74. (r'%d[0-9]+(\.[0-9]+)*\b', Literal), # concat
  75. # hexadecimal
  76. (r'%x[0-9a-fA-F]+\-[0-9a-fA-F]+\b', Literal), # range
  77. (r'%x[0-9a-fA-F]+(\.[0-9a-fA-F]+)*\b', Literal), # concat
  78. # repetition (<a>*<b>element) including nRule
  79. (r'\b[0-9]+\*[0-9]+', Operator),
  80. (r'\b[0-9]+\*', Operator),
  81. (r'\b[0-9]+', Operator),
  82. (r'\*', Operator),
  83. # Strictly speaking, these are not keyword but
  84. # are called `Core Rule'.
  85. (words(_core_rules, suffix=r'\b'), Keyword),
  86. # nonterminals (ALPHA *(ALPHA / DIGIT / "-"))
  87. (r'[a-zA-Z][a-zA-Z0-9-]*\b', Name.Class),
  88. # operators
  89. (r'(=/|=|/)', Operator),
  90. # punctuation
  91. (r'[\[\]()]', Punctuation),
  92. # fallback
  93. (r'\s+', Whitespace),
  94. (r'.', Text),
  95. ],
  96. }
  97. class JsgfLexer(RegexLexer):
  98. """
  99. For JSpeech Grammar Format grammars.
  100. """
  101. name = 'JSGF'
  102. url = 'https://www.w3.org/TR/jsgf/'
  103. aliases = ['jsgf']
  104. filenames = ['*.jsgf']
  105. mimetypes = ['application/jsgf', 'application/x-jsgf', 'text/jsgf']
  106. version_added = '2.2'
  107. tokens = {
  108. 'root': [
  109. include('comments'),
  110. include('non-comments'),
  111. ],
  112. 'comments': [
  113. (r'/\*\*(?!/)', Comment.Multiline, 'documentation comment'),
  114. (r'/\*[\w\W]*?\*/', Comment.Multiline),
  115. (r'//.*$', Comment.Single),
  116. ],
  117. 'non-comments': [
  118. (r'\A#JSGF[^;]*', Comment.Preproc),
  119. (r'\s+', Whitespace),
  120. (r';', Punctuation),
  121. (r'[=|()\[\]*+]', Operator),
  122. (r'/[^/]+/', Number.Float),
  123. (r'"', String.Double, 'string'),
  124. (r'\{', String.Other, 'tag'),
  125. (words(('import', 'public'), suffix=r'\b'), Keyword.Reserved),
  126. (r'grammar\b', Keyword.Reserved, 'grammar name'),
  127. (r'(<)(NULL|VOID)(>)',
  128. bygroups(Punctuation, Name.Builtin, Punctuation)),
  129. (r'<', Punctuation, 'rulename'),
  130. (r'\w+|[^\s;=|()\[\]*+/"{<\w]+', Text),
  131. ],
  132. 'string': [
  133. (r'"', String.Double, '#pop'),
  134. (r'\\.', String.Escape),
  135. (r'[^\\"]+', String.Double),
  136. ],
  137. 'tag': [
  138. (r'\}', String.Other, '#pop'),
  139. (r'\\.', String.Escape),
  140. (r'[^\\}]+', String.Other),
  141. ],
  142. 'grammar name': [
  143. (r';', Punctuation, '#pop'),
  144. (r'\s+', Whitespace),
  145. (r'\.', Punctuation),
  146. (r'[^;\s.]+', Name.Namespace),
  147. ],
  148. 'rulename': [
  149. (r'>', Punctuation, '#pop'),
  150. (r'\*', Punctuation),
  151. (r'\s+', Whitespace),
  152. (r'([^.>]+)(\s*)(\.)', bygroups(Name.Namespace, Text, Punctuation)),
  153. (r'[^.>]+', Name.Constant),
  154. ],
  155. 'documentation comment': [
  156. (r'\*/', Comment.Multiline, '#pop'),
  157. (r'^(\s*)(\*?)(\s*)(@(?:example|see))(\s+)'
  158. r'([\w\W]*?(?=(?:^\s*\*?\s*@|\*/)))',
  159. bygroups(Whitespace, Comment.Multiline, Whitespace, Comment.Special,
  160. Whitespace, using(this, state='example'))),
  161. (r'(^\s*\*?\s*)(@\S*)',
  162. bygroups(Comment.Multiline, Comment.Special)),
  163. (r'[^*\n@]+|\w|\W', Comment.Multiline),
  164. ],
  165. 'example': [
  166. (r'(\n\s*)(\*)', bygroups(Whitespace, Comment.Multiline)),
  167. include('non-comments'),
  168. (r'.', Comment.Multiline),
  169. ],
  170. }
  171. class PegLexer(RegexLexer):
  172. """
  173. This lexer is for Parsing Expression Grammars (PEG).
  174. Various implementations of PEG have made different decisions
  175. regarding the syntax, so let's try to be accommodating:
  176. * `<-`, `←`, `:`, and `=` are all accepted as rule operators.
  177. * Both `|` and `/` are choice operators.
  178. * `^`, `↑`, and `~` are cut operators.
  179. * A single `a-z` character immediately before a string, or
  180. multiple `a-z` characters following a string, are part of the
  181. string (e.g., `r"..."` or `"..."ilmsuxa`).
  182. """
  183. name = 'PEG'
  184. url = 'https://bford.info/pub/lang/peg.pdf'
  185. aliases = ['peg']
  186. filenames = ['*.peg']
  187. mimetypes = ['text/x-peg']
  188. version_added = '2.6'
  189. tokens = {
  190. 'root': [
  191. # Comments
  192. (r'#.*$', Comment.Single),
  193. # All operators
  194. (r'<-|[←:=/|&!?*+^↑~]', Operator),
  195. # Other punctuation
  196. (r'[()]', Punctuation),
  197. # Keywords
  198. (r'\.', Keyword),
  199. # Character classes
  200. (r'(\[)([^\]]*(?:\\.[^\]\\]*)*)(\])',
  201. bygroups(Punctuation, String, Punctuation)),
  202. # Single and double quoted strings (with optional modifiers)
  203. (r'[a-z]?"[^"\\]*(?:\\.[^"\\]*)*"[a-z]*', String.Double),
  204. (r"[a-z]?'[^'\\]*(?:\\.[^'\\]*)*'[a-z]*", String.Single),
  205. # Nonterminals are not whitespace, operators, or punctuation
  206. (r'[^\s<←:=/|&!?*+\^↑~()\[\]"\'#]+', Name.Class),
  207. # Fallback
  208. (r'.', Text),
  209. ],
  210. }