grammar_notation.py 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213
  1. # -*- coding: utf-8 -*-
  2. """
  3. pygments.lexers.grammar_notation
  4. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  5. Lexers for grammar notations like BNF.
  6. :copyright: Copyright 2006-2019 by the Pygments team, see AUTHORS.
  7. :license: BSD, see LICENSE for details.
  8. """
  9. import re
  10. from pygments.lexer import RegexLexer, bygroups, include, this, using, words
  11. from pygments.token import Comment, Keyword, Literal, Name, Number, \
  12. Operator, Punctuation, String, Text
  13. __all__ = ['BnfLexer', 'AbnfLexer', 'JsgfLexer']
  14. class BnfLexer(RegexLexer):
  15. """
  16. This lexer is for grammer notations which are similar to
  17. original BNF.
  18. In order to maximize a number of targets of this lexer,
  19. let's decide some designs:
  20. * We don't distinguish `Terminal Symbol`.
  21. * We do assume that `NonTerminal Symbol` are always enclosed
  22. with arrow brackets.
  23. * We do assume that `NonTerminal Symbol` may include
  24. any printable characters except arrow brackets and ASCII 0x20.
  25. This assumption is for `RBNF <http://www.rfc-base.org/txt/rfc-5511.txt>`_.
  26. * We do assume that target notation doesn't support comment.
  27. * We don't distinguish any operators and punctuation except
  28. `::=`.
  29. Though these desision making might cause too minimal highlighting
  30. and you might be disappointed, but it is reasonable for us.
  31. .. versionadded:: 2.1
  32. """
  33. name = 'BNF'
  34. aliases = ['bnf']
  35. filenames = ['*.bnf']
  36. mimetypes = ['text/x-bnf']
  37. tokens = {
  38. 'root': [
  39. (r'(<)([ -;=?-~]+)(>)',
  40. bygroups(Punctuation, Name.Class, Punctuation)),
  41. # an only operator
  42. (r'::=', Operator),
  43. # fallback
  44. (r'[^<>:]+', Text), # for performance
  45. (r'.', Text),
  46. ],
  47. }
  48. class AbnfLexer(RegexLexer):
  49. """
  50. Lexer for `IETF 7405 ABNF
  51. <http://www.ietf.org/rfc/rfc7405.txt>`_
  52. (Updates `5234 <http://www.ietf.org/rfc/rfc5234.txt>`_)
  53. grammars.
  54. .. versionadded:: 2.1
  55. """
  56. name = 'ABNF'
  57. aliases = ['abnf']
  58. filenames = ['*.abnf']
  59. mimetypes = ['text/x-abnf']
  60. _core_rules = (
  61. 'ALPHA', 'BIT', 'CHAR', 'CR', 'CRLF', 'CTL', 'DIGIT',
  62. 'DQUOTE', 'HEXDIG', 'HTAB', 'LF', 'LWSP', 'OCTET',
  63. 'SP', 'VCHAR', 'WSP')
  64. tokens = {
  65. 'root': [
  66. # comment
  67. (r';.*$', Comment.Single),
  68. # quoted
  69. # double quote itself in this state, it is as '%x22'.
  70. (r'(%[si])?"[^"]*"', Literal),
  71. # binary (but i have never seen...)
  72. (r'%b[01]+\-[01]+\b', Literal), # range
  73. (r'%b[01]+(\.[01]+)*\b', Literal), # concat
  74. # decimal
  75. (r'%d[0-9]+\-[0-9]+\b', Literal), # range
  76. (r'%d[0-9]+(\.[0-9]+)*\b', Literal), # concat
  77. # hexadecimal
  78. (r'%x[0-9a-fA-F]+\-[0-9a-fA-F]+\b', Literal), # range
  79. (r'%x[0-9a-fA-F]+(\.[0-9a-fA-F]+)*\b', Literal), # concat
  80. # repetition (<a>*<b>element) including nRule
  81. (r'\b[0-9]+\*[0-9]+', Operator),
  82. (r'\b[0-9]+\*', Operator),
  83. (r'\b[0-9]+', Operator),
  84. (r'\*', Operator),
  85. # Strictly speaking, these are not keyword but
  86. # are called `Core Rule'.
  87. (words(_core_rules, suffix=r'\b'), Keyword),
  88. # nonterminals (ALPHA *(ALPHA / DIGIT / "-"))
  89. (r'[a-zA-Z][a-zA-Z0-9-]+\b', Name.Class),
  90. # operators
  91. (r'(=/|=|/)', Operator),
  92. # punctuation
  93. (r'[\[\]()]', Punctuation),
  94. # fallback
  95. (r'\s+', Text),
  96. (r'.', Text),
  97. ],
  98. }
  99. class JsgfLexer(RegexLexer):
  100. """
  101. For `JSpeech Grammar Format <https://www.w3.org/TR/jsgf/>`_
  102. grammars.
  103. .. versionadded:: 2.2
  104. """
  105. name = 'JSGF'
  106. aliases = ['jsgf']
  107. filenames = ['*.jsgf']
  108. mimetypes = ['application/jsgf', 'application/x-jsgf', 'text/jsgf']
  109. flags = re.MULTILINE | re.UNICODE
  110. tokens = {
  111. 'root': [
  112. include('comments'),
  113. include('non-comments'),
  114. ],
  115. 'comments': [
  116. (r'/\*\*(?!/)', Comment.Multiline, 'documentation comment'),
  117. (r'/\*[\w\W]*?\*/', Comment.Multiline),
  118. (r'//.*', Comment.Single),
  119. ],
  120. 'non-comments': [
  121. (r'\A#JSGF[^;]*', Comment.Preproc),
  122. (r'\s+', Text),
  123. (r';', Punctuation),
  124. (r'[=|()\[\]*+]', Operator),
  125. (r'/[^/]+/', Number.Float),
  126. (r'"', String.Double, 'string'),
  127. (r'\{', String.Other, 'tag'),
  128. (words(('import', 'public'), suffix=r'\b'), Keyword.Reserved),
  129. (r'grammar\b', Keyword.Reserved, 'grammar name'),
  130. (r'(<)(NULL|VOID)(>)',
  131. bygroups(Punctuation, Name.Builtin, Punctuation)),
  132. (r'<', Punctuation, 'rulename'),
  133. (r'\w+|[^\s;=|()\[\]*+/"{<\w]+', Text),
  134. ],
  135. 'string': [
  136. (r'"', String.Double, '#pop'),
  137. (r'\\.', String.Escape),
  138. (r'[^\\"]+', String.Double),
  139. ],
  140. 'tag': [
  141. (r'\}', String.Other, '#pop'),
  142. (r'\\.', String.Escape),
  143. (r'[^\\}]+', String.Other),
  144. ],
  145. 'grammar name': [
  146. (r';', Punctuation, '#pop'),
  147. (r'\s+', Text),
  148. (r'\.', Punctuation),
  149. (r'[^;\s.]+', Name.Namespace),
  150. ],
  151. 'rulename': [
  152. (r'>', Punctuation, '#pop'),
  153. (r'\*', Punctuation),
  154. (r'\s+', Text),
  155. (r'([^.>]+)(\s*)(\.)', bygroups(Name.Namespace, Text, Punctuation)),
  156. (r'[^.>]+', Name.Constant),
  157. ],
  158. 'documentation comment': [
  159. (r'\*/', Comment.Multiline, '#pop'),
  160. (r'(^\s*\*?\s*)(@(?:example|see)\s+)'
  161. r'([\w\W]*?(?=(?:^\s*\*?\s*@|\*/)))',
  162. bygroups(Comment.Multiline, Comment.Special,
  163. using(this, state='example'))),
  164. (r'(^\s*\*?\s*)(@\S*)',
  165. bygroups(Comment.Multiline, Comment.Special)),
  166. (r'[^*\n@]+|\w|\W', Comment.Multiline),
  167. ],
  168. 'example': [
  169. (r'\n\s*\*', Comment.Multiline),
  170. include('non-comments'),
  171. (r'.', Comment.Multiline),
  172. ],
  173. }