parsers.py 26 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798
  1. """
  2. pygments.lexers.parsers
  3. ~~~~~~~~~~~~~~~~~~~~~~~
  4. Lexers for parser generators.
  5. :copyright: Copyright 2006-2024 by the Pygments team, see AUTHORS.
  6. :license: BSD, see LICENSE for details.
  7. """
  8. import re
  9. from pygments.lexer import RegexLexer, DelegatingLexer, \
  10. include, bygroups, using
  11. from pygments.token import Punctuation, Other, Text, Comment, Operator, \
  12. Keyword, Name, String, Number, Whitespace
  13. from pygments.lexers.jvm import JavaLexer
  14. from pygments.lexers.c_cpp import CLexer, CppLexer
  15. from pygments.lexers.objective import ObjectiveCLexer
  16. from pygments.lexers.d import DLexer
  17. from pygments.lexers.dotnet import CSharpLexer
  18. from pygments.lexers.ruby import RubyLexer
  19. from pygments.lexers.python import PythonLexer
  20. from pygments.lexers.perl import PerlLexer
  21. __all__ = ['RagelLexer', 'RagelEmbeddedLexer', 'RagelCLexer', 'RagelDLexer',
  22. 'RagelCppLexer', 'RagelObjectiveCLexer', 'RagelRubyLexer',
  23. 'RagelJavaLexer', 'AntlrLexer', 'AntlrPythonLexer',
  24. 'AntlrPerlLexer', 'AntlrRubyLexer', 'AntlrCppLexer',
  25. 'AntlrCSharpLexer', 'AntlrObjectiveCLexer',
  26. 'AntlrJavaLexer', 'AntlrActionScriptLexer',
  27. 'TreetopLexer', 'EbnfLexer']
  28. class RagelLexer(RegexLexer):
  29. """A pure `Ragel <www.colm.net/open-source/ragel>`_ lexer. Use this
  30. for fragments of Ragel. For ``.rl`` files, use
  31. :class:`RagelEmbeddedLexer` instead (or one of the
  32. language-specific subclasses).
  33. """
  34. name = 'Ragel'
  35. url = 'http://www.colm.net/open-source/ragel/'
  36. aliases = ['ragel']
  37. filenames = []
  38. version_added = '1.1'
  39. tokens = {
  40. 'whitespace': [
  41. (r'\s+', Whitespace)
  42. ],
  43. 'comments': [
  44. (r'\#.*$', Comment),
  45. ],
  46. 'keywords': [
  47. (r'(access|action|alphtype)\b', Keyword),
  48. (r'(getkey|write|machine|include)\b', Keyword),
  49. (r'(any|ascii|extend|alpha|digit|alnum|lower|upper)\b', Keyword),
  50. (r'(xdigit|cntrl|graph|print|punct|space|zlen|empty)\b', Keyword)
  51. ],
  52. 'numbers': [
  53. (r'0x[0-9A-Fa-f]+', Number.Hex),
  54. (r'[+-]?[0-9]+', Number.Integer),
  55. ],
  56. 'literals': [
  57. (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
  58. (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single),
  59. (r'\[(\\\\|\\[^\\]|[^\\\]])*\]', String), # square bracket literals
  60. (r'/(?!\*)(\\\\|\\[^\\]|[^/\\])*/', String.Regex), # regular expressions
  61. ],
  62. 'identifiers': [
  63. (r'[a-zA-Z_]\w*', Name.Variable),
  64. ],
  65. 'operators': [
  66. (r',', Operator), # Join
  67. (r'\||&|--?', Operator), # Union, Intersection and Subtraction
  68. (r'\.|<:|:>>?', Operator), # Concatention
  69. (r':', Operator), # Label
  70. (r'->', Operator), # Epsilon Transition
  71. (r'(>|\$|%|<|@|<>)(/|eof\b)', Operator), # EOF Actions
  72. (r'(>|\$|%|<|@|<>)(!|err\b)', Operator), # Global Error Actions
  73. (r'(>|\$|%|<|@|<>)(\^|lerr\b)', Operator), # Local Error Actions
  74. (r'(>|\$|%|<|@|<>)(~|to\b)', Operator), # To-State Actions
  75. (r'(>|\$|%|<|@|<>)(\*|from\b)', Operator), # From-State Actions
  76. (r'>|@|\$|%', Operator), # Transition Actions and Priorities
  77. (r'\*|\?|\+|\{[0-9]*,[0-9]*\}', Operator), # Repetition
  78. (r'!|\^', Operator), # Negation
  79. (r'\(|\)', Operator), # Grouping
  80. ],
  81. 'root': [
  82. include('literals'),
  83. include('whitespace'),
  84. include('comments'),
  85. include('keywords'),
  86. include('numbers'),
  87. include('identifiers'),
  88. include('operators'),
  89. (r'\{', Punctuation, 'host'),
  90. (r'=', Operator),
  91. (r';', Punctuation),
  92. ],
  93. 'host': [
  94. (r'(' + r'|'.join(( # keep host code in largest possible chunks
  95. r'[^{}\'"/#]+', # exclude unsafe characters
  96. r'[^\\]\\[{}]', # allow escaped { or }
  97. # strings and comments may safely contain unsafe characters
  98. r'"(\\\\|\\[^\\]|[^"\\])*"',
  99. r"'(\\\\|\\[^\\]|[^'\\])*'",
  100. r'//.*$\n?', # single line comment
  101. r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment
  102. r'\#.*$\n?', # ruby comment
  103. # regular expression: There's no reason for it to start
  104. # with a * and this stops confusion with comments.
  105. r'/(?!\*)(\\\\|\\[^\\]|[^/\\])*/',
  106. # / is safe now that we've handled regex and javadoc comments
  107. r'/',
  108. )) + r')+', Other),
  109. (r'\{', Punctuation, '#push'),
  110. (r'\}', Punctuation, '#pop'),
  111. ],
  112. }
  113. class RagelEmbeddedLexer(RegexLexer):
  114. """
  115. A lexer for Ragel embedded in a host language file.
  116. This will only highlight Ragel statements. If you want host language
  117. highlighting then call the language-specific Ragel lexer.
  118. """
  119. name = 'Embedded Ragel'
  120. aliases = ['ragel-em']
  121. filenames = ['*.rl']
  122. url = 'http://www.colm.net/open-source/ragel/'
  123. version_added = '1.1'
  124. tokens = {
  125. 'root': [
  126. (r'(' + r'|'.join(( # keep host code in largest possible chunks
  127. r'[^%\'"/#]+', # exclude unsafe characters
  128. r'%(?=[^%]|$)', # a single % sign is okay, just not 2 of them
  129. # strings and comments may safely contain unsafe characters
  130. r'"(\\\\|\\[^\\]|[^"\\])*"',
  131. r"'(\\\\|\\[^\\]|[^'\\])*'",
  132. r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment
  133. r'//.*$\n?', # single line comment
  134. r'\#.*$\n?', # ruby/ragel comment
  135. r'/(?!\*)(\\\\|\\[^\\]|[^/\\])*/', # regular expression
  136. # / is safe now that we've handled regex and javadoc comments
  137. r'/',
  138. )) + r')+', Other),
  139. # Single Line FSM.
  140. # Please don't put a quoted newline in a single line FSM.
  141. # That's just mean. It will break this.
  142. (r'(%%)(?![{%])(.*)($|;)(\n?)', bygroups(Punctuation,
  143. using(RagelLexer),
  144. Punctuation, Text)),
  145. # Multi Line FSM.
  146. (r'(%%%%|%%)\{', Punctuation, 'multi-line-fsm'),
  147. ],
  148. 'multi-line-fsm': [
  149. (r'(' + r'|'.join(( # keep ragel code in largest possible chunks.
  150. r'(' + r'|'.join((
  151. r'[^}\'"\[/#]', # exclude unsafe characters
  152. r'\}(?=[^%]|$)', # } is okay as long as it's not followed by %
  153. r'\}%(?=[^%]|$)', # ...well, one %'s okay, just not two...
  154. r'[^\\]\\[{}]', # ...and } is okay if it's escaped
  155. # allow / if it's preceded with one of these symbols
  156. # (ragel EOF actions)
  157. r'(>|\$|%|<|@|<>)/',
  158. # specifically allow regex followed immediately by *
  159. # so it doesn't get mistaken for a comment
  160. r'/(?!\*)(\\\\|\\[^\\]|[^/\\])*/\*',
  161. # allow / as long as it's not followed by another / or by a *
  162. r'/(?=[^/*]|$)',
  163. # We want to match as many of these as we can in one block.
  164. # Not sure if we need the + sign here,
  165. # does it help performance?
  166. )) + r')+',
  167. # strings and comments may safely contain unsafe characters
  168. r'"(\\\\|\\[^\\]|[^"\\])*"',
  169. r"'(\\\\|\\[^\\]|[^'\\])*'",
  170. r"\[(\\\\|\\[^\\]|[^\]\\])*\]", # square bracket literal
  171. r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment
  172. r'//.*$\n?', # single line comment
  173. r'\#.*$\n?', # ruby/ragel comment
  174. )) + r')+', using(RagelLexer)),
  175. (r'\}%%', Punctuation, '#pop'),
  176. ]
  177. }
  178. def analyse_text(text):
  179. return '@LANG: indep' in text
  180. class RagelRubyLexer(DelegatingLexer):
  181. """
  182. A lexer for Ragel in a Ruby host file.
  183. """
  184. name = 'Ragel in Ruby Host'
  185. aliases = ['ragel-ruby', 'ragel-rb']
  186. filenames = ['*.rl']
  187. url = 'http://www.colm.net/open-source/ragel/'
  188. version_added = '1.1'
  189. def __init__(self, **options):
  190. super().__init__(RubyLexer, RagelEmbeddedLexer, **options)
  191. def analyse_text(text):
  192. return '@LANG: ruby' in text
  193. class RagelCLexer(DelegatingLexer):
  194. """
  195. A lexer for Ragel in a C host file.
  196. """
  197. name = 'Ragel in C Host'
  198. aliases = ['ragel-c']
  199. filenames = ['*.rl']
  200. url = 'http://www.colm.net/open-source/ragel/'
  201. version_added = '1.1'
  202. def __init__(self, **options):
  203. super().__init__(CLexer, RagelEmbeddedLexer, **options)
  204. def analyse_text(text):
  205. return '@LANG: c' in text
  206. class RagelDLexer(DelegatingLexer):
  207. """
  208. A lexer for Ragel in a D host file.
  209. """
  210. name = 'Ragel in D Host'
  211. aliases = ['ragel-d']
  212. filenames = ['*.rl']
  213. url = 'http://www.colm.net/open-source/ragel/'
  214. version_added = '1.1'
  215. def __init__(self, **options):
  216. super().__init__(DLexer, RagelEmbeddedLexer, **options)
  217. def analyse_text(text):
  218. return '@LANG: d' in text
  219. class RagelCppLexer(DelegatingLexer):
  220. """
  221. A lexer for Ragel in a C++ host file.
  222. """
  223. name = 'Ragel in CPP Host'
  224. aliases = ['ragel-cpp']
  225. filenames = ['*.rl']
  226. url = 'http://www.colm.net/open-source/ragel/'
  227. version_added = '1.1'
  228. def __init__(self, **options):
  229. super().__init__(CppLexer, RagelEmbeddedLexer, **options)
  230. def analyse_text(text):
  231. return '@LANG: c++' in text
  232. class RagelObjectiveCLexer(DelegatingLexer):
  233. """
  234. A lexer for Ragel in an Objective C host file.
  235. """
  236. name = 'Ragel in Objective C Host'
  237. aliases = ['ragel-objc']
  238. filenames = ['*.rl']
  239. url = 'http://www.colm.net/open-source/ragel/'
  240. version_added = '1.1'
  241. def __init__(self, **options):
  242. super().__init__(ObjectiveCLexer, RagelEmbeddedLexer, **options)
  243. def analyse_text(text):
  244. return '@LANG: objc' in text
  245. class RagelJavaLexer(DelegatingLexer):
  246. """
  247. A lexer for Ragel in a Java host file.
  248. """
  249. name = 'Ragel in Java Host'
  250. aliases = ['ragel-java']
  251. filenames = ['*.rl']
  252. url = 'http://www.colm.net/open-source/ragel/'
  253. version_added = '1.1'
  254. def __init__(self, **options):
  255. super().__init__(JavaLexer, RagelEmbeddedLexer, **options)
  256. def analyse_text(text):
  257. return '@LANG: java' in text
  258. class AntlrLexer(RegexLexer):
  259. """
  260. Generic ANTLR Lexer.
  261. Should not be called directly, instead
  262. use DelegatingLexer for your target language.
  263. """
  264. name = 'ANTLR'
  265. aliases = ['antlr']
  266. filenames = []
  267. url = 'https://www.antlr.org'
  268. version_added = '1.1'
  269. _id = r'[A-Za-z]\w*'
  270. _TOKEN_REF = r'[A-Z]\w*'
  271. _RULE_REF = r'[a-z]\w*'
  272. _STRING_LITERAL = r'\'(?:\\\\|\\\'|[^\']*)\''
  273. _INT = r'[0-9]+'
  274. tokens = {
  275. 'whitespace': [
  276. (r'\s+', Whitespace),
  277. ],
  278. 'comments': [
  279. (r'//.*$', Comment),
  280. (r'/\*(.|\n)*?\*/', Comment),
  281. ],
  282. 'root': [
  283. include('whitespace'),
  284. include('comments'),
  285. (r'(lexer|parser|tree)?(\s*)(grammar\b)(\s*)(' + _id + ')(;)',
  286. bygroups(Keyword, Whitespace, Keyword, Whitespace, Name.Class,
  287. Punctuation)),
  288. # optionsSpec
  289. (r'options\b', Keyword, 'options'),
  290. # tokensSpec
  291. (r'tokens\b', Keyword, 'tokens'),
  292. # attrScope
  293. (r'(scope)(\s*)(' + _id + r')(\s*)(\{)',
  294. bygroups(Keyword, Whitespace, Name.Variable, Whitespace,
  295. Punctuation), 'action'),
  296. # exception
  297. (r'(catch|finally)\b', Keyword, 'exception'),
  298. # action
  299. (r'(@' + _id + r')(\s*)(::)?(\s*)(' + _id + r')(\s*)(\{)',
  300. bygroups(Name.Label, Whitespace, Punctuation, Whitespace,
  301. Name.Label, Whitespace, Punctuation), 'action'),
  302. # rule
  303. (r'((?:protected|private|public|fragment)\b)?(\s*)(' + _id + ')(!)?',
  304. bygroups(Keyword, Whitespace, Name.Label, Punctuation),
  305. ('rule-alts', 'rule-prelims')),
  306. ],
  307. 'exception': [
  308. (r'\n', Whitespace, '#pop'),
  309. (r'\s', Whitespace),
  310. include('comments'),
  311. (r'\[', Punctuation, 'nested-arg-action'),
  312. (r'\{', Punctuation, 'action'),
  313. ],
  314. 'rule-prelims': [
  315. include('whitespace'),
  316. include('comments'),
  317. (r'returns\b', Keyword),
  318. (r'\[', Punctuation, 'nested-arg-action'),
  319. (r'\{', Punctuation, 'action'),
  320. # throwsSpec
  321. (r'(throws)(\s+)(' + _id + ')',
  322. bygroups(Keyword, Whitespace, Name.Label)),
  323. (r'(,)(\s*)(' + _id + ')',
  324. bygroups(Punctuation, Whitespace, Name.Label)), # Additional throws
  325. # optionsSpec
  326. (r'options\b', Keyword, 'options'),
  327. # ruleScopeSpec - scope followed by target language code or name of action
  328. # TODO finish implementing other possibilities for scope
  329. # L173 ANTLRv3.g from ANTLR book
  330. (r'(scope)(\s+)(\{)', bygroups(Keyword, Whitespace, Punctuation),
  331. 'action'),
  332. (r'(scope)(\s+)(' + _id + r')(\s*)(;)',
  333. bygroups(Keyword, Whitespace, Name.Label, Whitespace, Punctuation)),
  334. # ruleAction
  335. (r'(@' + _id + r')(\s*)(\{)',
  336. bygroups(Name.Label, Whitespace, Punctuation), 'action'),
  337. # finished prelims, go to rule alts!
  338. (r':', Punctuation, '#pop')
  339. ],
  340. 'rule-alts': [
  341. include('whitespace'),
  342. include('comments'),
  343. # These might need to go in a separate 'block' state triggered by (
  344. (r'options\b', Keyword, 'options'),
  345. (r':', Punctuation),
  346. # literals
  347. (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
  348. (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single),
  349. (r'<<([^>]|>[^>])>>', String),
  350. # identifiers
  351. # Tokens start with capital letter.
  352. (r'\$?[A-Z_]\w*', Name.Constant),
  353. # Rules start with small letter.
  354. (r'\$?[a-z_]\w*', Name.Variable),
  355. # operators
  356. (r'(\+|\||->|=>|=|\(|\)|\.\.|\.|\?|\*|\^|!|\#|~)', Operator),
  357. (r',', Punctuation),
  358. (r'\[', Punctuation, 'nested-arg-action'),
  359. (r'\{', Punctuation, 'action'),
  360. (r';', Punctuation, '#pop')
  361. ],
  362. 'tokens': [
  363. include('whitespace'),
  364. include('comments'),
  365. (r'\{', Punctuation),
  366. (r'(' + _TOKEN_REF + r')(\s*)(=)?(\s*)(' + _STRING_LITERAL
  367. + r')?(\s*)(;)',
  368. bygroups(Name.Label, Whitespace, Punctuation, Whitespace,
  369. String, Whitespace, Punctuation)),
  370. (r'\}', Punctuation, '#pop'),
  371. ],
  372. 'options': [
  373. include('whitespace'),
  374. include('comments'),
  375. (r'\{', Punctuation),
  376. (r'(' + _id + r')(\s*)(=)(\s*)(' +
  377. '|'.join((_id, _STRING_LITERAL, _INT, r'\*')) + r')(\s*)(;)',
  378. bygroups(Name.Variable, Whitespace, Punctuation, Whitespace,
  379. Text, Whitespace, Punctuation)),
  380. (r'\}', Punctuation, '#pop'),
  381. ],
  382. 'action': [
  383. (r'(' + r'|'.join(( # keep host code in largest possible chunks
  384. r'[^${}\'"/\\]+', # exclude unsafe characters
  385. # strings and comments may safely contain unsafe characters
  386. r'"(\\\\|\\[^\\]|[^"\\])*"',
  387. r"'(\\\\|\\[^\\]|[^'\\])*'",
  388. r'//.*$\n?', # single line comment
  389. r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment
  390. # regular expression: There's no reason for it to start
  391. # with a * and this stops confusion with comments.
  392. r'/(?!\*)(\\\\|\\[^\\]|[^/\\])*/',
  393. # backslashes are okay, as long as we are not backslashing a %
  394. r'\\(?!%)',
  395. # Now that we've handled regex and javadoc comments
  396. # it's safe to let / through.
  397. r'/',
  398. )) + r')+', Other),
  399. (r'(\\)(%)', bygroups(Punctuation, Other)),
  400. (r'(\$[a-zA-Z]+)(\.?)(text|value)?',
  401. bygroups(Name.Variable, Punctuation, Name.Property)),
  402. (r'\{', Punctuation, '#push'),
  403. (r'\}', Punctuation, '#pop'),
  404. ],
  405. 'nested-arg-action': [
  406. (r'(' + r'|'.join(( # keep host code in largest possible chunks.
  407. r'[^$\[\]\'"/]+', # exclude unsafe characters
  408. # strings and comments may safely contain unsafe characters
  409. r'"(\\\\|\\[^\\]|[^"\\])*"',
  410. r"'(\\\\|\\[^\\]|[^'\\])*'",
  411. r'//.*$\n?', # single line comment
  412. r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment
  413. # regular expression: There's no reason for it to start
  414. # with a * and this stops confusion with comments.
  415. r'/(?!\*)(\\\\|\\[^\\]|[^/\\])*/',
  416. # Now that we've handled regex and javadoc comments
  417. # it's safe to let / through.
  418. r'/',
  419. )) + r')+', Other),
  420. (r'\[', Punctuation, '#push'),
  421. (r'\]', Punctuation, '#pop'),
  422. (r'(\$[a-zA-Z]+)(\.?)(text|value)?',
  423. bygroups(Name.Variable, Punctuation, Name.Property)),
  424. (r'(\\\\|\\\]|\\\[|[^\[\]])+', Other),
  425. ]
  426. }
  427. def analyse_text(text):
  428. return re.search(r'^\s*grammar\s+[a-zA-Z0-9]+\s*;', text, re.M)
  429. # http://www.antlr.org/wiki/display/ANTLR3/Code+Generation+Targets
  430. class AntlrCppLexer(DelegatingLexer):
  431. """
  432. ANTLR with C++ Target
  433. """
  434. name = 'ANTLR With CPP Target'
  435. aliases = ['antlr-cpp']
  436. filenames = ['*.G', '*.g']
  437. url = 'https://www.antlr.org'
  438. version_added = '1.1'
  439. def __init__(self, **options):
  440. super().__init__(CppLexer, AntlrLexer, **options)
  441. def analyse_text(text):
  442. return AntlrLexer.analyse_text(text) and \
  443. re.search(r'^\s*language\s*=\s*C\s*;', text, re.M)
  444. class AntlrObjectiveCLexer(DelegatingLexer):
  445. """
  446. ANTLR with Objective-C Target
  447. """
  448. name = 'ANTLR With ObjectiveC Target'
  449. aliases = ['antlr-objc']
  450. filenames = ['*.G', '*.g']
  451. url = 'https://www.antlr.org'
  452. version_added = '1.1'
  453. def __init__(self, **options):
  454. super().__init__(ObjectiveCLexer, AntlrLexer, **options)
  455. def analyse_text(text):
  456. return AntlrLexer.analyse_text(text) and \
  457. re.search(r'^\s*language\s*=\s*ObjC\s*;', text)
  458. class AntlrCSharpLexer(DelegatingLexer):
  459. """
  460. ANTLR with C# Target
  461. """
  462. name = 'ANTLR With C# Target'
  463. aliases = ['antlr-csharp', 'antlr-c#']
  464. filenames = ['*.G', '*.g']
  465. url = 'https://www.antlr.org'
  466. version_added = '1.1'
  467. def __init__(self, **options):
  468. super().__init__(CSharpLexer, AntlrLexer, **options)
  469. def analyse_text(text):
  470. return AntlrLexer.analyse_text(text) and \
  471. re.search(r'^\s*language\s*=\s*CSharp2\s*;', text, re.M)
  472. class AntlrPythonLexer(DelegatingLexer):
  473. """
  474. ANTLR with Python Target
  475. """
  476. name = 'ANTLR With Python Target'
  477. aliases = ['antlr-python']
  478. filenames = ['*.G', '*.g']
  479. url = 'https://www.antlr.org'
  480. version_added = '1.1'
  481. def __init__(self, **options):
  482. super().__init__(PythonLexer, AntlrLexer, **options)
  483. def analyse_text(text):
  484. return AntlrLexer.analyse_text(text) and \
  485. re.search(r'^\s*language\s*=\s*Python\s*;', text, re.M)
  486. class AntlrJavaLexer(DelegatingLexer):
  487. """
  488. ANTLR with Java Target
  489. """
  490. name = 'ANTLR With Java Target'
  491. aliases = ['antlr-java']
  492. filenames = ['*.G', '*.g']
  493. url = 'https://www.antlr.org'
  494. version_added = '1.1'
  495. def __init__(self, **options):
  496. super().__init__(JavaLexer, AntlrLexer, **options)
  497. def analyse_text(text):
  498. # Antlr language is Java by default
  499. return AntlrLexer.analyse_text(text) and 0.9
  500. class AntlrRubyLexer(DelegatingLexer):
  501. """
  502. ANTLR with Ruby Target
  503. """
  504. name = 'ANTLR With Ruby Target'
  505. aliases = ['antlr-ruby', 'antlr-rb']
  506. filenames = ['*.G', '*.g']
  507. url = 'https://www.antlr.org'
  508. version_added = '1.1'
  509. def __init__(self, **options):
  510. super().__init__(RubyLexer, AntlrLexer, **options)
  511. def analyse_text(text):
  512. return AntlrLexer.analyse_text(text) and \
  513. re.search(r'^\s*language\s*=\s*Ruby\s*;', text, re.M)
  514. class AntlrPerlLexer(DelegatingLexer):
  515. """
  516. ANTLR with Perl Target
  517. """
  518. name = 'ANTLR With Perl Target'
  519. aliases = ['antlr-perl']
  520. filenames = ['*.G', '*.g']
  521. url = 'https://www.antlr.org'
  522. version_added = '1.1'
  523. def __init__(self, **options):
  524. super().__init__(PerlLexer, AntlrLexer, **options)
  525. def analyse_text(text):
  526. return AntlrLexer.analyse_text(text) and \
  527. re.search(r'^\s*language\s*=\s*Perl5\s*;', text, re.M)
  528. class AntlrActionScriptLexer(DelegatingLexer):
  529. """
  530. ANTLR with ActionScript Target
  531. """
  532. name = 'ANTLR With ActionScript Target'
  533. aliases = ['antlr-actionscript', 'antlr-as']
  534. filenames = ['*.G', '*.g']
  535. url = 'https://www.antlr.org'
  536. version_added = '1.1'
  537. def __init__(self, **options):
  538. from pygments.lexers.actionscript import ActionScriptLexer
  539. super().__init__(ActionScriptLexer, AntlrLexer, **options)
  540. def analyse_text(text):
  541. return AntlrLexer.analyse_text(text) and \
  542. re.search(r'^\s*language\s*=\s*ActionScript\s*;', text, re.M)
  543. class TreetopBaseLexer(RegexLexer):
  544. """
  545. A base lexer for `Treetop <http://treetop.rubyforge.org/>`_ grammars.
  546. Not for direct use; use :class:`TreetopLexer` instead.
  547. .. versionadded:: 1.6
  548. """
  549. tokens = {
  550. 'root': [
  551. include('space'),
  552. (r'require[ \t]+[^\n\r]+[\n\r]', Other),
  553. (r'module\b', Keyword.Namespace, 'module'),
  554. (r'grammar\b', Keyword, 'grammar'),
  555. ],
  556. 'module': [
  557. include('space'),
  558. include('end'),
  559. (r'module\b', Keyword, '#push'),
  560. (r'grammar\b', Keyword, 'grammar'),
  561. (r'[A-Z]\w*(?:::[A-Z]\w*)*', Name.Namespace),
  562. ],
  563. 'grammar': [
  564. include('space'),
  565. include('end'),
  566. (r'rule\b', Keyword, 'rule'),
  567. (r'include\b', Keyword, 'include'),
  568. (r'[A-Z]\w*', Name),
  569. ],
  570. 'include': [
  571. include('space'),
  572. (r'[A-Z]\w*(?:::[A-Z]\w*)*', Name.Class, '#pop'),
  573. ],
  574. 'rule': [
  575. include('space'),
  576. include('end'),
  577. (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
  578. (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single),
  579. (r'([A-Za-z_]\w*)(:)', bygroups(Name.Label, Punctuation)),
  580. (r'[A-Za-z_]\w*', Name),
  581. (r'[()]', Punctuation),
  582. (r'[?+*/&!~]', Operator),
  583. (r'\[(?:\\.|\[:\^?[a-z]+:\]|[^\\\]])+\]', String.Regex),
  584. (r'([0-9]*)(\.\.)([0-9]*)',
  585. bygroups(Number.Integer, Operator, Number.Integer)),
  586. (r'(<)([^>]+)(>)', bygroups(Punctuation, Name.Class, Punctuation)),
  587. (r'\{', Punctuation, 'inline_module'),
  588. (r'\.', String.Regex),
  589. ],
  590. 'inline_module': [
  591. (r'\{', Other, 'ruby'),
  592. (r'\}', Punctuation, '#pop'),
  593. (r'[^{}]+', Other),
  594. ],
  595. 'ruby': [
  596. (r'\{', Other, '#push'),
  597. (r'\}', Other, '#pop'),
  598. (r'[^{}]+', Other),
  599. ],
  600. 'space': [
  601. (r'[ \t\n\r]+', Whitespace),
  602. (r'#[^\n]*', Comment.Single),
  603. ],
  604. 'end': [
  605. (r'end\b', Keyword, '#pop'),
  606. ],
  607. }
  608. class TreetopLexer(DelegatingLexer):
  609. """
  610. A lexer for Treetop grammars.
  611. """
  612. name = 'Treetop'
  613. aliases = ['treetop']
  614. filenames = ['*.treetop', '*.tt']
  615. url = 'https://cjheath.github.io/treetop'
  616. version_added = '1.6'
  617. def __init__(self, **options):
  618. super().__init__(RubyLexer, TreetopBaseLexer, **options)
  619. class EbnfLexer(RegexLexer):
  620. """
  621. Lexer for `ISO/IEC 14977 EBNF
  622. <https://en.wikipedia.org/wiki/Extended_Backus%E2%80%93Naur_Form>`_
  623. grammars.
  624. """
  625. name = 'EBNF'
  626. aliases = ['ebnf']
  627. filenames = ['*.ebnf']
  628. mimetypes = ['text/x-ebnf']
  629. url = 'https://en.wikipedia.org/wiki/Extended_Backus%E2%80%93Naur_Form'
  630. version_added = '2.0'
  631. tokens = {
  632. 'root': [
  633. include('whitespace'),
  634. include('comment_start'),
  635. include('identifier'),
  636. (r'=', Operator, 'production'),
  637. ],
  638. 'production': [
  639. include('whitespace'),
  640. include('comment_start'),
  641. include('identifier'),
  642. (r'"[^"]*"', String.Double),
  643. (r"'[^']*'", String.Single),
  644. (r'(\?[^?]*\?)', Name.Entity),
  645. (r'[\[\]{}(),|]', Punctuation),
  646. (r'-', Operator),
  647. (r';', Punctuation, '#pop'),
  648. (r'\.', Punctuation, '#pop'),
  649. ],
  650. 'whitespace': [
  651. (r'\s+', Text),
  652. ],
  653. 'comment_start': [
  654. (r'\(\*', Comment.Multiline, 'comment'),
  655. ],
  656. 'comment': [
  657. (r'[^*)]', Comment.Multiline),
  658. include('comment_start'),
  659. (r'\*\)', Comment.Multiline, '#pop'),
  660. (r'[*)]', Comment.Multiline),
  661. ],
  662. 'identifier': [
  663. (r'([a-zA-Z][\w \-]*)', Keyword),
  664. ],
  665. }