parsers.py

  1. """
  2. pygments.lexers.parsers
  3. ~~~~~~~~~~~~~~~~~~~~~~~
  4. Lexers for parser generators.
  5. :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS.
  6. :license: BSD, see LICENSE for details.
  7. """
  8. import re
  9. from pygments.lexer import RegexLexer, DelegatingLexer, \
  10. include, bygroups, using
  11. from pygments.token import Punctuation, Other, Text, Comment, Operator, \
  12. Keyword, Name, String, Number, Whitespace
  13. from pygments.lexers.jvm import JavaLexer
  14. from pygments.lexers.c_cpp import CLexer, CppLexer
  15. from pygments.lexers.objective import ObjectiveCLexer
  16. from pygments.lexers.d import DLexer
  17. from pygments.lexers.dotnet import CSharpLexer
  18. from pygments.lexers.ruby import RubyLexer
  19. from pygments.lexers.python import PythonLexer
  20. from pygments.lexers.perl import PerlLexer
  21. __all__ = ['RagelLexer', 'RagelEmbeddedLexer', 'RagelCLexer', 'RagelDLexer',
  22. 'RagelCppLexer', 'RagelObjectiveCLexer', 'RagelRubyLexer',
  23. 'RagelJavaLexer', 'AntlrLexer', 'AntlrPythonLexer',
  24. 'AntlrPerlLexer', 'AntlrRubyLexer', 'AntlrCppLexer',
  25. 'AntlrCSharpLexer', 'AntlrObjectiveCLexer',
  26. 'AntlrJavaLexer', 'AntlrActionScriptLexer',
  27. 'TreetopLexer', 'EbnfLexer']


class RagelLexer(RegexLexer):
    """A pure `Ragel <www.colm.net/open-source/ragel>`_ lexer. Use this
    for fragments of Ragel. For ``.rl`` files, use
    :class:`RagelEmbeddedLexer` instead (or one of the
    language-specific subclasses).

    .. versionadded:: 1.1
    """

    name = 'Ragel'
    url = 'http://www.colm.net/open-source/ragel/'
    aliases = ['ragel']
    filenames = []

    tokens = {
        'whitespace': [
            (r'\s+', Whitespace)
        ],
        'comments': [
            (r'\#.*$', Comment),
        ],
        'keywords': [
            (r'(access|action|alphtype)\b', Keyword),
            (r'(getkey|write|machine|include)\b', Keyword),
            (r'(any|ascii|extend|alpha|digit|alnum|lower|upper)\b', Keyword),
            (r'(xdigit|cntrl|graph|print|punct|space|zlen|empty)\b', Keyword)
        ],
        'numbers': [
            (r'0x[0-9A-Fa-f]+', Number.Hex),
            (r'[+-]?[0-9]+', Number.Integer),
        ],
        'literals': [
            (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
            (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single),
            (r'\[(\\\\|\\[^\\]|[^\\\]])*\]', String),  # square bracket literals
            (r'/(?!\*)(\\\\|\\[^\\]|[^/\\])*/', String.Regex),  # regular expressions
        ],
        'identifiers': [
            (r'[a-zA-Z_]\w*', Name.Variable),
        ],
        'operators': [
            (r',', Operator),  # Join
            (r'\||&|--?', Operator),  # Union, Intersection and Subtraction
  68. (r'\.|<:|:>>?', Operator), # Concatention
            (r':', Operator),  # Label
            (r'->', Operator),  # Epsilon Transition
            (r'(>|\$|%|<|@|<>)(/|eof\b)', Operator),  # EOF Actions
            (r'(>|\$|%|<|@|<>)(!|err\b)', Operator),  # Global Error Actions
            (r'(>|\$|%|<|@|<>)(\^|lerr\b)', Operator),  # Local Error Actions
            (r'(>|\$|%|<|@|<>)(~|to\b)', Operator),  # To-State Actions
            (r'(>|\$|%|<|@|<>)(\*|from\b)', Operator),  # From-State Actions
            (r'>|@|\$|%', Operator),  # Transition Actions and Priorities
            (r'\*|\?|\+|\{[0-9]*,[0-9]*\}', Operator),  # Repetition
            (r'!|\^', Operator),  # Negation
            (r'\(|\)', Operator),  # Grouping
        ],
        'root': [
            include('literals'),
            include('whitespace'),
            include('comments'),
            include('keywords'),
            include('numbers'),
            include('identifiers'),
            include('operators'),
            (r'\{', Punctuation, 'host'),
            (r'=', Operator),
            (r';', Punctuation),
        ],
        'host': [
            (r'(' + r'|'.join((  # keep host code in largest possible chunks
                r'[^{}\'"/#]+',  # exclude unsafe characters
                r'[^\\]\\[{}]',  # allow escaped { or }
                # strings and comments may safely contain unsafe characters
                r'"(\\\\|\\[^\\]|[^"\\])*"',
                r"'(\\\\|\\[^\\]|[^'\\])*'",
                r'//.*$\n?',  # single line comment
                r'/\*(.|\n)*?\*/',  # multi-line javadoc-style comment
                r'\#.*$\n?',  # ruby comment
                # regular expression: There's no reason for it to start
                # with a * and this stops confusion with comments.
                r'/(?!\*)(\\\\|\\[^\\]|[^/\\])*/',
                # / is safe now that we've handled regex and javadoc comments
                r'/',
            )) + r')+', Other),
            (r'\{', Punctuation, '#push'),
            (r'\}', Punctuation, '#pop'),
        ],
    }
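
# A minimal usage sketch (illustrative only): highlighting a bare Ragel
# fragment with RagelLexer. It assumes nothing beyond Pygments' public
# highlight() API and TerminalFormatter; the Ragel snippet is invented for
# illustration. Kept as a comment so importing this module has no side
# effects:
#
#     from pygments import highlight
#     from pygments.formatters import TerminalFormatter
#
#     fragment = "main := ( lower+ digit* ) '\\n';"  # made-up Ragel fragment
#     print(highlight(fragment, RagelLexer(), TerminalFormatter()))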


class RagelEmbeddedLexer(RegexLexer):
    """
    A lexer for Ragel embedded in a host language file.

    This will only highlight Ragel statements. If you want host language
    highlighting then call the language-specific Ragel lexer.

    .. versionadded:: 1.1
    """

    name = 'Embedded Ragel'
    aliases = ['ragel-em']
    filenames = ['*.rl']

    tokens = {
        'root': [
            (r'(' + r'|'.join((  # keep host code in largest possible chunks
                r'[^%\'"/#]+',  # exclude unsafe characters
                r'%(?=[^%]|$)',  # a single % sign is okay, just not 2 of them
                # strings and comments may safely contain unsafe characters
                r'"(\\\\|\\[^\\]|[^"\\])*"',
                r"'(\\\\|\\[^\\]|[^'\\])*'",
                r'/\*(.|\n)*?\*/',  # multi-line javadoc-style comment
                r'//.*$\n?',  # single line comment
                r'\#.*$\n?',  # ruby/ragel comment
                r'/(?!\*)(\\\\|\\[^\\]|[^/\\])*/',  # regular expression
                # / is safe now that we've handled regex and javadoc comments
                r'/',
            )) + r')+', Other),

            # Single Line FSM.
            # Please don't put a quoted newline in a single line FSM.
            # That's just mean. It will break this.
            (r'(%%)(?![{%])(.*)($|;)(\n?)', bygroups(Punctuation,
                                                     using(RagelLexer),
                                                     Punctuation, Text)),

            # Multi Line FSM.
            (r'(%%%%|%%)\{', Punctuation, 'multi-line-fsm'),
        ],
        'multi-line-fsm': [
            (r'(' + r'|'.join((  # keep ragel code in largest possible chunks.
                r'(' + r'|'.join((
                    r'[^}\'"\[/#]',  # exclude unsafe characters
                    r'\}(?=[^%]|$)',  # } is okay as long as it's not followed by %
                    r'\}%(?=[^%]|$)',  # ...well, one %'s okay, just not two...
                    r'[^\\]\\[{}]',  # ...and } is okay if it's escaped
                    # allow / if it's preceded with one of these symbols
                    # (ragel EOF actions)
                    r'(>|\$|%|<|@|<>)/',
                    # specifically allow regex followed immediately by *
                    # so it doesn't get mistaken for a comment
                    r'/(?!\*)(\\\\|\\[^\\]|[^/\\])*/\*',
                    # allow / as long as it's not followed by another / or by a *
                    r'/(?=[^/*]|$)',
                    # We want to match as many of these as we can in one block.
                    # Not sure if we need the + sign here,
                    # does it help performance?
                )) + r')+',
                # strings and comments may safely contain unsafe characters
                r'"(\\\\|\\[^\\]|[^"\\])*"',
                r"'(\\\\|\\[^\\]|[^'\\])*'",
                r"\[(\\\\|\\[^\\]|[^\]\\])*\]",  # square bracket literal
                r'/\*(.|\n)*?\*/',  # multi-line javadoc-style comment
                r'//.*$\n?',  # single line comment
                r'\#.*$\n?',  # ruby/ragel comment
            )) + r')+', using(RagelLexer)),
            (r'\}%%', Punctuation, '#pop'),
        ]
    }

    def analyse_text(text):
        return '@LANG: indep' in text


class RagelRubyLexer(DelegatingLexer):
    """
    A lexer for Ragel in a Ruby host file.

    .. versionadded:: 1.1
    """

    name = 'Ragel in Ruby Host'
    aliases = ['ragel-ruby', 'ragel-rb']
    filenames = ['*.rl']

    def __init__(self, **options):
        super().__init__(RubyLexer, RagelEmbeddedLexer, **options)

    def analyse_text(text):
        return '@LANG: ruby' in text


class RagelCLexer(DelegatingLexer):
    """
    A lexer for Ragel in a C host file.

    .. versionadded:: 1.1
    """

    name = 'Ragel in C Host'
    aliases = ['ragel-c']
    filenames = ['*.rl']

    def __init__(self, **options):
        super().__init__(CLexer, RagelEmbeddedLexer, **options)

    def analyse_text(text):
        return '@LANG: c' in text


class RagelDLexer(DelegatingLexer):
    """
    A lexer for Ragel in a D host file.

    .. versionadded:: 1.1
    """

    name = 'Ragel in D Host'
    aliases = ['ragel-d']
    filenames = ['*.rl']

    def __init__(self, **options):
        super().__init__(DLexer, RagelEmbeddedLexer, **options)

    def analyse_text(text):
        return '@LANG: d' in text


class RagelCppLexer(DelegatingLexer):
    """
    A lexer for Ragel in a C++ host file.

    .. versionadded:: 1.1
    """

    name = 'Ragel in CPP Host'
    aliases = ['ragel-cpp']
    filenames = ['*.rl']

    def __init__(self, **options):
        super().__init__(CppLexer, RagelEmbeddedLexer, **options)

    def analyse_text(text):
        return '@LANG: c++' in text


class RagelObjectiveCLexer(DelegatingLexer):
    """
    A lexer for Ragel in an Objective C host file.

    .. versionadded:: 1.1
    """

    name = 'Ragel in Objective C Host'
    aliases = ['ragel-objc']
    filenames = ['*.rl']

    def __init__(self, **options):
        super().__init__(ObjectiveCLexer, RagelEmbeddedLexer, **options)

    def analyse_text(text):
        return '@LANG: objc' in text


class RagelJavaLexer(DelegatingLexer):
    """
    A lexer for Ragel in a Java host file.

    .. versionadded:: 1.1
    """

    name = 'Ragel in Java Host'
    aliases = ['ragel-java']
    filenames = ['*.rl']

    def __init__(self, **options):
        super().__init__(JavaLexer, RagelEmbeddedLexer, **options)

    def analyse_text(text):
        return '@LANG: java' in text
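
# A small sketch (illustrative only) of how the host-specific Ragel lexers
# above are usually reached: either explicitly by alias, or via guess_lexer(),
# whose scoring uses the analyse_text() methods that key off an '@LANG: ...'
# marker in the input. Only Pygments' public lookup API is assumed here:
#
#     from pygments.lexers import get_lexer_by_name, guess_lexer
#
#     lexer = get_lexer_by_name('ragel-ruby')   # -> RagelRubyLexer
#     lexer = guess_lexer('@LANG: ruby\n...')   # likely RagelRubyLexer as well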


class AntlrLexer(RegexLexer):
    """
    Generic `ANTLR`_ Lexer.

    Should not be called directly; instead, use a DelegatingLexer for
    your target language.

    .. versionadded:: 1.1

    .. _ANTLR: http://www.antlr.org/
    """
    name = 'ANTLR'
    aliases = ['antlr']
    filenames = []

    _id = r'[A-Za-z]\w*'
    _TOKEN_REF = r'[A-Z]\w*'
    _RULE_REF = r'[a-z]\w*'
    _STRING_LITERAL = r'\'(?:\\\\|\\\'|[^\']*)\''
    _INT = r'[0-9]+'

    tokens = {
        'whitespace': [
            (r'\s+', Whitespace),
        ],
        'comments': [
            (r'//.*$', Comment),
            (r'/\*(.|\n)*?\*/', Comment),
        ],
        'root': [
            include('whitespace'),
            include('comments'),
            (r'(lexer|parser|tree)?(\s*)(grammar\b)(\s*)(' + _id + ')(;)',
             bygroups(Keyword, Whitespace, Keyword, Whitespace, Name.Class,
                      Punctuation)),
            # optionsSpec
            (r'options\b', Keyword, 'options'),
            # tokensSpec
            (r'tokens\b', Keyword, 'tokens'),
            # attrScope
            (r'(scope)(\s*)(' + _id + r')(\s*)(\{)',
             bygroups(Keyword, Whitespace, Name.Variable, Whitespace,
                      Punctuation), 'action'),
            # exception
            (r'(catch|finally)\b', Keyword, 'exception'),
            # action
            (r'(@' + _id + r')(\s*)(::)?(\s*)(' + _id + r')(\s*)(\{)',
             bygroups(Name.Label, Whitespace, Punctuation, Whitespace,
                      Name.Label, Whitespace, Punctuation), 'action'),
            # rule
            (r'((?:protected|private|public|fragment)\b)?(\s*)(' + _id + ')(!)?',
             bygroups(Keyword, Whitespace, Name.Label, Punctuation),
             ('rule-alts', 'rule-prelims')),
        ],
        'exception': [
            (r'\n', Whitespace, '#pop'),
            (r'\s', Whitespace),
            include('comments'),
            (r'\[', Punctuation, 'nested-arg-action'),
            (r'\{', Punctuation, 'action'),
        ],
        'rule-prelims': [
            include('whitespace'),
            include('comments'),
            (r'returns\b', Keyword),
            (r'\[', Punctuation, 'nested-arg-action'),
            (r'\{', Punctuation, 'action'),
            # throwsSpec
            (r'(throws)(\s+)(' + _id + ')',
             bygroups(Keyword, Whitespace, Name.Label)),
            (r'(,)(\s*)(' + _id + ')',
             bygroups(Punctuation, Whitespace, Name.Label)),  # Additional throws
            # optionsSpec
            (r'options\b', Keyword, 'options'),
            # ruleScopeSpec - scope followed by target language code or name of action
            # TODO finish implementing other possibilities for scope
            # L173 ANTLRv3.g from ANTLR book
            (r'(scope)(\s+)(\{)', bygroups(Keyword, Whitespace, Punctuation),
             'action'),
            (r'(scope)(\s+)(' + _id + r')(\s*)(;)',
             bygroups(Keyword, Whitespace, Name.Label, Whitespace, Punctuation)),
            # ruleAction
            (r'(@' + _id + r')(\s*)(\{)',
             bygroups(Name.Label, Whitespace, Punctuation), 'action'),
            # finished prelims, go to rule alts!
            (r':', Punctuation, '#pop')
        ],
        'rule-alts': [
            include('whitespace'),
            include('comments'),
            # These might need to go in a separate 'block' state triggered by (
            (r'options\b', Keyword, 'options'),
            (r':', Punctuation),
            # literals
            (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
            (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single),
            (r'<<([^>]|>[^>])>>', String),
            # identifiers
            # Tokens start with capital letter.
            (r'\$?[A-Z_]\w*', Name.Constant),
            # Rules start with small letter.
            (r'\$?[a-z_]\w*', Name.Variable),
            # operators
            (r'(\+|\||->|=>|=|\(|\)|\.\.|\.|\?|\*|\^|!|\#|~)', Operator),
            (r',', Punctuation),
            (r'\[', Punctuation, 'nested-arg-action'),
            (r'\{', Punctuation, 'action'),
            (r';', Punctuation, '#pop')
        ],
        'tokens': [
            include('whitespace'),
            include('comments'),
            (r'\{', Punctuation),
            (r'(' + _TOKEN_REF + r')(\s*)(=)?(\s*)(' + _STRING_LITERAL
             + r')?(\s*)(;)',
             bygroups(Name.Label, Whitespace, Punctuation, Whitespace,
                      String, Whitespace, Punctuation)),
            (r'\}', Punctuation, '#pop'),
        ],
        'options': [
            include('whitespace'),
            include('comments'),
            (r'\{', Punctuation),
            (r'(' + _id + r')(\s*)(=)(\s*)(' +
             '|'.join((_id, _STRING_LITERAL, _INT, r'\*')) + r')(\s*)(;)',
             bygroups(Name.Variable, Whitespace, Punctuation, Whitespace,
                      Text, Whitespace, Punctuation)),
            (r'\}', Punctuation, '#pop'),
        ],
        'action': [
            (r'(' + r'|'.join((  # keep host code in largest possible chunks
                r'[^${}\'"/\\]+',  # exclude unsafe characters
                # strings and comments may safely contain unsafe characters
                r'"(\\\\|\\[^\\]|[^"\\])*"',
                r"'(\\\\|\\[^\\]|[^'\\])*'",
                r'//.*$\n?',  # single line comment
                r'/\*(.|\n)*?\*/',  # multi-line javadoc-style comment
                # regular expression: There's no reason for it to start
                # with a * and this stops confusion with comments.
                r'/(?!\*)(\\\\|\\[^\\]|[^/\\])*/',
                # backslashes are okay, as long as we are not backslashing a %
                r'\\(?!%)',
                # Now that we've handled regex and javadoc comments
                # it's safe to let / through.
                r'/',
            )) + r')+', Other),
            (r'(\\)(%)', bygroups(Punctuation, Other)),
            (r'(\$[a-zA-Z]+)(\.?)(text|value)?',
             bygroups(Name.Variable, Punctuation, Name.Property)),
            (r'\{', Punctuation, '#push'),
            (r'\}', Punctuation, '#pop'),
        ],
        'nested-arg-action': [
            (r'(' + r'|'.join((  # keep host code in largest possible chunks.
                r'[^$\[\]\'"/]+',  # exclude unsafe characters
                # strings and comments may safely contain unsafe characters
                r'"(\\\\|\\[^\\]|[^"\\])*"',
                r"'(\\\\|\\[^\\]|[^'\\])*'",
                r'//.*$\n?',  # single line comment
                r'/\*(.|\n)*?\*/',  # multi-line javadoc-style comment
                # regular expression: There's no reason for it to start
                # with a * and this stops confusion with comments.
                r'/(?!\*)(\\\\|\\[^\\]|[^/\\])*/',
                # Now that we've handled regex and javadoc comments
                # it's safe to let / through.
                r'/',
            )) + r')+', Other),
            (r'\[', Punctuation, '#push'),
            (r'\]', Punctuation, '#pop'),
            (r'(\$[a-zA-Z]+)(\.?)(text|value)?',
             bygroups(Name.Variable, Punctuation, Name.Property)),
            (r'(\\\\|\\\]|\\\[|[^\[\]])+', Other),
        ]
    }

    def analyse_text(text):
        return re.search(r'^\s*grammar\s+[a-zA-Z0-9]+\s*;', text, re.M)
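
# Illustrative sketch only: AntlrLexer.analyse_text() above matches a
# 'grammar Name;' header, and the target-specific subclasses below refine the
# guess by also checking the 'language = ...;' option. The grammar text here
# is a made-up fragment, and only Pygments' public API is assumed:
#
#     from pygments import highlight
#     from pygments.formatters import TerminalFormatter
#
#     grammar = "grammar Hello;\nr : 'hello' ID;"  # invented ANTLR snippet
#     print(highlight(grammar, AntlrLexer(), TerminalFormatter()))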


# http://www.antlr.org/wiki/display/ANTLR3/Code+Generation+Targets

class AntlrCppLexer(DelegatingLexer):
    """
    ANTLR with C++ Target

    .. versionadded:: 1.1
    """

    name = 'ANTLR With CPP Target'
    aliases = ['antlr-cpp']
    filenames = ['*.G', '*.g']

    def __init__(self, **options):
        super().__init__(CppLexer, AntlrLexer, **options)

    def analyse_text(text):
        return AntlrLexer.analyse_text(text) and \
            re.search(r'^\s*language\s*=\s*C\s*;', text, re.M)


class AntlrObjectiveCLexer(DelegatingLexer):
    """
    ANTLR with Objective-C Target

    .. versionadded:: 1.1
    """

    name = 'ANTLR With ObjectiveC Target'
    aliases = ['antlr-objc']
    filenames = ['*.G', '*.g']

    def __init__(self, **options):
        super().__init__(ObjectiveCLexer, AntlrLexer, **options)

    def analyse_text(text):
        return AntlrLexer.analyse_text(text) and \
            re.search(r'^\s*language\s*=\s*ObjC\s*;', text)


class AntlrCSharpLexer(DelegatingLexer):
    """
    ANTLR with C# Target

    .. versionadded:: 1.1
    """

    name = 'ANTLR With C# Target'
    aliases = ['antlr-csharp', 'antlr-c#']
    filenames = ['*.G', '*.g']

    def __init__(self, **options):
        super().__init__(CSharpLexer, AntlrLexer, **options)

    def analyse_text(text):
        return AntlrLexer.analyse_text(text) and \
            re.search(r'^\s*language\s*=\s*CSharp2\s*;', text, re.M)


class AntlrPythonLexer(DelegatingLexer):
    """
    ANTLR with Python Target

    .. versionadded:: 1.1
    """

    name = 'ANTLR With Python Target'
    aliases = ['antlr-python']
    filenames = ['*.G', '*.g']

    def __init__(self, **options):
        super().__init__(PythonLexer, AntlrLexer, **options)

    def analyse_text(text):
        return AntlrLexer.analyse_text(text) and \
            re.search(r'^\s*language\s*=\s*Python\s*;', text, re.M)


class AntlrJavaLexer(DelegatingLexer):
    """
    ANTLR with Java Target

    .. versionadded:: 1.1
  479. """
  480. name = 'ANTLR With Java Target'
  481. aliases = ['antlr-java']
  482. filenames = ['*.G', '*.g']
  483. def __init__(self, **options):
  484. super().__init__(JavaLexer, AntlrLexer, **options)
  485. def analyse_text(text):
  486. # Antlr language is Java by default
  487. return AntlrLexer.analyse_text(text) and 0.9
  488. class AntlrRubyLexer(DelegatingLexer):
  489. """
  490. ANTLR with Ruby Target
  491. .. versionadded:: 1.1
  492. """
  493. name = 'ANTLR With Ruby Target'
  494. aliases = ['antlr-ruby', 'antlr-rb']
  495. filenames = ['*.G', '*.g']
  496. def __init__(self, **options):
  497. super().__init__(RubyLexer, AntlrLexer, **options)
  498. def analyse_text(text):
  499. return AntlrLexer.analyse_text(text) and \
  500. re.search(r'^\s*language\s*=\s*Ruby\s*;', text, re.M)
  501. class AntlrPerlLexer(DelegatingLexer):
  502. """
  503. ANTLR with Perl Target
  504. .. versionadded:: 1.1
  505. """
  506. name = 'ANTLR With Perl Target'
  507. aliases = ['antlr-perl']
  508. filenames = ['*.G', '*.g']
  509. def __init__(self, **options):
  510. super().__init__(PerlLexer, AntlrLexer, **options)
  511. def analyse_text(text):
  512. return AntlrLexer.analyse_text(text) and \
  513. re.search(r'^\s*language\s*=\s*Perl5\s*;', text, re.M)
  514. class AntlrActionScriptLexer(DelegatingLexer):
  515. """
  516. ANTLR with ActionScript Target
  517. .. versionadded:: 1.1
  518. """
  519. name = 'ANTLR With ActionScript Target'
  520. aliases = ['antlr-actionscript', 'antlr-as']
  521. filenames = ['*.G', '*.g']
  522. def __init__(self, **options):
  523. from pygments.lexers.actionscript import ActionScriptLexer
  524. super().__init__(ActionScriptLexer, AntlrLexer, **options)
  525. def analyse_text(text):
  526. return AntlrLexer.analyse_text(text) and \
  527. re.search(r'^\s*language\s*=\s*ActionScript\s*;', text, re.M)


class TreetopBaseLexer(RegexLexer):
    """
    A base lexer for `Treetop <http://treetop.rubyforge.org/>`_ grammars.
    Not for direct use; use :class:`TreetopLexer` instead.

    .. versionadded:: 1.6
    """

    tokens = {
        'root': [
            include('space'),
            (r'require[ \t]+[^\n\r]+[\n\r]', Other),
            (r'module\b', Keyword.Namespace, 'module'),
            (r'grammar\b', Keyword, 'grammar'),
        ],
        'module': [
            include('space'),
            include('end'),
            (r'module\b', Keyword, '#push'),
            (r'grammar\b', Keyword, 'grammar'),
            (r'[A-Z]\w*(?:::[A-Z]\w*)*', Name.Namespace),
        ],
        'grammar': [
            include('space'),
            include('end'),
            (r'rule\b', Keyword, 'rule'),
            (r'include\b', Keyword, 'include'),
            (r'[A-Z]\w*', Name),
        ],
        'include': [
            include('space'),
            (r'[A-Z]\w*(?:::[A-Z]\w*)*', Name.Class, '#pop'),
        ],
        'rule': [
            include('space'),
            include('end'),
            (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
            (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single),
            (r'([A-Za-z_]\w*)(:)', bygroups(Name.Label, Punctuation)),
            (r'[A-Za-z_]\w*', Name),
            (r'[()]', Punctuation),
            (r'[?+*/&!~]', Operator),
            (r'\[(?:\\.|\[:\^?[a-z]+:\]|[^\\\]])+\]', String.Regex),
            (r'([0-9]*)(\.\.)([0-9]*)',
             bygroups(Number.Integer, Operator, Number.Integer)),
            (r'(<)([^>]+)(>)', bygroups(Punctuation, Name.Class, Punctuation)),
            (r'\{', Punctuation, 'inline_module'),
            (r'\.', String.Regex),
        ],
        'inline_module': [
            (r'\{', Other, 'ruby'),
            (r'\}', Punctuation, '#pop'),
            (r'[^{}]+', Other),
        ],
        'ruby': [
            (r'\{', Other, '#push'),
            (r'\}', Other, '#pop'),
            (r'[^{}]+', Other),
        ],
        'space': [
            (r'[ \t\n\r]+', Whitespace),
            (r'#[^\n]*', Comment.Single),
        ],
        'end': [
            (r'end\b', Keyword, '#pop'),
        ],
    }


class TreetopLexer(DelegatingLexer):
    """
    A lexer for `Treetop <http://treetop.rubyforge.org/>`_ grammars.

    .. versionadded:: 1.6
    """

    name = 'Treetop'
    aliases = ['treetop']
    filenames = ['*.treetop', '*.tt']

    def __init__(self, **options):
        super().__init__(RubyLexer, TreetopBaseLexer, **options)
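
# Illustrative sketch only: TreetopLexer delegates the Ruby code found inside
# {...} blocks to RubyLexer while TreetopBaseLexer handles the grammar
# structure. The grammar below is invented, and only Pygments' public API is
# assumed:
#
#     from pygments import highlight
#     from pygments.formatters import HtmlFormatter
#
#     src = "grammar Greeting\n  rule hello\n    'hello' { def value; 1; end }\n  end\nend\n"
#     html = highlight(src, TreetopLexer(), HtmlFormatter())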


class EbnfLexer(RegexLexer):
    """
    Lexer for `ISO/IEC 14977 EBNF
    <http://en.wikipedia.org/wiki/Extended_Backus%E2%80%93Naur_Form>`_
    grammars.

    .. versionadded:: 2.0
    """

    name = 'EBNF'
    aliases = ['ebnf']
    filenames = ['*.ebnf']
    mimetypes = ['text/x-ebnf']

    tokens = {
        'root': [
            include('whitespace'),
            include('comment_start'),
            include('identifier'),
            (r'=', Operator, 'production'),
        ],
        'production': [
            include('whitespace'),
            include('comment_start'),
            include('identifier'),
            (r'"[^"]*"', String.Double),
            (r"'[^']*'", String.Single),
            (r'(\?[^?]*\?)', Name.Entity),
            (r'[\[\]{}(),|]', Punctuation),
            (r'-', Operator),
            (r';', Punctuation, '#pop'),
            (r'\.', Punctuation, '#pop'),
        ],
        'whitespace': [
            (r'\s+', Text),
        ],
        'comment_start': [
            (r'\(\*', Comment.Multiline, 'comment'),
        ],
        'comment': [
            (r'[^*)]', Comment.Multiline),
            include('comment_start'),
            (r'\*\)', Comment.Multiline, '#pop'),
            (r'[*)]', Comment.Multiline),
        ],
        'identifier': [
            (r'([a-zA-Z][\w \-]*)', Keyword),
        ],
    }
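
# Illustrative sketch only: EbnfLexer enters its 'production' state after '='
# and pops back out on ';' or '.'. The EBNF line below is invented, and only
# Pygments' public API is assumed:
#
#     from pygments import highlight
#     from pygments.formatters import TerminalFormatter
#
#     ebnf = 'digit = "0" | "1" | "2" ;  (* a tiny production *)'
#     print(highlight(ebnf, EbnfLexer(), TerminalFormatter()))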