# -*- coding: utf-8 -*-
"""
    pygments.lexers.parsers
    ~~~~~~~~~~~~~~~~~~~~~~~

    Lexers for parser generators.

    :copyright: Copyright 2006-2019 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import re

from pygments.lexer import RegexLexer, DelegatingLexer, \
    include, bygroups, using
from pygments.token import Punctuation, Other, Text, Comment, Operator, \
    Keyword, Name, String, Number, Whitespace
from pygments.lexers.jvm import JavaLexer
from pygments.lexers.c_cpp import CLexer, CppLexer
from pygments.lexers.objective import ObjectiveCLexer
from pygments.lexers.d import DLexer
from pygments.lexers.dotnet import CSharpLexer
from pygments.lexers.ruby import RubyLexer
from pygments.lexers.python import PythonLexer
from pygments.lexers.perl import PerlLexer

__all__ = ['RagelLexer', 'RagelEmbeddedLexer', 'RagelCLexer', 'RagelDLexer',
           'RagelCppLexer', 'RagelObjectiveCLexer', 'RagelRubyLexer',
           'RagelJavaLexer', 'AntlrLexer', 'AntlrPythonLexer',
           'AntlrPerlLexer', 'AntlrRubyLexer', 'AntlrCppLexer',
           # 'AntlrCLexer',
           'AntlrCSharpLexer', 'AntlrObjectiveCLexer',
           'AntlrJavaLexer', 'AntlrActionScriptLexer',
           'TreetopLexer', 'EbnfLexer']
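
# Usage sketch (comment added for clarity, not part of the upstream module):
# the lexers below are normally obtained through the regular Pygments entry
# points rather than instantiated directly.  A minimal example, assuming a
# Ragel-in-C source file named 'scanner.rl' (the filename is illustrative):
#
#     from pygments import highlight
#     from pygments.lexers import get_lexer_for_filename
#     from pygments.formatters import TerminalFormatter
#
#     with open('scanner.rl') as f:
#         source = f.read()
#     lexer = get_lexer_for_filename('scanner.rl', source)  # -> a Ragel*Lexer
#     print(highlight(source, lexer, TerminalFormatter()))
#
# highlight(), get_lexer_for_filename() and TerminalFormatter are standard
# Pygments APIs; the analyse_text() hooks defined below are what let
# get_lexer_for_filename() pick the right host-language variant for '*.rl'.
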
class RagelLexer(RegexLexer):
    """
    A pure `Ragel <http://www.complang.org/ragel/>`_ lexer.  Use this for
    fragments of Ragel.  For ``.rl`` files, use RagelEmbeddedLexer instead
    (or one of the language-specific subclasses).

    .. versionadded:: 1.1
    """

    name = 'Ragel'
    aliases = ['ragel']
    filenames = []

    tokens = {
        'whitespace': [
            (r'\s+', Whitespace)
        ],
        'comments': [
            (r'\#.*$', Comment),
        ],
        'keywords': [
            (r'(access|action|alphtype)\b', Keyword),
            (r'(getkey|write|machine|include)\b', Keyword),
            (r'(any|ascii|extend|alpha|digit|alnum|lower|upper)\b', Keyword),
            (r'(xdigit|cntrl|graph|print|punct|space|zlen|empty)\b', Keyword)
        ],
        'numbers': [
            (r'0x[0-9A-Fa-f]+', Number.Hex),
            (r'[+-]?[0-9]+', Number.Integer),
        ],
        'literals': [
            (r'"(\\\\|\\"|[^"])*"', String),              # double quote string
            (r"'(\\\\|\\'|[^'])*'", String),              # single quote string
            (r'\[(\\\\|\\\]|[^\]])*\]', String),          # square bracket literals
            (r'/(?!\*)(\\\\|\\/|[^/])*/', String.Regex),  # regular expressions
        ],
        'identifiers': [
            (r'[a-zA-Z_]\w*', Name.Variable),
        ],
        'operators': [
            (r',', Operator),                           # Join
            (r'\||&|--?', Operator),                    # Union, Intersection and Subtraction
            (r'\.|<:|:>>?', Operator),                  # Concatenation
            (r':', Operator),                           # Label
            (r'->', Operator),                          # Epsilon Transition
            (r'(>|\$|%|<|@|<>)(/|eof\b)', Operator),    # EOF Actions
            (r'(>|\$|%|<|@|<>)(!|err\b)', Operator),    # Global Error Actions
            (r'(>|\$|%|<|@|<>)(\^|lerr\b)', Operator),  # Local Error Actions
            (r'(>|\$|%|<|@|<>)(~|to\b)', Operator),     # To-State Actions
            (r'(>|\$|%|<|@|<>)(\*|from\b)', Operator),  # From-State Actions
            (r'>|@|\$|%', Operator),                    # Transition Actions and Priorities
            (r'\*|\?|\+|\{[0-9]*,[0-9]*\}', Operator),  # Repetition
            (r'!|\^', Operator),                        # Negation
            (r'\(|\)', Operator),                       # Grouping
        ],

        'root': [
            include('literals'),
            include('whitespace'),
            include('comments'),
            include('keywords'),
            include('numbers'),
            include('identifiers'),
            include('operators'),
            (r'\{', Punctuation, 'host'),
            (r'=', Operator),
            (r';', Punctuation),
        ],

        'host': [
            (r'(' + r'|'.join((  # keep host code in largest possible chunks
                r'[^{}\'"/#]+',  # exclude unsafe characters
                r'[^\\]\\[{}]',  # allow escaped { or }

                # strings and comments may safely contain unsafe characters
                r'"(\\\\|\\"|[^"])*"',  # double quote string
                r"'(\\\\|\\'|[^'])*'",  # single quote string
                r'//.*$\n?',            # single line comment
                r'/\*(.|\n)*?\*/',      # multi-line javadoc-style comment
                r'\#.*$\n?',            # ruby comment

                # regular expression: There's no reason for it to start
                # with a * and this stops confusion with comments.
                r'/(?!\*)(\\\\|\\/|[^/])*/',

                # / is safe now that we've handled regex and javadoc comments
                r'/',
            )) + r')+', Other),

            (r'\{', Punctuation, '#push'),
            (r'\}', Punctuation, '#pop'),
        ],
    }
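
# Note on the 'host' state above (comment added for clarity): host-language
# code inside '{ ... }' actions is emitted as Other in the largest possible
# chunks, while bare '{' and '}' push and pop the state so nested braces
# stay balanced; the delegating lexers further down hand those Other chunks
# to the appropriate host-language lexer.
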
class RagelEmbeddedLexer(RegexLexer):
    """
    A lexer for `Ragel`_ embedded in a host language file.

    This will only highlight Ragel statements.  If you want host language
    highlighting then call the language-specific Ragel lexer.

    .. versionadded:: 1.1
    """

    name = 'Embedded Ragel'
    aliases = ['ragel-em']
    filenames = ['*.rl']

    tokens = {
        'root': [
            (r'(' + r'|'.join((  # keep host code in largest possible chunks
                r'[^%\'"/#]+',   # exclude unsafe characters
                r'%(?=[^%]|$)',  # a single % sign is okay, just not 2 of them

                # strings and comments may safely contain unsafe characters
                r'"(\\\\|\\"|[^"])*"',  # double quote string
                r"'(\\\\|\\'|[^'])*'",  # single quote string
                r'/\*(.|\n)*?\*/',      # multi-line javadoc-style comment
                r'//.*$\n?',            # single line comment
                r'\#.*$\n?',            # ruby/ragel comment
                r'/(?!\*)(\\\\|\\/|[^/])*/',  # regular expression

                # / is safe now that we've handled regex and javadoc comments
                r'/',
            )) + r')+', Other),

            # Single Line FSM.
            # Please don't put a quoted newline in a single line FSM.
            # That's just mean. It will break this.
            (r'(%%)(?![{%])(.*)($|;)(\n?)', bygroups(Punctuation,
                                                     using(RagelLexer),
                                                     Punctuation, Text)),

            # Multi Line FSM.
            (r'(%%%%|%%)\{', Punctuation, 'multi-line-fsm'),
        ],
        'multi-line-fsm': [
            (r'(' + r'|'.join((  # keep ragel code in largest possible chunks.
                r'(' + r'|'.join((
                    r'[^}\'"\[/#]',    # exclude unsafe characters
                    r'\}(?=[^%]|$)',   # } is okay as long as it's not followed by %
                    r'\}%(?=[^%]|$)',  # ...well, one %'s okay, just not two...
                    r'[^\\]\\[{}]',    # ...and } is okay if it's escaped

                    # allow / if it's preceded with one of these symbols
                    # (ragel EOF actions)
                    r'(>|\$|%|<|@|<>)/',

                    # specifically allow regex followed immediately by *
                    # so it doesn't get mistaken for a comment
                    r'/(?!\*)(\\\\|\\/|[^/])*/\*',

                    # allow / as long as it's not followed by another / or by a *
                    r'/(?=[^/*]|$)',

                    # We want to match as many of these as we can in one block.
                    # Not sure if we need the + sign here,
                    # does it help performance?
                )) + r')+',

                # strings and comments may safely contain unsafe characters
                r'"(\\\\|\\"|[^"])*"',      # double quote string
                r"'(\\\\|\\'|[^'])*'",      # single quote string
                r"\[(\\\\|\\\]|[^\]])*\]",  # square bracket literal
                r'/\*(.|\n)*?\*/',          # multi-line javadoc-style comment
                r'//.*$\n?',                # single line comment
                r'\#.*$\n?',                # ruby/ragel comment
            )) + r')+', using(RagelLexer)),
            (r'\}%%', Punctuation, '#pop'),
        ]
    }

    def analyse_text(text):
        return '@LANG: indep' in text
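
# The Ragel*Lexer classes below all follow the same pattern (comment added
# for clarity): pygments' DelegatingLexer takes the host-language lexer as
# its first argument and the embedded-Ragel lexer as its second; the
# embedded lexer marks host code as Other, and those Other spans are then
# re-lexed with the host-language lexer.
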
class RagelRubyLexer(DelegatingLexer):
    """
    A lexer for `Ragel`_ in a Ruby host file.

    .. versionadded:: 1.1
    """

    name = 'Ragel in Ruby Host'
    aliases = ['ragel-ruby', 'ragel-rb']
    filenames = ['*.rl']

    def __init__(self, **options):
        super(RagelRubyLexer, self).__init__(RubyLexer, RagelEmbeddedLexer,
                                             **options)

    def analyse_text(text):
        return '@LANG: ruby' in text


class RagelCLexer(DelegatingLexer):
    """
    A lexer for `Ragel`_ in a C host file.

    .. versionadded:: 1.1
    """

    name = 'Ragel in C Host'
    aliases = ['ragel-c']
    filenames = ['*.rl']

    def __init__(self, **options):
        super(RagelCLexer, self).__init__(CLexer, RagelEmbeddedLexer,
                                          **options)

    def analyse_text(text):
        return '@LANG: c' in text


class RagelDLexer(DelegatingLexer):
    """
    A lexer for `Ragel`_ in a D host file.

    .. versionadded:: 1.1
    """

    name = 'Ragel in D Host'
    aliases = ['ragel-d']
    filenames = ['*.rl']

    def __init__(self, **options):
        super(RagelDLexer, self).__init__(DLexer, RagelEmbeddedLexer, **options)

    def analyse_text(text):
        return '@LANG: d' in text


class RagelCppLexer(DelegatingLexer):
    """
    A lexer for `Ragel`_ in a CPP host file.

    .. versionadded:: 1.1
    """

    name = 'Ragel in CPP Host'
    aliases = ['ragel-cpp']
    filenames = ['*.rl']

    def __init__(self, **options):
        super(RagelCppLexer, self).__init__(CppLexer, RagelEmbeddedLexer, **options)

    def analyse_text(text):
        return '@LANG: c++' in text


class RagelObjectiveCLexer(DelegatingLexer):
    """
    A lexer for `Ragel`_ in an Objective C host file.

    .. versionadded:: 1.1
    """

    name = 'Ragel in Objective C Host'
    aliases = ['ragel-objc']
    filenames = ['*.rl']

    def __init__(self, **options):
        super(RagelObjectiveCLexer, self).__init__(ObjectiveCLexer,
                                                   RagelEmbeddedLexer,
                                                   **options)

    def analyse_text(text):
        return '@LANG: objc' in text


class RagelJavaLexer(DelegatingLexer):
    """
    A lexer for `Ragel`_ in a Java host file.

    .. versionadded:: 1.1
    """

    name = 'Ragel in Java Host'
    aliases = ['ragel-java']
    filenames = ['*.rl']

    def __init__(self, **options):
        super(RagelJavaLexer, self).__init__(JavaLexer, RagelEmbeddedLexer,
                                             **options)

    def analyse_text(text):
        return '@LANG: java' in text
class AntlrLexer(RegexLexer):
    """
    Generic `ANTLR`_ Lexer.
    Should not be called directly, instead
    use DelegatingLexer for your target language.

    .. versionadded:: 1.1

    .. _ANTLR: http://www.antlr.org/
    """

    name = 'ANTLR'
    aliases = ['antlr']
    filenames = []

    _id = r'[A-Za-z]\w*'
    _TOKEN_REF = r'[A-Z]\w*'
    _RULE_REF = r'[a-z]\w*'
    _STRING_LITERAL = r'\'(?:\\\\|\\\'|[^\']*)\''
    _INT = r'[0-9]+'

    tokens = {
        'whitespace': [
            (r'\s+', Whitespace),
        ],
        'comments': [
            (r'//.*$', Comment),
            (r'/\*(.|\n)*?\*/', Comment),
        ],
        'root': [
            include('whitespace'),
            include('comments'),

            (r'(lexer|parser|tree)?(\s*)(grammar\b)(\s*)(' + _id + ')(;)',
             bygroups(Keyword, Whitespace, Keyword, Whitespace, Name.Class,
                      Punctuation)),
            # optionsSpec
            (r'options\b', Keyword, 'options'),
            # tokensSpec
            (r'tokens\b', Keyword, 'tokens'),
            # attrScope
            (r'(scope)(\s*)(' + _id + r')(\s*)(\{)',
             bygroups(Keyword, Whitespace, Name.Variable, Whitespace,
                      Punctuation), 'action'),
            # exception
            (r'(catch|finally)\b', Keyword, 'exception'),
            # action
            (r'(@' + _id + r')(\s*)(::)?(\s*)(' + _id + r')(\s*)(\{)',
             bygroups(Name.Label, Whitespace, Punctuation, Whitespace,
                      Name.Label, Whitespace, Punctuation), 'action'),
            # rule
            (r'((?:protected|private|public|fragment)\b)?(\s*)(' + _id + ')(!)?',
             bygroups(Keyword, Whitespace, Name.Label, Punctuation),
             ('rule-alts', 'rule-prelims')),
        ],
        'exception': [
            (r'\n', Whitespace, '#pop'),
            (r'\s', Whitespace),
            include('comments'),

            (r'\[', Punctuation, 'nested-arg-action'),
            (r'\{', Punctuation, 'action'),
        ],
        'rule-prelims': [
            include('whitespace'),
            include('comments'),

            (r'returns\b', Keyword),
            (r'\[', Punctuation, 'nested-arg-action'),
            (r'\{', Punctuation, 'action'),
            # throwsSpec
            (r'(throws)(\s+)(' + _id + ')',
             bygroups(Keyword, Whitespace, Name.Label)),
            (r'(,)(\s*)(' + _id + ')',
             bygroups(Punctuation, Whitespace, Name.Label)),  # Additional throws
            # optionsSpec
            (r'options\b', Keyword, 'options'),
            # ruleScopeSpec - scope followed by target language code or name of action
            # TODO finish implementing other possibilities for scope
            # L173 ANTLRv3.g from ANTLR book
            (r'(scope)(\s+)(\{)', bygroups(Keyword, Whitespace, Punctuation),
             'action'),
            (r'(scope)(\s+)(' + _id + r')(\s*)(;)',
             bygroups(Keyword, Whitespace, Name.Label, Whitespace, Punctuation)),
            # ruleAction
            (r'(@' + _id + r')(\s*)(\{)',
             bygroups(Name.Label, Whitespace, Punctuation), 'action'),
            # finished prelims, go to rule alts!
            (r':', Punctuation, '#pop')
        ],
        'rule-alts': [
            include('whitespace'),
            include('comments'),

            # These might need to go in a separate 'block' state triggered by (
            (r'options\b', Keyword, 'options'),
            (r':', Punctuation),

            # literals
            (r"'(\\\\|\\'|[^'])*'", String),
            (r'"(\\\\|\\"|[^"])*"', String),
            (r'<<([^>]|>[^>])>>', String),
            # identifiers
            # Tokens start with capital letter.
            (r'\$?[A-Z_]\w*', Name.Constant),
            # Rules start with small letter.
            (r'\$?[a-z_]\w*', Name.Variable),
            # operators
            (r'(\+|\||->|=>|=|\(|\)|\.\.|\.|\?|\*|\^|!|\#|~)', Operator),
            (r',', Punctuation),
            (r'\[', Punctuation, 'nested-arg-action'),
            (r'\{', Punctuation, 'action'),
            (r';', Punctuation, '#pop')
        ],
        'tokens': [
            include('whitespace'),
            include('comments'),
            (r'\{', Punctuation),
            (r'(' + _TOKEN_REF + r')(\s*)(=)?(\s*)(' + _STRING_LITERAL
             + r')?(\s*)(;)',
             bygroups(Name.Label, Whitespace, Punctuation, Whitespace,
                      String, Whitespace, Punctuation)),
            (r'\}', Punctuation, '#pop'),
        ],
        'options': [
            include('whitespace'),
            include('comments'),
            (r'\{', Punctuation),
            (r'(' + _id + r')(\s*)(=)(\s*)(' +
             '|'.join((_id, _STRING_LITERAL, _INT, r'\*')) + r')(\s*)(;)',
             bygroups(Name.Variable, Whitespace, Punctuation, Whitespace,
                      Text, Whitespace, Punctuation)),
            (r'\}', Punctuation, '#pop'),
        ],
        'action': [
            (r'(' + r'|'.join((   # keep host code in largest possible chunks
                r'[^${}\'"/\\]+',  # exclude unsafe characters

                # strings and comments may safely contain unsafe characters
                r'"(\\\\|\\"|[^"])*"',  # double quote string
                r"'(\\\\|\\'|[^'])*'",  # single quote string
                r'//.*$\n?',            # single line comment
                r'/\*(.|\n)*?\*/',      # multi-line javadoc-style comment

                # regular expression: There's no reason for it to start
                # with a * and this stops confusion with comments.
                r'/(?!\*)(\\\\|\\/|[^/])*/',

                # backslashes are okay, as long as we are not backslashing a %
                r'\\(?!%)',

                # Now that we've handled regex and javadoc comments
                # it's safe to let / through.
                r'/',
            )) + r')+', Other),
            (r'(\\)(%)', bygroups(Punctuation, Other)),
            (r'(\$[a-zA-Z]+)(\.?)(text|value)?',
             bygroups(Name.Variable, Punctuation, Name.Property)),
            (r'\{', Punctuation, '#push'),
            (r'\}', Punctuation, '#pop'),
        ],
        'nested-arg-action': [
            (r'(' + r'|'.join((   # keep host code in largest possible chunks.
                r'[^$\[\]\'"/]+',  # exclude unsafe characters

                # strings and comments may safely contain unsafe characters
                r'"(\\\\|\\"|[^"])*"',  # double quote string
                r"'(\\\\|\\'|[^'])*'",  # single quote string
                r'//.*$\n?',            # single line comment
                r'/\*(.|\n)*?\*/',      # multi-line javadoc-style comment

                # regular expression: There's no reason for it to start
                # with a * and this stops confusion with comments.
                r'/(?!\*)(\\\\|\\/|[^/])*/',

                # Now that we've handled regex and javadoc comments
                # it's safe to let / through.
                r'/',
            )) + r')+', Other),
            (r'\[', Punctuation, '#push'),
            (r'\]', Punctuation, '#pop'),
            (r'(\$[a-zA-Z]+)(\.?)(text|value)?',
             bygroups(Name.Variable, Punctuation, Name.Property)),
            (r'(\\\\|\\\]|\\\[|[^\[\]])+', Other),
        ]
    }

    def analyse_text(text):
        return re.search(r'^\s*grammar\s+[a-zA-Z0-9]+\s*;', text, re.M)
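
# Note on detection (comment added for clarity): AntlrLexer.analyse_text()
# above only checks for a grammar declaration such as
#
#     grammar Calc;
#
# The Antlr*Lexer subclasses below combine that check with a target-specific
# option line (e.g. 'language = Ruby;') before claiming a '*.g'/'*.G' file
# for their host language.
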
# http://www.antlr.org/wiki/display/ANTLR3/Code+Generation+Targets

# TH: I'm not aware of any language features of C++ that will cause
# incorrect lexing of C files.  Antlr doesn't appear to make a distinction,
# so just assume they're C++.  No idea how to make Objective C work in the
# future.

# class AntlrCLexer(DelegatingLexer):
#     """
#     ANTLR with C Target
#
#     .. versionadded:: 1.1
#     """
#
#     name = 'ANTLR With C Target'
#     aliases = ['antlr-c']
#     filenames = ['*.G', '*.g']
#
#     def __init__(self, **options):
#         super(AntlrCLexer, self).__init__(CLexer, AntlrLexer, **options)
#
#     def analyse_text(text):
#         return re.match(r'^\s*language\s*=\s*C\s*;', text)
class AntlrCppLexer(DelegatingLexer):
    """
    `ANTLR`_ with CPP Target

    .. versionadded:: 1.1
    """

    name = 'ANTLR With CPP Target'
    aliases = ['antlr-cpp']
    filenames = ['*.G', '*.g']

    def __init__(self, **options):
        super(AntlrCppLexer, self).__init__(CppLexer, AntlrLexer, **options)

    def analyse_text(text):
        return AntlrLexer.analyse_text(text) and \
            re.search(r'^\s*language\s*=\s*C\s*;', text, re.M)
class AntlrObjectiveCLexer(DelegatingLexer):
    """
    `ANTLR`_ with Objective-C Target

    .. versionadded:: 1.1
    """

    name = 'ANTLR With ObjectiveC Target'
    aliases = ['antlr-objc']
    filenames = ['*.G', '*.g']

    def __init__(self, **options):
        super(AntlrObjectiveCLexer, self).__init__(ObjectiveCLexer,
                                                   AntlrLexer, **options)

    def analyse_text(text):
        return AntlrLexer.analyse_text(text) and \
            re.search(r'^\s*language\s*=\s*ObjC\s*;', text, re.M)
class AntlrCSharpLexer(DelegatingLexer):
    """
    `ANTLR`_ with C# Target

    .. versionadded:: 1.1
    """

    name = 'ANTLR With C# Target'
    aliases = ['antlr-csharp', 'antlr-c#']
    filenames = ['*.G', '*.g']

    def __init__(self, **options):
        super(AntlrCSharpLexer, self).__init__(CSharpLexer, AntlrLexer,
                                               **options)

    def analyse_text(text):
        return AntlrLexer.analyse_text(text) and \
            re.search(r'^\s*language\s*=\s*CSharp2\s*;', text, re.M)


class AntlrPythonLexer(DelegatingLexer):
    """
    `ANTLR`_ with Python Target

    .. versionadded:: 1.1
    """

    name = 'ANTLR With Python Target'
    aliases = ['antlr-python']
    filenames = ['*.G', '*.g']

    def __init__(self, **options):
        super(AntlrPythonLexer, self).__init__(PythonLexer, AntlrLexer,
                                               **options)

    def analyse_text(text):
        return AntlrLexer.analyse_text(text) and \
            re.search(r'^\s*language\s*=\s*Python\s*;', text, re.M)
class AntlrJavaLexer(DelegatingLexer):
    """
    `ANTLR`_ with Java Target

    .. versionadded:: 1.1
    """

    name = 'ANTLR With Java Target'
    aliases = ['antlr-java']
    filenames = ['*.G', '*.g']

    def __init__(self, **options):
        super(AntlrJavaLexer, self).__init__(JavaLexer, AntlrLexer,
                                             **options)

    def analyse_text(text):
        # Antlr language is Java by default
        return AntlrLexer.analyse_text(text) and 0.9
class AntlrRubyLexer(DelegatingLexer):
    """
    `ANTLR`_ with Ruby Target

    .. versionadded:: 1.1
    """

    name = 'ANTLR With Ruby Target'
    aliases = ['antlr-ruby', 'antlr-rb']
    filenames = ['*.G', '*.g']

    def __init__(self, **options):
        super(AntlrRubyLexer, self).__init__(RubyLexer, AntlrLexer,
                                             **options)

    def analyse_text(text):
        return AntlrLexer.analyse_text(text) and \
            re.search(r'^\s*language\s*=\s*Ruby\s*;', text, re.M)


class AntlrPerlLexer(DelegatingLexer):
    """
    `ANTLR`_ with Perl Target

    .. versionadded:: 1.1
    """

    name = 'ANTLR With Perl Target'
    aliases = ['antlr-perl']
    filenames = ['*.G', '*.g']

    def __init__(self, **options):
        super(AntlrPerlLexer, self).__init__(PerlLexer, AntlrLexer,
                                             **options)

    def analyse_text(text):
        return AntlrLexer.analyse_text(text) and \
            re.search(r'^\s*language\s*=\s*Perl5\s*;', text, re.M)


class AntlrActionScriptLexer(DelegatingLexer):
    """
    `ANTLR`_ with ActionScript Target

    .. versionadded:: 1.1
    """

    name = 'ANTLR With ActionScript Target'
    aliases = ['antlr-as', 'antlr-actionscript']
    filenames = ['*.G', '*.g']

    def __init__(self, **options):
        from pygments.lexers.actionscript import ActionScriptLexer
        super(AntlrActionScriptLexer, self).__init__(ActionScriptLexer,
                                                     AntlrLexer, **options)

    def analyse_text(text):
        return AntlrLexer.analyse_text(text) and \
            re.search(r'^\s*language\s*=\s*ActionScript\s*;', text, re.M)
class TreetopBaseLexer(RegexLexer):
    """
    A base lexer for `Treetop <http://treetop.rubyforge.org/>`_ grammars.
    Not for direct use; use TreetopLexer instead.

    .. versionadded:: 1.6
    """

    tokens = {
        'root': [
            include('space'),
            (r'require[ \t]+[^\n\r]+[\n\r]', Other),
            (r'module\b', Keyword.Namespace, 'module'),
            (r'grammar\b', Keyword, 'grammar'),
        ],
        'module': [
            include('space'),
            include('end'),
            (r'module\b', Keyword, '#push'),
            (r'grammar\b', Keyword, 'grammar'),
            (r'[A-Z]\w*(?:::[A-Z]\w*)*', Name.Namespace),
        ],
        'grammar': [
            include('space'),
            include('end'),
            (r'rule\b', Keyword, 'rule'),
            (r'include\b', Keyword, 'include'),
            (r'[A-Z]\w*', Name),
        ],
        'include': [
            include('space'),
            (r'[A-Z]\w*(?:::[A-Z]\w*)*', Name.Class, '#pop'),
        ],
        'rule': [
            include('space'),
            include('end'),
            (r'"(\\\\|\\"|[^"])*"', String.Double),
            (r"'(\\\\|\\'|[^'])*'", String.Single),
            (r'([A-Za-z_]\w*)(:)', bygroups(Name.Label, Punctuation)),
            (r'[A-Za-z_]\w*', Name),
            (r'[()]', Punctuation),
            (r'[?+*/&!~]', Operator),
            (r'\[(?:\\.|\[:\^?[a-z]+:\]|[^\\\]])+\]', String.Regex),
            (r'([0-9]*)(\.\.)([0-9]*)',
             bygroups(Number.Integer, Operator, Number.Integer)),
            (r'(<)([^>]+)(>)', bygroups(Punctuation, Name.Class, Punctuation)),
            (r'\{', Punctuation, 'inline_module'),
            (r'\.', String.Regex),
        ],
        'inline_module': [
            (r'\{', Other, 'ruby'),
            (r'\}', Punctuation, '#pop'),
            (r'[^{}]+', Other),
        ],
        'ruby': [
            (r'\{', Other, '#push'),
            (r'\}', Other, '#pop'),
            (r'[^{}]+', Other),
        ],
        'space': [
            (r'[ \t\n\r]+', Whitespace),
            (r'#[^\n]*', Comment.Single),
        ],
        'end': [
            (r'end\b', Keyword, '#pop'),
        ],
    }
class TreetopLexer(DelegatingLexer):
    """
    A lexer for `Treetop <http://treetop.rubyforge.org/>`_ grammars.

    .. versionadded:: 1.6
    """

    name = 'Treetop'
    aliases = ['treetop']
    filenames = ['*.treetop', '*.tt']

    def __init__(self, **options):
        super(TreetopLexer, self).__init__(RubyLexer, TreetopBaseLexer, **options)
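
# Note (comment added for clarity): in TreetopBaseLexer, the 'inline_module'
# and 'ruby' states emit the body of '{ ... }' blocks as Other, so
# TreetopLexer's delegation to RubyLexer is what gives inline Ruby code its
# Ruby highlighting, e.g. for a rule such as
#
#     rule number
#       [0-9]+ { def value; text_value.to_i; end }
#     end
#
# (the Treetop snippet above is illustrative, not taken from this module).
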
class EbnfLexer(RegexLexer):
    """
    Lexer for `ISO/IEC 14977 EBNF
    <http://en.wikipedia.org/wiki/Extended_Backus%E2%80%93Naur_Form>`_
    grammars.

    .. versionadded:: 2.0
    """

    name = 'EBNF'
    aliases = ['ebnf']
    filenames = ['*.ebnf']
    mimetypes = ['text/x-ebnf']

    tokens = {
        'root': [
            include('whitespace'),
            include('comment_start'),
            include('identifier'),
            (r'=', Operator, 'production'),
        ],
        'production': [
            include('whitespace'),
            include('comment_start'),
            include('identifier'),
            (r'"[^"]*"', String.Double),
            (r"'[^']*'", String.Single),
            (r'(\?[^?]*\?)', Name.Entity),
            (r'[\[\]{}(),|]', Punctuation),
            (r'-', Operator),
            (r';', Punctuation, '#pop'),
            (r'\.', Punctuation, '#pop'),
        ],
        'whitespace': [
            (r'\s+', Text),
        ],
        'comment_start': [
            (r'\(\*', Comment.Multiline, 'comment'),
        ],
        'comment': [
            (r'[^*)]', Comment.Multiline),
            include('comment_start'),
            (r'\*\)', Comment.Multiline, '#pop'),
            (r'[*)]', Comment.Multiline),
        ],
        'identifier': [
            (r'([a-zA-Z][\w \-]*)', Keyword),
        ],
    }
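
# Rough illustration (comment added for clarity, not part of the upstream
# module) of how the EbnfLexer states map onto an ISO/IEC 14977 grammar:
#
#     (* a comment handled by the 'comment' state *)
#     digit = "0" | "1" | "2" ;
#
# The identifier and the '=' are matched in 'root', which then enters the
# 'production' state for the quoted terminals, the '|' alternations and the
# terminating ';'.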