  1. """
  2. pygments.lexers.markup
  3. ~~~~~~~~~~~~~~~~~~~~~~
  4. Lexers for non-HTML markup languages.
  5. :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS.
  6. :license: BSD, see LICENSE for details.
  7. """
  8. import re
  9. from pygments.lexers.html import XmlLexer
  10. from pygments.lexers.javascript import JavascriptLexer
  11. from pygments.lexers.css import CssLexer
  12. from pygments.lexers.lilypond import LilyPondLexer
  13. from pygments.lexers.data import JsonLexer
  14. from pygments.lexer import RegexLexer, DelegatingLexer, include, bygroups, \
  15. using, this, do_insertions, default, words
  16. from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
  17. Number, Punctuation, Generic, Other, Whitespace
  18. from pygments.util import get_bool_opt, ClassNotFound
  19. __all__ = ['BBCodeLexer', 'MoinWikiLexer', 'RstLexer', 'TexLexer', 'GroffLexer',
  20. 'MozPreprocHashLexer', 'MozPreprocPercentLexer',
  21. 'MozPreprocXulLexer', 'MozPreprocJavascriptLexer',
  22. 'MozPreprocCssLexer', 'MarkdownLexer', 'TiddlyWiki5Lexer', 'WikitextLexer']
class BBCodeLexer(RegexLexer):
    """
    A lexer that highlights BBCode(-like) syntax.

    .. versionadded:: 0.6
    """

    name = 'BBCode'
    aliases = ['bbcode']
    mimetypes = ['text/x-bbcode']

    tokens = {
        'root': [
            (r'[^[]+', Text),
            # tag/end tag begin
            (r'\[/?\w+', Keyword, 'tag'),
            # stray bracket
            (r'\[', Text),
        ],
        'tag': [
            (r'\s+', Text),
            # attribute with value
            (r'(\w+)(=)("?[^\s"\]]+"?)',
             bygroups(Name.Attribute, Operator, String)),
            # tag argument (a la [color=green])
            (r'(=)("?[^\s"\]]+"?)',
             bygroups(Operator, String)),
            # tag end
            (r'\]', Keyword, '#pop'),
        ],
    }


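# Illustrative sketch, not part of the original module: like every lexer in
# this file, BBCodeLexer can be driven through the generic ``Lexer.get_tokens``
# API (the sample markup below is an assumption chosen for demonstration):
#
#   >>> for token, value in BBCodeLexer().get_tokens('[color=green]hi[/color]'):
#   ...     print(token, repr(value))

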
class MoinWikiLexer(RegexLexer):
    """
    For MoinMoin (and Trac) Wiki markup.

    .. versionadded:: 0.7
    """

    name = 'MoinMoin/Trac Wiki markup'
    aliases = ['trac-wiki', 'moin']
    filenames = []
    mimetypes = ['text/x-trac-wiki']
    flags = re.MULTILINE | re.IGNORECASE

    tokens = {
        'root': [
            (r'^#.*$', Comment),
            (r'(!)(\S+)', bygroups(Keyword, Text)),  # Ignore-next
            # Titles
            (r'^(=+)([^=]+)(=+)(\s*#.+)?$',
             bygroups(Generic.Heading, using(this), Generic.Heading, String)),
            # Literal code blocks, with optional shebang
            (r'(\{\{\{)(\n#!.+)?', bygroups(Name.Builtin, Name.Namespace), 'codeblock'),
            (r'(\'\'\'?|\|\||`|__|~~|\^|,,|::)', Comment),  # Formatting
            # Lists
            (r'^( +)([.*-])( )', bygroups(Text, Name.Builtin, Text)),
            (r'^( +)([a-z]{1,5}\.)( )', bygroups(Text, Name.Builtin, Text)),
            # Other Formatting
            (r'\[\[\w+.*?\]\]', Keyword),  # Macro
            (r'(\[[^\s\]]+)(\s+[^\]]+?)?(\])',
             bygroups(Keyword, String, Keyword)),  # Link
            (r'^----+$', Keyword),  # Horizontal rules
            (r'[^\n\'\[{!_~^,|]+', Text),
            (r'\n', Text),
            (r'.', Text),
        ],
        'codeblock': [
            (r'\}\}\}', Name.Builtin, '#pop'),
            # these blocks are allowed to be nested in Trac, but not MoinMoin
            (r'\{\{\{', Text, '#push'),
            (r'[^{}]+', Comment.Preproc),  # slurp boring text
            (r'.', Comment.Preproc),  # allow loose { or }
        ],
    }


class RstLexer(RegexLexer):
    """
    For reStructuredText markup.

    .. versionadded:: 0.7

    Additional options accepted:

    `handlecodeblocks`
        Highlight the contents of ``.. sourcecode:: language``,
        ``.. code:: language`` and ``.. code-block:: language``
        directives with a lexer for the given language (default:
        ``True``).

        .. versionadded:: 0.8
    """
    name = 'reStructuredText'
    url = 'https://docutils.sourceforge.io/rst.html'
    aliases = ['restructuredtext', 'rst', 'rest']
    filenames = ['*.rst', '*.rest']
    mimetypes = ["text/x-rst", "text/prs.fallenstein.rst"]
    flags = re.MULTILINE

    def _handle_sourcecode(self, match):
        from pygments.lexers import get_lexer_by_name

        # section header
        yield match.start(1), Punctuation, match.group(1)
        yield match.start(2), Text, match.group(2)
        yield match.start(3), Operator.Word, match.group(3)
        yield match.start(4), Punctuation, match.group(4)
        yield match.start(5), Text, match.group(5)
        yield match.start(6), Keyword, match.group(6)
        yield match.start(7), Text, match.group(7)

        # lookup lexer if wanted and existing
        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name(match.group(6).strip())
            except ClassNotFound:
                pass
        indention = match.group(8)
        indention_size = len(indention)
        code = (indention + match.group(9) + match.group(10) + match.group(11))

        # no lexer for this language. handle it like it was a code block
        if lexer is None:
            yield match.start(8), String, code
            return

        # highlight the lines with the lexer.
        ins = []
        codelines = code.splitlines(True)
        code = ''
        for line in codelines:
            if len(line) > indention_size:
                ins.append((len(code), [(0, Text, line[:indention_size])]))
                code += line[indention_size:]
            else:
                code += line
        yield from do_insertions(ins, lexer.get_tokens_unprocessed(code))

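    # Explanatory note (not part of the original module): the callback above
    # strips the common indentation before handing the code to the sub-lexer,
    # recording each stripped prefix in ``ins``; ``do_insertions`` then
    # splices those prefixes back in as plain ``Text`` tokens at the recorded
    # offsets, so the sub-lexer only ever sees dedented source.
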
    # from docutils.parsers.rst.states
    closers = '\'")]}>\u2019\u201d\xbb!?'
    unicode_delimiters = '\u2010\u2011\u2012\u2013\u2014\u00a0'
    end_string_suffix = (r'((?=$)|(?=[-/:.,; \n\x00%s%s]))'
                         % (re.escape(unicode_delimiters),
                            re.escape(closers)))

    tokens = {
        'root': [
            # Heading with overline
            (r'^(=+|-+|`+|:+|\.+|\'+|"+|~+|\^+|_+|\*+|\++|#+)([ \t]*\n)'
             r'(.+)(\n)(\1)(\n)',
             bygroups(Generic.Heading, Text, Generic.Heading,
                      Text, Generic.Heading, Text)),
            # Plain heading
            (r'^(\S.*)(\n)(={3,}|-{3,}|`{3,}|:{3,}|\.{3,}|\'{3,}|"{3,}|'
             r'~{3,}|\^{3,}|_{3,}|\*{3,}|\+{3,}|#{3,})(\n)',
             bygroups(Generic.Heading, Text, Generic.Heading, Text)),
            # Bulleted lists
            (r'^(\s*)([-*+])( .+\n(?:\1 .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Numbered lists
            (r'^(\s*)([0-9#ivxlcmIVXLCM]+\.)( .+\n(?:\1 .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            (r'^(\s*)(\(?[0-9#ivxlcmIVXLCM]+\))( .+\n(?:\1 .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Numbered, but keep words at BOL from becoming lists
            (r'^(\s*)([A-Z]+\.)( .+\n(?:\1 .+\n)+)',
             bygroups(Text, Number, using(this, state='inline'))),
            (r'^(\s*)(\(?[A-Za-z]+\))( .+\n(?:\1 .+\n)+)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Line blocks
            (r'^(\s*)(\|)( .+\n(?:\| .+\n)*)',
             bygroups(Text, Operator, using(this, state='inline'))),
            # Sourcecode directives
            (r'^( *\.\.)(\s*)((?:source)?code(?:-block)?)(::)([ \t]*)([^\n]+)'
             r'(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\8.*)?\n)+)',
             _handle_sourcecode),
            # A directive
            (r'^( *\.\.)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))',
             bygroups(Punctuation, Text, Operator.Word, Punctuation, Text,
                      using(this, state='inline'))),
            # A reference target
            (r'^( *\.\.)(\s*)(_(?:[^:\\]|\\.)+:)(.*?)$',
             bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))),
            # A footnote/citation target
            (r'^( *\.\.)(\s*)(\[.+\])(.*?)$',
             bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))),
            # A substitution def
            (r'^( *\.\.)(\s*)(\|.+\|)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))',
             bygroups(Punctuation, Text, Name.Tag, Text, Operator.Word,
                      Punctuation, Text, using(this, state='inline'))),
            # Comments
            (r'^ *\.\..*(\n( +.*\n|\n)+)?', Comment.Preproc),
            # Field list marker
            (r'^( *)(:(?:\\\\|\\:|[^:\n])+:(?=\s))([ \t]*)',
             bygroups(Text, Name.Class, Text)),
            # Definition list
            (r'^(\S.*(?<!::)\n)((?:(?: +.*)\n)+)',
             bygroups(using(this, state='inline'), using(this, state='inline'))),
            # Code blocks
            (r'(::)(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\3.*)?\n)+)',
             bygroups(String.Escape, Text, String, String, Text, String)),
            include('inline'),
        ],
        'inline': [
            (r'\\.', Text),  # escape
            (r'``', String, 'literal'),  # code
            (r'(`.+?)(<.+?>)(`__?)',  # reference with inline target
             bygroups(String, String.Interpol, String)),
            (r'`.+?`__?', String),  # reference
            (r'(`.+?`)(:[a-zA-Z0-9:-]+?:)?',
             bygroups(Name.Variable, Name.Attribute)),  # role
            (r'(:[a-zA-Z0-9:-]+?:)(`.+?`)',
             bygroups(Name.Attribute, Name.Variable)),  # role (content first)
            (r'\*\*.+?\*\*', Generic.Strong),  # Strong emphasis
            (r'\*.+?\*', Generic.Emph),  # Emphasis
            (r'\[.*?\]_', String),  # Footnote or citation
            (r'<.+?>', Name.Tag),  # Hyperlink
            (r'[^\\\n\[*`:]+', Text),
            (r'.', Text),
        ],
        'literal': [
            (r'[^`]+', String),
            (r'``' + end_string_suffix, String, '#pop'),
            (r'`', String),
        ]
    }

    def __init__(self, **options):
        self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
        RegexLexer.__init__(self, **options)

    def analyse_text(text):
        if text[:2] == '..' and text[2:3] != '.':
            return 0.3
        p1 = text.find("\n")
        p2 = text.find("\n", p1 + 1)
        if (p2 > -1 and                     # has two lines
                p1 * 2 + 1 == p2 and        # they are the same length
                text[p1+1] in '-=' and      # the next line both starts and ends with
                text[p1+1] == text[p2-1]):  # ...a sufficiently high header
            return 0.5


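# Illustrative sketch (demonstration only): ``handlecodeblocks``, documented
# in the docstring above, is an ordinary constructor option, so delegation
# into ``.. code-block::`` directives can be switched off like this:
#
#   >>> plain_rst = RstLexer(handlecodeblocks=False)

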
class TexLexer(RegexLexer):
    """
    Lexer for the TeX and LaTeX typesetting languages.
    """

    name = 'TeX'
    aliases = ['tex', 'latex']
    filenames = ['*.tex', '*.aux', '*.toc']
    mimetypes = ['text/x-tex', 'text/x-latex']

    tokens = {
        'general': [
            (r'%.*?\n', Comment),
            (r'[{}]', Name.Builtin),
            (r'[&_^]', Name.Builtin),
        ],
        'root': [
            (r'\\\[', String.Backtick, 'displaymath'),
            (r'\\\(', String, 'inlinemath'),
            (r'\$\$', String.Backtick, 'displaymath'),
            (r'\$', String, 'inlinemath'),
            (r'\\([a-zA-Z]+|.)', Keyword, 'command'),
            (r'\\$', Keyword),
            include('general'),
            (r'[^\\$%&_^{}]+', Text),
        ],
        'math': [
            (r'\\([a-zA-Z]+|.)', Name.Variable),
            include('general'),
            (r'[0-9]+', Number),
            (r'[-=!+*/()\[\]]', Operator),
            (r'[^=!+*/()\[\]\\$%&_^{}0-9-]+', Name.Builtin),
        ],
        'inlinemath': [
            (r'\\\)', String, '#pop'),
            (r'\$', String, '#pop'),
            include('math'),
        ],
        'displaymath': [
            (r'\\\]', String, '#pop'),
            (r'\$\$', String, '#pop'),
            (r'\$', Name.Builtin),
            include('math'),
        ],
        'command': [
            (r'\[.*?\]', Name.Attribute),
            (r'\*', Keyword),
            default('#pop'),
        ],
    }

    def analyse_text(text):
        for start in ("\\documentclass", "\\input", "\\documentstyle",
                      "\\relax"):
            if text[:len(start)] == start:
                return True


class GroffLexer(RegexLexer):
    """
    Lexer for the (g)roff typesetting language, supporting groff
    extensions. Mainly useful for highlighting manpage sources.

    .. versionadded:: 0.6
    """

    name = 'Groff'
    aliases = ['groff', 'nroff', 'man']
    filenames = ['*.[1-9]', '*.man', '*.1p', '*.3pm']
    mimetypes = ['application/x-troff', 'text/troff']

    tokens = {
        'root': [
            (r'(\.)(\w+)', bygroups(Text, Keyword), 'request'),
            (r'\.', Punctuation, 'request'),
            # Regular characters, slurp till we find a backslash or newline
            (r'[^\\\n]+', Text, 'textline'),
            default('textline'),
        ],
        'textline': [
            include('escapes'),
            (r'[^\\\n]+', Text),
            (r'\n', Text, '#pop'),
        ],
        'escapes': [
            # groff has many ways to write escapes.
            (r'\\"[^\n]*', Comment),
            (r'\\[fn]\w', String.Escape),
            (r'\\\(.{2}', String.Escape),
            (r'\\.\[.*\]', String.Escape),
            (r'\\.', String.Escape),
            (r'\\\n', Text, 'request'),
        ],
        'request': [
            (r'\n', Text, '#pop'),
            include('escapes'),
            (r'"[^\n"]+"', String.Double),
            (r'\d+', Number),
            (r'\S+', String),
            (r'\s+', Text),
        ],
    }

    def analyse_text(text):
        if text[:1] != '.':
            return False
        if text[:3] == '.\\"':
            return True
        if text[:4] == '.TH ':
            return True
        if text[1:3].isalnum() and text[3].isspace():
            return 0.9


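# Illustrative sketch, not part of the original module: ``analyse_text`` above
# is what ``pygments.lexers.guess_lexer`` consults; the lexer metaclass wraps
# it so it always yields a float between 0.0 and 1.0. A typical manpage source
# is therefore reported as a certain match:
#
#   >>> GroffLexer.analyse_text('.TH LS 1\n.SH NAME\n')  # -> 1.0

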
class MozPreprocHashLexer(RegexLexer):
    """
    Lexer for Mozilla Preprocessor files (with '#' as the marker).

    Other data is left untouched.

    .. versionadded:: 2.0
    """
    name = 'mozhashpreproc'
    aliases = [name]
    filenames = []
    mimetypes = []

    tokens = {
        'root': [
            (r'^#', Comment.Preproc, ('expr', 'exprstart')),
            (r'.+', Other),
        ],
        'exprstart': [
            (r'(literal)(.*)', bygroups(Comment.Preproc, Text), '#pop:2'),
            (words((
                'define', 'undef', 'if', 'ifdef', 'ifndef', 'else', 'elif',
                'elifdef', 'elifndef', 'endif', 'expand', 'filter', 'unfilter',
                'include', 'includesubst', 'error')),
             Comment.Preproc, '#pop'),
        ],
        'expr': [
            (words(('!', '!=', '==', '&&', '||')), Operator),
            (r'(defined)(\()', bygroups(Keyword, Punctuation)),
            (r'\)', Punctuation),
            (r'[0-9]+', Number.Decimal),
            (r'__\w+?__', Name.Variable),
            (r'@\w+?@', Name.Class),
            (r'\w+', Name),
            (r'\n', Text, '#pop'),
            (r'\s+', Text),
            (r'\S', Punctuation),
        ],
    }


class MozPreprocPercentLexer(MozPreprocHashLexer):
    """
    Lexer for Mozilla Preprocessor files (with '%' as the marker).

    Other data is left untouched.

    .. versionadded:: 2.0
    """
    name = 'mozpercentpreproc'
    aliases = [name]
    filenames = []
    mimetypes = []

    tokens = {
        'root': [
            (r'^%', Comment.Preproc, ('expr', 'exprstart')),
            (r'.+', Other),
        ],
    }


class MozPreprocXulLexer(DelegatingLexer):
    """
    Subclass of the `MozPreprocHashLexer` that highlights unlexed data with the
    `XmlLexer`.

    .. versionadded:: 2.0
    """
    name = "XUL+mozpreproc"
    aliases = ['xul+mozpreproc']
    filenames = ['*.xul.in']
    mimetypes = []

    def __init__(self, **options):
        super().__init__(XmlLexer, MozPreprocHashLexer, **options)


class MozPreprocJavascriptLexer(DelegatingLexer):
    """
    Subclass of the `MozPreprocHashLexer` that highlights unlexed data with the
    `JavascriptLexer`.

    .. versionadded:: 2.0
    """
    name = "Javascript+mozpreproc"
    aliases = ['javascript+mozpreproc']
    filenames = ['*.js.in']
    mimetypes = []

    def __init__(self, **options):
        super().__init__(JavascriptLexer, MozPreprocHashLexer, **options)


class MozPreprocCssLexer(DelegatingLexer):
    """
    Subclass of the `MozPreprocHashLexer` that highlights unlexed data with the
    `CssLexer`.

    .. versionadded:: 2.0
    """
    name = "CSS+mozpreproc"
    aliases = ['css+mozpreproc']
    filenames = ['*.css.in']
    mimetypes = []

    def __init__(self, **options):
        super().__init__(CssLexer, MozPreprocPercentLexer, **options)


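# Design note (explanatory, not from the original module): ``DelegatingLexer``
# runs the *second* lexer over the whole input, buffers everything it emits as
# ``Other``, and re-lexes those buffered spans with the *first* (root) lexer.
# Any similar combination follows the same two-line pattern, e.g. this
# hypothetical INI wrapper (assumption, for illustration only):
#
#   class MozPreprocIniLexer(DelegatingLexer):
#       def __init__(self, **options):
#           super().__init__(IniLexer, MozPreprocHashLexer, **options)

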
class MarkdownLexer(RegexLexer):
    """
    For Markdown markup.

    .. versionadded:: 2.2
    """
    name = 'Markdown'
    url = 'https://daringfireball.net/projects/markdown/'
    aliases = ['markdown', 'md']
    filenames = ['*.md', '*.markdown']
    mimetypes = ["text/x-markdown"]
    flags = re.MULTILINE

    def _handle_codeblock(self, match):
        from pygments.lexers import get_lexer_by_name

        yield match.start('initial'), String.Backtick, match.group('initial')
        yield match.start('lang'), String.Backtick, match.group('lang')
        if match.group('afterlang') is not None:
            yield match.start('whitespace'), Whitespace, match.group('whitespace')
            yield match.start('extra'), Text, match.group('extra')
        yield match.start('newline'), Whitespace, match.group('newline')

        # lookup lexer if wanted and existing
        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name(match.group('lang').strip())
            except ClassNotFound:
                pass
        code = match.group('code')
        # no lexer for this language. handle it like it was a code block
        if lexer is None:
            yield match.start('code'), String, code
        else:
            # FIXME: aren't the offsets wrong?
            yield from do_insertions([], lexer.get_tokens_unprocessed(code))

        yield match.start('terminator'), String.Backtick, match.group('terminator')

    tokens = {
        'root': [
            # heading with '#' prefix (atx-style)
            (r'(^#[^#].+)(\n)', bygroups(Generic.Heading, Text)),
            # subheading with '#' prefix (atx-style)
            (r'(^#{2,6}[^#].+)(\n)', bygroups(Generic.Subheading, Text)),
            # heading with '=' underlines (Setext-style)
            (r'^(.+)(\n)(=+)(\n)', bygroups(Generic.Heading, Text, Generic.Heading, Text)),
            # subheading with '-' underlines (Setext-style)
            (r'^(.+)(\n)(-+)(\n)', bygroups(Generic.Subheading, Text, Generic.Subheading, Text)),
            # task list
            (r'^(\s*)([*-] )(\[[ xX]\])( .+\n)',
             bygroups(Whitespace, Keyword, Keyword, using(this, state='inline'))),
            # bulleted list
            (r'^(\s*)([*-])(\s)(.+\n)',
             bygroups(Whitespace, Keyword, Whitespace, using(this, state='inline'))),
            # numbered list
            (r'^(\s*)([0-9]+\.)( .+\n)',
             bygroups(Whitespace, Keyword, using(this, state='inline'))),
            # quote
            (r'^(\s*>\s)(.+\n)', bygroups(Keyword, Generic.Emph)),
            # code block fenced by 3 backticks
            (r'^(\s*```\n[\w\W]*?^\s*```$\n)', String.Backtick),
            # code block with language
            # Some tools include extra stuff after the language name, just
            # highlight that as text. For example: https://docs.enola.dev/use/execmd
            (r'''(?x)
              ^(?P<initial>\s*```)
              (?P<lang>[\w\-]+)
              (?P<afterlang>
                (?P<whitespace>[^\S\n]+)
                (?P<extra>.*))?
              (?P<newline>\n)
              (?P<code>(.|\n)*?)
              (?P<terminator>^\s*```$\n)
              ''',
             _handle_codeblock),
            include('inline'),
        ],
        'inline': [
            # escape
            (r'\\.', Text),
            # inline code
            (r'([^`]?)(`[^`\n]+`)', bygroups(Text, String.Backtick)),
            # warning: the following rules eat outer tags.
            # eg. **foo _bar_ baz** => foo and baz are not recognized as bold
            # bold fenced by '**'
            (r'([^\*]?)(\*\*[^* \n][^*\n]*\*\*)', bygroups(Text, Generic.Strong)),
            # bold fenced by '__'
            (r'([^_]?)(__[^_ \n][^_\n]*__)', bygroups(Text, Generic.Strong)),
            # italics fenced by '*'
            (r'([^\*]?)(\*[^* \n][^*\n]*\*)', bygroups(Text, Generic.Emph)),
            # italics fenced by '_'
            (r'([^_]?)(_[^_ \n][^_\n]*_)', bygroups(Text, Generic.Emph)),
            # strikethrough
            (r'([^~]?)(~~[^~ \n][^~\n]*~~)', bygroups(Text, Generic.Deleted)),
            # mentions and topics (twitter and github stuff)
            (r'[@#][\w/:]+', Name.Entity),
            # (image?) links eg: ![Image of Yaktocat](https://octodex.github.com/images/yaktocat.png)
            (r'(!?\[)([^]]+)(\])(\()([^)]+)(\))',
             bygroups(Text, Name.Tag, Text, Text, Name.Attribute, Text)),
            # reference-style links, e.g.:
            # [an example][id]
            # [id]: http://example.com/
            (r'(\[)([^]]+)(\])(\[)([^]]*)(\])',
             bygroups(Text, Name.Tag, Text, Text, Name.Label, Text)),
            (r'^(\s*\[)([^]]*)(\]:\s*)(.+)',
             bygroups(Text, Name.Label, Text, Name.Attribute)),
            # general text, must come last!
            (r'[^\\\s]+', Text),
            (r'.', Text),
        ],
    }

    def __init__(self, **options):
        self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
        RegexLexer.__init__(self, **options)


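# Illustrative sketch (demonstration only): with the default
# ``handlecodeblocks=True``, a fenced block such as
#
#   ```python
#   print("hi")
#   ```
#
# is routed through ``_handle_codeblock`` above, so its body is tokenized by
# the Python lexer; with ``MarkdownLexer(handlecodeblocks=False)`` the body is
# emitted as plain ``String`` instead.

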
class TiddlyWiki5Lexer(RegexLexer):
    """
    For TiddlyWiki5 markup.

    .. versionadded:: 2.7
    """
    name = 'tiddler'
    url = 'https://tiddlywiki.com/#TiddlerFiles'
    aliases = ['tid']
    filenames = ['*.tid']
    mimetypes = ["text/vnd.tiddlywiki"]
    flags = re.MULTILINE

    def _handle_codeblock(self, match):
        """
        match args: 1:backticks, 2:lang_name, 3:newline, 4:code, 5:backticks
        """
        from pygments.lexers import get_lexer_by_name

        # section header
        yield match.start(1), String, match.group(1)
        yield match.start(2), String, match.group(2)
        yield match.start(3), Text, match.group(3)

        # lookup lexer if wanted and existing
        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name(match.group(2).strip())
            except ClassNotFound:
                pass
        code = match.group(4)

        # no lexer for this language. handle it like it was a code block
        if lexer is None:
            yield match.start(4), String, code
            return

        yield from do_insertions([], lexer.get_tokens_unprocessed(code))

        yield match.start(5), String, match.group(5)

    def _handle_cssblock(self, match):
        """
        match args: 1:style tag 2:newline, 3:code, 4:closing style tag
        """
        from pygments.lexers import get_lexer_by_name

        # section header
        yield match.start(1), String, match.group(1)
        yield match.start(2), String, match.group(2)

        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name('css')
            except ClassNotFound:
                pass
        code = match.group(3)

        # no lexer for this language. handle it like it was a code block
        if lexer is None:
            yield match.start(3), String, code
            return

        yield from do_insertions([], lexer.get_tokens_unprocessed(code))

        yield match.start(4), String, match.group(4)

    tokens = {
        'root': [
            # title in metadata section
            (r'^(title)(:\s)(.+\n)', bygroups(Keyword, Text, Generic.Heading)),
            # headings
            (r'^(!)([^!].+\n)', bygroups(Generic.Heading, Text)),
            (r'^(!{2,6})(.+\n)', bygroups(Generic.Subheading, Text)),
            # bulleted or numbered lists or single-line block quotes
            # (can be mixed)
            (r'^(\s*)([*#>]+)(\s*)(.+\n)',
             bygroups(Text, Keyword, Text, using(this, state='inline'))),
            # multi-line block quotes
            (r'^(<<<.*\n)([\w\W]*?)(^<<<.*$)', bygroups(String, Text, String)),
            # table header
            (r'^(\|.*?\|h)$', bygroups(Generic.Strong)),
            # table footer or caption
            (r'^(\|.*?\|[cf])$', bygroups(Generic.Emph)),
            # table class
            (r'^(\|.*?\|k)$', bygroups(Name.Tag)),
            # definitions
            (r'^(;.*)$', bygroups(Generic.Strong)),
            # text block
            (r'^(```\n)([\w\W]*?)(^```$)', bygroups(String, Text, String)),
            # code block with language
            (r'^(```)(\w+)(\n)([\w\W]*?)(^```$)', _handle_codeblock),
            # CSS style block
            (r'^(<style>)(\n)([\w\W]*?)(^</style>$)', _handle_cssblock),
            include('keywords'),
            include('inline'),
        ],
        'keywords': [
            (words((
                '\\define', '\\end', 'caption', 'created', 'modified', 'tags',
                'title', 'type'), prefix=r'^', suffix=r'\b'),
             Keyword),
        ],
        'inline': [
            # escape
            (r'\\.', Text),
            # created or modified date
            (r'\d{17}', Number.Integer),
            # italics
            (r'(\s)(//[^/]+//)((?=\W|\n))',
             bygroups(Text, Generic.Emph, Text)),
            # superscript
            (r'(\s)(\^\^[^\^]+\^\^)', bygroups(Text, Generic.Emph)),
            # subscript
            (r'(\s)(,,[^,]+,,)', bygroups(Text, Generic.Emph)),
            # underscore
            (r'(\s)(__[^_]+__)', bygroups(Text, Generic.Strong)),
            # bold
            (r"(\s)(''[^']+'')((?=\W|\n))",
             bygroups(Text, Generic.Strong, Text)),
            # strikethrough
            (r'(\s)(~~[^~]+~~)((?=\W|\n))',
             bygroups(Text, Generic.Deleted, Text)),
            # TiddlyWiki variables
            (r'<<[^>]+>>', Name.Tag),
            (r'\$\$[^$]+\$\$', Name.Tag),
            (r'\$\([^)]+\)\$', Name.Tag),
            # TiddlyWiki style or class
            (r'^@@.*$', Name.Tag),
            # HTML tags
            (r'</?[^>]+>', Name.Tag),
            # inline code
            (r'`[^`]+`', String.Backtick),
            # HTML escaped symbols
            (r'&\S*?;', String.Regex),
            # Wiki links
            (r'(\[{2})([^]\|]+)(\]{2})', bygroups(Text, Name.Tag, Text)),
            # External links
            (r'(\[{2})([^]\|]+)(\|)([^]\|]+)(\]{2})',
             bygroups(Text, Name.Tag, Text, Name.Attribute, Text)),
            # Transclusion
            (r'(\{{2})([^}]+)(\}{2})', bygroups(Text, Name.Tag, Text)),
            # URLs
            (r'(\b.?.?tps?://[^\s"]+)', bygroups(Name.Attribute)),
            # general text, must come last!
            (r'[\w]+', Text),
            (r'.', Text)
        ],
    }

    def __init__(self, **options):
        self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
        RegexLexer.__init__(self, **options)


class WikitextLexer(RegexLexer):
    """
    For MediaWiki Wikitext.

    Parsing Wikitext is tricky, and results vary between different MediaWiki
    installations, so we only highlight common syntaxes (built-in or from
    popular extensions), and also assume templates produce no unbalanced
    syntaxes.

    .. versionadded:: 2.15
    """
    name = 'Wikitext'
    url = 'https://www.mediawiki.org/wiki/Wikitext'
    aliases = ['wikitext', 'mediawiki']
    filenames = []
    mimetypes = ['text/x-wiki']
    flags = re.MULTILINE

    def nowiki_tag_rules(tag_name):
        return [
            (r'(?i)(</)({})(\s*)(>)'.format(tag_name), bygroups(Punctuation,
             Name.Tag, Whitespace, Punctuation), '#pop'),
            include('entity'),
            include('text'),
        ]

    def plaintext_tag_rules(tag_name):
        return [
            (r'(?si)(.*?)(</)({})(\s*)(>)'.format(tag_name), bygroups(Text,
             Punctuation, Name.Tag, Whitespace, Punctuation), '#pop'),
        ]

    def delegate_tag_rules(tag_name, lexer):
        return [
            (r'(?i)(</)({})(\s*)(>)'.format(tag_name), bygroups(Punctuation,
             Name.Tag, Whitespace, Punctuation), '#pop'),
            (r'(?si).+?(?=</{}\s*>)'.format(tag_name), using(lexer)),
        ]

    def text_rules(token):
        return [
            (r'\w+', token),
            (r'[^\S\n]+', token),
            (r'(?s).', token),
        ]

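    # Explanatory note (not part of the original module): the four helpers
    # above are rule factories, evaluated once at class-creation time and
    # spliced into ``tokens`` near the end of the class. For instance,
    # ``'tag-hiero': plaintext_tag_rules('hiero')`` expands to the single rule
    #
    #     (r'(?si)(.*?)(</)(hiero)(\s*)(>)', bygroups(Text, Punctuation,
    #      Name.Tag, Whitespace, Punctuation), '#pop'),
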
    def handle_syntaxhighlight(self, match, ctx):
        from pygments.lexers import get_lexer_by_name

        attr_content = match.group()
        start = 0
        index = 0
        while True:
            index = attr_content.find('>', start)
            # Exclude comment end (-->)
            if attr_content[index-2:index] != '--':
                break
            start = index + 1

        if index == -1:
            # No tag end
            yield from self.get_tokens_unprocessed(attr_content, stack=['root', 'attr'])
            return
        attr = attr_content[:index]
        yield from self.get_tokens_unprocessed(attr, stack=['root', 'attr'])
        yield match.start(3) + index, Punctuation, '>'

        lexer = None
        content = attr_content[index+1:]
        lang_match = re.findall(r'\blang=("|\'|)(\w+)(\1)', attr)

        if len(lang_match) >= 1:
            # Pick the last match in case of multiple matches
            lang = lang_match[-1][1]
            try:
                lexer = get_lexer_by_name(lang)
            except ClassNotFound:
                pass

        if lexer is None:
            yield match.start() + index + 1, Text, content
        else:
            yield from lexer.get_tokens_unprocessed(content)

    def handle_score(self, match, ctx):
        attr_content = match.group()
        start = 0
        index = 0
        while True:
            index = attr_content.find('>', start)
            # Exclude comment end (-->)
            if attr_content[index-2:index] != '--':
                break
            start = index + 1

        if index == -1:
            # No tag end
            yield from self.get_tokens_unprocessed(attr_content, stack=['root', 'attr'])
            return
        attr = attr_content[:index]
        content = attr_content[index+1:]
        yield from self.get_tokens_unprocessed(attr, stack=['root', 'attr'])
        yield match.start(3) + index, Punctuation, '>'

        lang_match = re.findall(r'\blang=("|\'|)(\w+)(\1)', attr)
        # Pick the last match in case of multiple matches
        lang = lang_match[-1][1] if len(lang_match) >= 1 else 'lilypond'

        if lang == 'lilypond':  # Case sensitive
            yield from LilyPondLexer().get_tokens_unprocessed(content)
        else:  # ABC
            # FIXME: Use ABC lexer in the future
            yield match.start() + index + 1, Text, content

    # a-z removed to prevent linter from complaining, REMEMBER to use (?i)
    title_char = r' %!"$&\'()*,\-./0-9:;=?@A-Z\\\^_`~+\u0080-\uFFFF'
    nbsp_char = r'(?:\t|&nbsp;|&\#0*160;|&\#[Xx]0*[Aa]0;|[ \xA0\u1680\u2000-\u200A\u202F\u205F\u3000])'
    link_address = r'(?:[0-9.]+|\[[0-9a-f:.]+\]|[^\x00-\x20"<>\[\]\x7F\xA0\u1680\u2000-\u200A\u202F\u205F\u3000\uFFFD])'
    link_char_class = r'[^\x00-\x20"<>\[\]\x7F\xA0\u1680\u2000-\u200A\u202F\u205F\u3000\uFFFD]'
    double_slashes_i = {
        '__FORCETOC__', '__NOCONTENTCONVERT__', '__NOCC__', '__NOEDITSECTION__', '__NOGALLERY__',
        '__NOTITLECONVERT__', '__NOTC__', '__NOTOC__', '__TOC__',
    }
    double_slashes = {
        '__EXPECTUNUSEDCATEGORY__', '__HIDDENCAT__', '__INDEX__', '__NEWSECTIONLINK__',
        '__NOINDEX__', '__NONEWSECTIONLINK__', '__STATICREDIRECT__', '__NOGLOBAL__',
        '__DISAMBIG__', '__EXPECTED_UNCONNECTED_PAGE__',
    }
    protocols = {
        'bitcoin:', 'ftp://', 'ftps://', 'geo:', 'git://', 'gopher://', 'http://', 'https://',
        'irc://', 'ircs://', 'magnet:', 'mailto:', 'mms://', 'news:', 'nntp://', 'redis://',
        'sftp://', 'sip:', 'sips:', 'sms:', 'ssh://', 'svn://', 'tel:', 'telnet://', 'urn:',
        'worldwind://', 'xmpp:', '//',
    }
    non_relative_protocols = protocols - {'//'}
    html_tags = {
        'abbr', 'b', 'bdi', 'bdo', 'big', 'blockquote', 'br', 'caption', 'center', 'cite', 'code',
        'data', 'dd', 'del', 'dfn', 'div', 'dl', 'dt', 'em', 'font', 'h1', 'h2', 'h3', 'h4', 'h5',
        'h6', 'hr', 'i', 'ins', 'kbd', 'li', 'link', 'mark', 'meta', 'ol', 'p', 'q', 'rb', 'rp',
        'rt', 'rtc', 'ruby', 's', 'samp', 'small', 'span', 'strike', 'strong', 'sub', 'sup',
        'table', 'td', 'th', 'time', 'tr', 'tt', 'u', 'ul', 'var', 'wbr',
    }
    parser_tags = {
        'graph', 'charinsert', 'rss', 'chem', 'categorytree', 'nowiki', 'inputbox', 'math',
        'hiero', 'score', 'pre', 'ref', 'translate', 'imagemap', 'templatestyles', 'languages',
        'noinclude', 'mapframe', 'section', 'poem', 'syntaxhighlight', 'includeonly', 'tvar',
        'onlyinclude', 'templatedata', 'langconvert', 'timeline', 'dynamicpagelist', 'gallery',
        'maplink', 'ce', 'references',
    }
    variant_langs = {
        # ZhConverter.php
        'zh', 'zh-hans', 'zh-hant', 'zh-cn', 'zh-hk', 'zh-mo', 'zh-my', 'zh-sg', 'zh-tw',
        # UnConverter.php
        'uz', 'uz-latn', 'uz-cyrl',
        # TlyConverter.php
        'tly', 'tly-cyrl',
        # TgConverter.php
        'tg', 'tg-latn',
        # SrConverter.php
        'sr', 'sr-ec', 'sr-el',
        # ShiConverter.php
        'shi', 'shi-tfng', 'shi-latn',
        # ShConverter.php
        'sh-latn', 'sh-cyrl',
        # KuConverter.php
        'ku', 'ku-arab', 'ku-latn',
        # KkConverter.php
        'kk', 'kk-cyrl', 'kk-latn', 'kk-arab', 'kk-kz', 'kk-tr', 'kk-cn',
        # IuConverter.php
        'iu', 'ike-cans', 'ike-latn',
        # GanConverter.php
        'gan', 'gan-hans', 'gan-hant',
        # EnConverter.php
        'en', 'en-x-piglatin',
        # CrhConverter.php
        'crh', 'crh-cyrl', 'crh-latn',
        # BanConverter.php
        'ban', 'ban-bali', 'ban-x-dharma', 'ban-x-palmleaf', 'ban-x-pku',
    }
    magic_vars_i = {
        'ARTICLEPATH', 'INT', 'PAGEID', 'SCRIPTPATH', 'SERVER', 'SERVERNAME', 'STYLEPATH',
    }
    magic_vars = {
        '!', '=', 'BASEPAGENAME', 'BASEPAGENAMEE', 'CASCADINGSOURCES', 'CONTENTLANGUAGE',
        'CONTENTLANG', 'CURRENTDAY', 'CURRENTDAY2', 'CURRENTDAYNAME', 'CURRENTDOW', 'CURRENTHOUR',
        'CURRENTMONTH', 'CURRENTMONTH2', 'CURRENTMONTH1', 'CURRENTMONTHABBREV', 'CURRENTMONTHNAME',
        'CURRENTMONTHNAMEGEN', 'CURRENTTIME', 'CURRENTTIMESTAMP', 'CURRENTVERSION', 'CURRENTWEEK',
        'CURRENTYEAR', 'DIRECTIONMARK', 'DIRMARK', 'FULLPAGENAME', 'FULLPAGENAMEE', 'LOCALDAY',
        'LOCALDAY2', 'LOCALDAYNAME', 'LOCALDOW', 'LOCALHOUR', 'LOCALMONTH', 'LOCALMONTH2',
        'LOCALMONTH1', 'LOCALMONTHABBREV', 'LOCALMONTHNAME', 'LOCALMONTHNAMEGEN', 'LOCALTIME',
        'LOCALTIMESTAMP', 'LOCALWEEK', 'LOCALYEAR', 'NAMESPACE', 'NAMESPACEE', 'NAMESPACENUMBER',
        'NUMBEROFACTIVEUSERS', 'NUMBEROFADMINS', 'NUMBEROFARTICLES', 'NUMBEROFEDITS',
        'NUMBEROFFILES', 'NUMBEROFPAGES', 'NUMBEROFUSERS', 'PAGELANGUAGE', 'PAGENAME', 'PAGENAMEE',
        'REVISIONDAY', 'REVISIONDAY2', 'REVISIONID', 'REVISIONMONTH', 'REVISIONMONTH1',
        'REVISIONSIZE', 'REVISIONTIMESTAMP', 'REVISIONUSER', 'REVISIONYEAR', 'ROOTPAGENAME',
        'ROOTPAGENAMEE', 'SITENAME', 'SUBJECTPAGENAME', 'ARTICLEPAGENAME', 'SUBJECTPAGENAMEE',
        'ARTICLEPAGENAMEE', 'SUBJECTSPACE', 'ARTICLESPACE', 'SUBJECTSPACEE', 'ARTICLESPACEE',
        'SUBPAGENAME', 'SUBPAGENAMEE', 'TALKPAGENAME', 'TALKPAGENAMEE', 'TALKSPACE', 'TALKSPACEE',
    }
    parser_functions_i = {
        'ANCHORENCODE', 'BIDI', 'CANONICALURL', 'CANONICALURLE', 'FILEPATH', 'FORMATNUM',
        'FULLURL', 'FULLURLE', 'GENDER', 'GRAMMAR', 'INT', r'\#LANGUAGE', 'LC', 'LCFIRST', 'LOCALURL',
        'LOCALURLE', 'NS', 'NSE', 'PADLEFT', 'PADRIGHT', 'PAGEID', 'PLURAL', 'UC', 'UCFIRST',
        'URLENCODE',
    }
    parser_functions = {
        'BASEPAGENAME', 'BASEPAGENAMEE', 'CASCADINGSOURCES', 'DEFAULTSORT', 'DEFAULTSORTKEY',
        'DEFAULTCATEGORYSORT', 'FULLPAGENAME', 'FULLPAGENAMEE', 'NAMESPACE', 'NAMESPACEE',
        'NAMESPACENUMBER', 'NUMBERINGROUP', 'NUMINGROUP', 'NUMBEROFACTIVEUSERS', 'NUMBEROFADMINS',
        'NUMBEROFARTICLES', 'NUMBEROFEDITS', 'NUMBEROFFILES', 'NUMBEROFPAGES', 'NUMBEROFUSERS',
        'PAGENAME', 'PAGENAMEE', 'PAGESINCATEGORY', 'PAGESINCAT', 'PAGESIZE', 'PROTECTIONEXPIRY',
        'PROTECTIONLEVEL', 'REVISIONDAY', 'REVISIONDAY2', 'REVISIONID', 'REVISIONMONTH',
        'REVISIONMONTH1', 'REVISIONTIMESTAMP', 'REVISIONUSER', 'REVISIONYEAR', 'ROOTPAGENAME',
        'ROOTPAGENAMEE', 'SUBJECTPAGENAME', 'ARTICLEPAGENAME', 'SUBJECTPAGENAMEE',
        'ARTICLEPAGENAMEE', 'SUBJECTSPACE', 'ARTICLESPACE', 'SUBJECTSPACEE', 'ARTICLESPACEE',
        'SUBPAGENAME', 'SUBPAGENAMEE', 'TALKPAGENAME', 'TALKPAGENAMEE', 'TALKSPACE', 'TALKSPACEE',
        'INT', 'DISPLAYTITLE', 'PAGESINNAMESPACE', 'PAGESINNS',
    }
    tokens = {
        'root': [
            # Redirects
            (r"""(?xi)
                (\A\s*?)(\#REDIRECT:?) # may contain a colon
                (\s+)(\[\[) (?=[^\]\n]* \]\]$)
             """,
             bygroups(Whitespace, Keyword, Whitespace, Punctuation), 'redirect-inner'),
            # Subheadings
            (r'^(={2,6})(.+?)(\1)(\s*$\n)',
             bygroups(Generic.Subheading, Generic.Subheading, Generic.Subheading, Whitespace)),
            # Headings
            (r'^(=.+?=)(\s*$\n)',
             bygroups(Generic.Heading, Whitespace)),
            # Double-slashed magic words
            (words(double_slashes_i, prefix=r'(?i)'), Name.Function.Magic),
            (words(double_slashes), Name.Function.Magic),
            # Raw URLs
            (r'(?i)\b(?:{}){}{}*'.format('|'.join(protocols),
             link_address, link_char_class), Name.Label),
            # Magic links
            (r'\b(?:RFC|PMID){}+[0-9]+\b'.format(nbsp_char),
             Name.Function.Magic),
            (r"""(?x)
                \bISBN {nbsp_char}
                (?: 97[89] {nbsp_dash}? )?
                (?: [0-9] {nbsp_dash}? ){{9}} # escape format()
                [0-9Xx]\b
            """.format(nbsp_char=nbsp_char, nbsp_dash=f'(?:-|{nbsp_char})'), Name.Function.Magic),
            include('list'),
            include('inline'),
            include('text'),
        ],
        'redirect-inner': [
            (r'(\]\])(\s*?\n)', bygroups(Punctuation, Whitespace), '#pop'),
            (r'(\#)([^#]*?)', bygroups(Punctuation, Name.Label)),
            (r'(?i)[{}]+'.format(title_char), Name.Tag),
        ],
        'list': [
            # Description lists
            (r'^;', Keyword, 'dt'),
            # Ordered lists, unordered lists and indents
            (r'^[#:*]+', Keyword),
            # Horizontal rules
            (r'^-{4,}', Keyword),
        ],
        'inline': [
            # Signatures
            (r'~{3,5}', Keyword),
            # Entities
            include('entity'),
            # Bold & italic
            (r"('')(''')(?!')", bygroups(Generic.Emph,
             Generic.EmphStrong), 'inline-italic-bold'),
            (r"'''(?!')", Generic.Strong, 'inline-bold'),
            (r"''(?!')", Generic.Emph, 'inline-italic'),
            # Comments & parameters & templates
            include('replaceable'),
            # Media links
            (
                r"""(?xi)
                (\[\[)
                    (File|Image) (:)
                    ((?: [%s] | \{{2,3}[^{}]*?\}{2,3} | <!--[\s\S]*?--> )*)
                    (?: (\#) ([%s]*?) )?
                """ % (title_char, f'{title_char}#'),
                bygroups(Punctuation, Name.Namespace, Punctuation,
                         using(this, state=['wikilink-name']), Punctuation, Name.Label),
                'medialink-inner'
            ),
            # Wikilinks
            (
                r"""(?xi)
                (\[\[)(?!%s) # Should not contain URLs
                (?: ([%s]*) (:))?
                ((?: [%s] | \{{2,3}[^{}]*?\}{2,3} | <!--[\s\S]*?--> )*?)
                (?: (\#) ([%s]*?) )?
                (\]\])
                """ % ('|'.join(protocols), title_char.replace('/', ''),
                       title_char, f'{title_char}#'),
                bygroups(Punctuation, Name.Namespace, Punctuation,
                         using(this, state=['wikilink-name']), Punctuation, Name.Label, Punctuation)
            ),
            (
                r"""(?xi)
                (\[\[)(?!%s)
                (?: ([%s]*) (:))?
                ((?: [%s] | \{{2,3}[^{}]*?\}{2,3} | <!--[\s\S]*?--> )*?)
                (?: (\#) ([%s]*?) )?
                (\|)
                """ % ('|'.join(protocols), title_char.replace('/', ''),
                       title_char, f'{title_char}#'),
                bygroups(Punctuation, Name.Namespace, Punctuation,
                         using(this, state=['wikilink-name']), Punctuation, Name.Label, Punctuation),
                'wikilink-inner'
            ),
            # External links
            (
                r"""(?xi)
                (\[)
                ((?:{}) {} {}*)
                (\s*)
                """.format('|'.join(protocols), link_address, link_char_class),
                bygroups(Punctuation, Name.Label, Whitespace),
                'extlink-inner'
            ),
            # Tables
            (r'^(:*)(\s*?)(\{\|)([^\n]*)$', bygroups(Keyword,
             Whitespace, Punctuation, using(this, state=['root', 'attr'])), 'table'),
            # HTML tags
            (r'(?i)(<)({})\b'.format('|'.join(html_tags)),
             bygroups(Punctuation, Name.Tag), 'tag-inner-ordinary'),
            (r'(?i)(</)({})\b(\s*)(>)'.format('|'.join(html_tags)),
             bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)),
            # <nowiki>
            (r'(?i)(<)(nowiki)\b', bygroups(Punctuation,
             Name.Tag), ('tag-nowiki', 'tag-inner')),
            # <pre>
            (r'(?i)(<)(pre)\b', bygroups(Punctuation,
             Name.Tag), ('tag-pre', 'tag-inner')),
            # <categorytree>
            (r'(?i)(<)(categorytree)\b', bygroups(
                Punctuation, Name.Tag), ('tag-categorytree', 'tag-inner')),
            # <hiero>
            (r'(?i)(<)(hiero)\b', bygroups(Punctuation,
             Name.Tag), ('tag-hiero', 'tag-inner')),
            # <math>
            (r'(?i)(<)(math)\b', bygroups(Punctuation,
             Name.Tag), ('tag-math', 'tag-inner')),
            # <chem>
            (r'(?i)(<)(chem)\b', bygroups(Punctuation,
             Name.Tag), ('tag-chem', 'tag-inner')),
            # <ce>
            (r'(?i)(<)(ce)\b', bygroups(Punctuation,
             Name.Tag), ('tag-ce', 'tag-inner')),
            # <charinsert>
            (r'(?i)(<)(charinsert)\b', bygroups(
                Punctuation, Name.Tag), ('tag-charinsert', 'tag-inner')),
            # <templatedata>
            (r'(?i)(<)(templatedata)\b', bygroups(
                Punctuation, Name.Tag), ('tag-templatedata', 'tag-inner')),
            # <gallery>
            (r'(?i)(<)(gallery)\b', bygroups(
                Punctuation, Name.Tag), ('tag-gallery', 'tag-inner')),
            # <graph>
            (r'(?i)(<)(graph)\b', bygroups(
                Punctuation, Name.Tag), ('tag-graph', 'tag-inner')),
            # <dynamicpagelist>
            (r'(?i)(<)(dynamicpagelist)\b', bygroups(
                Punctuation, Name.Tag), ('tag-dynamicpagelist', 'tag-inner')),
            # <inputbox>
            (r'(?i)(<)(inputbox)\b', bygroups(
                Punctuation, Name.Tag), ('tag-inputbox', 'tag-inner')),
            # <rss>
            (r'(?i)(<)(rss)\b', bygroups(
                Punctuation, Name.Tag), ('tag-rss', 'tag-inner')),
            # <imagemap>
            (r'(?i)(<)(imagemap)\b', bygroups(
                Punctuation, Name.Tag), ('tag-imagemap', 'tag-inner')),
            # <syntaxhighlight>
            (r'(?i)(</)(syntaxhighlight)\b(\s*)(>)',
             bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)),
            (r'(?si)(<)(syntaxhighlight)\b([^>]*?(?<!/)>.*?)(?=</\2\s*>)',
             bygroups(Punctuation, Name.Tag, handle_syntaxhighlight)),
            # <syntaxhighlight>: Fallback case for self-closing tags
            (r'(?i)(<)(syntaxhighlight)\b(\s*?)((?:[^>]|-->)*?)(/\s*?(?<!--)>)', bygroups(
                Punctuation, Name.Tag, Whitespace, using(this, state=['root', 'attr']), Punctuation)),
            # <source>
            (r'(?i)(</)(source)\b(\s*)(>)',
             bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)),
            (r'(?si)(<)(source)\b([^>]*?(?<!/)>.*?)(?=</\2\s*>)',
             bygroups(Punctuation, Name.Tag, handle_syntaxhighlight)),
            # <source>: Fallback case for self-closing tags
            (r'(?i)(<)(source)\b(\s*?)((?:[^>]|-->)*?)(/\s*?(?<!--)>)', bygroups(
                Punctuation, Name.Tag, Whitespace, using(this, state=['root', 'attr']), Punctuation)),
            # <score>
            (r'(?i)(</)(score)\b(\s*)(>)',
             bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)),
            (r'(?si)(<)(score)\b([^>]*?(?<!/)>.*?)(?=</\2\s*>)',
             bygroups(Punctuation, Name.Tag, handle_score)),
            # <score>: Fallback case for self-closing tags
            (r'(?i)(<)(score)\b(\s*?)((?:[^>]|-->)*?)(/\s*?(?<!--)>)', bygroups(
                Punctuation, Name.Tag, Whitespace, using(this, state=['root', 'attr']), Punctuation)),
            # Other parser tags
            (r'(?i)(<)({})\b'.format('|'.join(parser_tags)),
             bygroups(Punctuation, Name.Tag), 'tag-inner-ordinary'),
            (r'(?i)(</)({})\b(\s*)(>)'.format('|'.join(parser_tags)),
             bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)),
            # LanguageConverter markups
            (
                r"""(?xi)
                (-\{{) # Escape format()
                (?: ([^|]) (\|))?
                (?: (\s* (?:{variants}) \s*) (=>))?
                (\s* (?:{variants}) \s*) (:)
                """.format(variants='|'.join(variant_langs)),
                bygroups(Punctuation, Keyword, Punctuation,
                         Name.Label, Operator, Name.Label, Punctuation),
                'lc-inner'
            ),
            (r'-\{(?!\{)', Punctuation, 'lc-raw'),
        ],
        'wikilink-name': [
            include('replaceable'),
            (r'[^{<]+', Name.Tag),
            (r'(?s).', Name.Tag),
        ],
        'wikilink-inner': [
            # Quit in case of another wikilink
            (r'(?=\[\[)', Punctuation, '#pop'),
            (r'\]\]', Punctuation, '#pop'),
            include('inline'),
            include('text'),
        ],
        'medialink-inner': [
            (r'\]\]', Punctuation, '#pop'),
            (r'(\|)([^\n=|]*)(=)',
             bygroups(Punctuation, Name.Attribute, Operator)),
            (r'\|', Punctuation),
            include('inline'),
            include('text'),
        ],
        'quote-common': [
            # Quit in case of link/template endings
            (r'(?=\]\]|\{\{|\}\})', Punctuation, '#pop'),
            (r'\n', Text, '#pop'),
        ],
        'inline-italic': [
            include('quote-common'),
            (r"('')(''')(?!')", bygroups(Generic.Emph,
             Generic.Strong), ('#pop', 'inline-bold')),
            (r"'''(?!')", Generic.EmphStrong, ('#pop', 'inline-italic-bold')),
            (r"''(?!')", Generic.Emph, '#pop'),
            include('inline'),
            include('text-italic'),
        ],
        'inline-bold': [
            include('quote-common'),
            (r"(''')('')(?!')", bygroups(
                Generic.Strong, Generic.Emph), ('#pop', 'inline-italic')),
            (r"'''(?!')", Generic.Strong, '#pop'),
            (r"''(?!')", Generic.EmphStrong, ('#pop', 'inline-bold-italic')),
            include('inline'),
            include('text-bold'),
        ],
        'inline-bold-italic': [
            include('quote-common'),
            (r"('')(''')(?!')", bygroups(Generic.EmphStrong,
             Generic.Strong), '#pop'),
            (r"'''(?!')", Generic.EmphStrong, ('#pop', 'inline-italic')),
            (r"''(?!')", Generic.EmphStrong, ('#pop', 'inline-bold')),
            include('inline'),
            include('text-bold-italic'),
        ],
        'inline-italic-bold': [
            include('quote-common'),
            (r"(''')('')(?!')", bygroups(
                Generic.EmphStrong, Generic.Emph), '#pop'),
            (r"'''(?!')", Generic.EmphStrong, ('#pop', 'inline-italic')),
            (r"''(?!')", Generic.EmphStrong, ('#pop', 'inline-bold')),
            include('inline'),
            include('text-bold-italic'),
        ],
        'lc-inner': [
            (
                r"""(?xi)
                (;)
                (?: (\s* (?:{variants}) \s*) (=>))?
                (\s* (?:{variants}) \s*) (:)
                """.format(variants='|'.join(variant_langs)),
                bygroups(Punctuation, Name.Label,
                         Operator, Name.Label, Punctuation)
            ),
            (r';?\s*?\}-', Punctuation, '#pop'),
            include('inline'),
            include('text'),
        ],
        'lc-raw': [
            (r'\}-', Punctuation, '#pop'),
            include('inline'),
            include('text'),
        ],
        'replaceable': [
            # Comments
            (r'<!--[\s\S]*?(?:-->|\Z)', Comment.Multiline),
            # Parameters
            (
                r"""(?x)
                (\{{3})
                    ([^|]*?)
                (?=\}{3}|\|)
                """,
                bygroups(Punctuation, Name.Variable),
                'parameter-inner',
            ),
            # Magic variables
            (r'(?i)(\{\{)(\s*)(%s)(\s*)(\}\})' % '|'.join(magic_vars_i),
             bygroups(Punctuation, Whitespace, Name.Function, Whitespace, Punctuation)),
            (r'(\{\{)(\s*)(%s)(\s*)(\}\})' % '|'.join(magic_vars),
             bygroups(Punctuation, Whitespace, Name.Function, Whitespace, Punctuation)),
            # Parser functions & templates
            (r'\{\{', Punctuation, 'template-begin-space'),
            # <tvar> legacy syntax
            (r'(?i)(<)(tvar)\b(\|)([^>]*?)(>)', bygroups(Punctuation,
             Name.Tag, Punctuation, String, Punctuation)),
            (r'</>', Punctuation, '#pop'),
            # <tvar>
            (r'(?i)(<)(tvar)\b', bygroups(Punctuation, Name.Tag), 'tag-inner-ordinary'),
            (r'(?i)(</)(tvar)\b(\s*)(>)',
             bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)),
        ],
        'parameter-inner': [
            (r'\}{3}', Punctuation, '#pop'),
            (r'\|', Punctuation),
            include('inline'),
            include('text'),
        ],
        'template-begin-space': [
            # Templates allow line breaks at the beginning, and due to how MediaWiki handles
            # comments, an extra state is required to handle things like {{\n<!---->\n name}}
            (r'<!--[\s\S]*?(?:-->|\Z)', Comment.Multiline),
            (r'\s+', Whitespace),
            # Parser functions
            (
                r'(?i)(\#[%s]*?|%s)(:)' % (title_char,
                                           '|'.join(parser_functions_i)),
                bygroups(Name.Function, Punctuation), ('#pop', 'template-inner')
            ),
            (
                r'(%s)(:)' % ('|'.join(parser_functions)),
                bygroups(Name.Function, Punctuation), ('#pop', 'template-inner')
            ),
            # Templates
            (
                r'(?i)([%s]*?)(:)' % title_char,
                bygroups(Name.Namespace, Punctuation), ('#pop', 'template-name')
            ),
            default(('#pop', 'template-name')),
        ],
        'template-name': [
            (r'(\s*?)(\|)', bygroups(Text, Punctuation), ('#pop', 'template-inner')),
            (r'\}\}', Punctuation, '#pop'),
            (r'\n', Text, '#pop'),
            include('replaceable'),
            *text_rules(Name.Tag),
        ],
        'template-inner': [
            (r'\}\}', Punctuation, '#pop'),
            (r'\|', Punctuation),
            (
                r"""(?x)
                (?<=\|)
                ( (?: (?! \{\{ | \}\} )[^=\|<])*? ) # Exclude templates and tags
                (=)
                """,
                bygroups(Name.Label, Operator)
            ),
            include('inline'),
            include('text'),
        ],
        'table': [
            # Use [ \t\n\r\0\x0B] instead of \s to follow PHP trim() behavior
            # Endings
            (r'^([ \t\n\r\0\x0B]*?)(\|\})',
             bygroups(Whitespace, Punctuation), '#pop'),
            # Table rows
            (r'^([ \t\n\r\0\x0B]*?)(\|-+)(.*)$', bygroups(Whitespace, Punctuation,
             using(this, state=['root', 'attr']))),
            # Captions
            (
                r"""(?x)
                ^([ \t\n\r\0\x0B]*?)(\|\+)
                # Exclude links, template and tags
                (?: ( (?: (?! \[\[ | \{\{ )[^|\n<] )*? )(\|) )?
                (.*?)$
                """,
                bygroups(Whitespace, Punctuation, using(this, state=[
                    'root', 'attr']), Punctuation, Generic.Heading),
            ),
            # Table data
            (
                r"""(?x)
                ( ^(?:[ \t\n\r\0\x0B]*?)\| | \|\| )
                (?: ( (?: (?! \[\[ | \{\{ )[^|\n<] )*? )(\|)(?!\|) )?
                """,
                bygroups(Punctuation, using(this, state=[
                    'root', 'attr']), Punctuation),
            ),
            # Table headers
            (
                r"""(?x)
                ( ^(?:[ \t\n\r\0\x0B]*?)! )
                (?: ( (?: (?! \[\[ | \{\{ )[^|\n<] )*? )(\|)(?!\|) )?
                """,
                bygroups(Punctuation, using(this, state=[
                    'root', 'attr']), Punctuation),
                'table-header',
            ),
            include('list'),
            include('inline'),
            include('text'),
        ],
        'table-header': [
            # Requires another state for || handling inside headers
            (r'\n', Text, '#pop'),
            (
                r"""(?x)
                (!!|\|\|)
                (?:
                    ( (?: (?! \[\[ | \{\{ )[^|\n<] )*? )
                    (\|)(?!\|)
                )?
                """,
                bygroups(Punctuation, using(this, state=[
                    'root', 'attr']), Punctuation)
            ),
            *text_rules(Generic.Subheading),
        ],
        'entity': [
            (r'&\S*?;', Name.Entity),
        ],
        'dt': [
            (r'\n', Text, '#pop'),
            include('inline'),
            (r':', Keyword, '#pop'),
            include('text'),
        ],
        'extlink-inner': [
            (r'\]', Punctuation, '#pop'),
            include('inline'),
            include('text'),
        ],
        'nowiki-ish': [
            include('entity'),
            include('text'),
        ],
        'attr': [
            include('replaceable'),
            (r'\s+', Whitespace),
            (r'(=)(\s*)(")', bygroups(Operator, Whitespace, String.Double), 'attr-val-2'),
            (r"(=)(\s*)(')", bygroups(Operator, Whitespace, String.Single), 'attr-val-1'),
            (r'(=)(\s*)', bygroups(Operator, Whitespace), 'attr-val-0'),
            (r'[\w:-]+', Name.Attribute),
        ],
        'attr-val-0': [
            (r'\s', Whitespace, '#pop'),
            include('replaceable'),
            *text_rules(String),
        ],
        'attr-val-1': [
            (r"'", String.Single, '#pop'),
            include('replaceable'),
            *text_rules(String.Single),
        ],
        'attr-val-2': [
            (r'"', String.Double, '#pop'),
            include('replaceable'),
            *text_rules(String.Double),
        ],
        'tag-inner-ordinary': [
            (r'/?\s*>', Punctuation, '#pop'),
            include('tag-attr'),
        ],
        'tag-inner': [
            # Return to root state for self-closing tags
            (r'/\s*>', Punctuation, '#pop:2'),
            (r'\s*>', Punctuation, '#pop'),
            include('tag-attr'),
        ],
        # The states below are just like their non-tag variants; the key
        # difference is that they forcibly quit when encountering tag-closing
        # markup.
        'tag-attr': [
            include('replaceable'),
            (r'\s+', Whitespace),
            (r'(=)(\s*)(")', bygroups(Operator,
             Whitespace, String.Double), 'tag-attr-val-2'),
            (r"(=)(\s*)(')", bygroups(Operator,
             Whitespace, String.Single), 'tag-attr-val-1'),
            (r'(=)(\s*)', bygroups(Operator, Whitespace), 'tag-attr-val-0'),
            (r'[\w:-]+', Name.Attribute),
        ],
        'tag-attr-val-0': [
            (r'\s', Whitespace, '#pop'),
            (r'/?>', Punctuation, '#pop:2'),
            include('replaceable'),
            *text_rules(String),
        ],
        'tag-attr-val-1': [
            (r"'", String.Single, '#pop'),
            (r'/?>', Punctuation, '#pop:2'),
            include('replaceable'),
            *text_rules(String.Single),
        ],
        'tag-attr-val-2': [
            (r'"', String.Double, '#pop'),
            (r'/?>', Punctuation, '#pop:2'),
            include('replaceable'),
            *text_rules(String.Double),
        ],
        'tag-nowiki': nowiki_tag_rules('nowiki'),
        'tag-pre': nowiki_tag_rules('pre'),
        'tag-categorytree': plaintext_tag_rules('categorytree'),
        'tag-dynamicpagelist': plaintext_tag_rules('dynamicpagelist'),
        'tag-hiero': plaintext_tag_rules('hiero'),
        'tag-inputbox': plaintext_tag_rules('inputbox'),
        'tag-imagemap': plaintext_tag_rules('imagemap'),
        'tag-charinsert': plaintext_tag_rules('charinsert'),
        'tag-timeline': plaintext_tag_rules('timeline'),
        'tag-gallery': plaintext_tag_rules('gallery'),
        'tag-graph': plaintext_tag_rules('graph'),
        'tag-rss': plaintext_tag_rules('rss'),
        'tag-math': delegate_tag_rules('math', TexLexer),
        'tag-chem': delegate_tag_rules('chem', TexLexer),
        'tag-ce': delegate_tag_rules('ce', TexLexer),
        'tag-templatedata': delegate_tag_rules('templatedata', JsonLexer),
        'text-italic': text_rules(Generic.Emph),
        'text-bold': text_rules(Generic.Strong),
        'text-bold-italic': text_rules(Generic.EmphStrong),
        'text': text_rules(Text),
    }
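

# Minimal usage sketch, not part of the original module; the sample markup is
# an assumption chosen for demonstration. Guarded so that importing this file
# stays free of side effects:
if __name__ == '__main__':
    sample = "'''Bold''' text, a [[Wikilink|label]] and {{a template}}."
    for token, value in WikitextLexer().get_tokens(sample):
        print(token, repr(value))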