12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521 |
- """
- pygments.lexers.markup
- ~~~~~~~~~~~~~~~~~~~~~~
- Lexers for non-HTML markup languages.
- :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS.
- :license: BSD, see LICENSE for details.
- """
- import re
- from pygments.lexers.html import XmlLexer
- from pygments.lexers.javascript import JavascriptLexer
- from pygments.lexers.css import CssLexer
- from pygments.lexers.lilypond import LilyPondLexer
- from pygments.lexers.data import JsonLexer
- from pygments.lexer import RegexLexer, DelegatingLexer, include, bygroups, \
- using, this, do_insertions, default, words
- from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
- Number, Punctuation, Generic, Other, Whitespace
- from pygments.util import get_bool_opt, ClassNotFound
- __all__ = ['BBCodeLexer', 'MoinWikiLexer', 'RstLexer', 'TexLexer', 'GroffLexer',
- 'MozPreprocHashLexer', 'MozPreprocPercentLexer',
- 'MozPreprocXulLexer', 'MozPreprocJavascriptLexer',
- 'MozPreprocCssLexer', 'MarkdownLexer', 'TiddlyWiki5Lexer', 'WikitextLexer']
class BBCodeLexer(RegexLexer):
    """
    A lexer that highlights BBCode(-like) syntax.

    Text outside of square brackets is emitted as plain ``Text``.  An
    opening ``[tag`` or ``[/tag`` enters the ``tag`` state, which highlights
    attributes and tag arguments until the closing ``]`` is seen.

    .. versionadded:: 0.6
    """

    name = 'BBCode'
    aliases = ['bbcode']
    mimetypes = ['text/x-bbcode']

    tokens = {
        'root': [
            # everything up to the next opening bracket is plain text
            (r'[^[]+', Text),
            # tag/end tag begin
            (r'\[/?\w+', Keyword, 'tag'),
            # stray bracket (a '[' that does not start a tag name)
            (r'\[', Text),
        ],
        'tag': [
            (r'\s+', Text),
            # attribute with value; the value may be wrapped in double quotes
            (r'(\w+)(=)("?[^\s"\]]+"?)',
             bygroups(Name.Attribute, Operator, String)),
            # tag argument (a la [color=green])
            (r'(=)("?[^\s"\]]+"?)',
             bygroups(Operator, String)),
            # tag end: return to the 'root' state
            (r'\]', Keyword, '#pop'),
        ],
    }
class MoinWikiLexer(RegexLexer):
    """
    For MoinMoin (and Trac) Wiki markup.

    The ``root`` state handles comments, headings, inline formatting
    markers, lists, macros and links; ``{{{ ... }}}`` literal blocks are
    handled by the nestable ``codeblock`` state.

    .. versionadded:: 0.7
    """

    name = 'MoinMoin/Trac Wiki markup'
    aliases = ['trac-wiki', 'moin']
    filenames = []
    mimetypes = ['text/x-trac-wiki']
    # MULTILINE lets '^'/'$' anchor at every line; IGNORECASE makes the
    # '[a-z]' list-label rule below match upper-case labels too.
    flags = re.MULTILINE | re.IGNORECASE

    tokens = {
        'root': [
            (r'^#.*$', Comment),
            (r'(!)(\S+)', bygroups(Keyword, Text)),  # Ignore-next
            # Titles
            (r'^(=+)([^=]+)(=+)(\s*#.+)?$',
             bygroups(Generic.Heading, using(this), Generic.Heading, String)),
            # Literal code blocks, with optional shebang
            (r'(\{\{\{)(\n#!.+)?', bygroups(Name.Builtin, Name.Namespace), 'codeblock'),
            (r'(\'\'\'?|\|\||`|__|~~|\^|,,|::)', Comment),  # Formatting
            # Lists
            (r'^( +)([.*-])( )', bygroups(Text, Name.Builtin, Text)),
            (r'^( +)([a-z]{1,5}\.)( )', bygroups(Text, Name.Builtin, Text)),
            # Other Formatting
            (r'\[\[\w+.*?\]\]', Keyword),  # Macro
            (r'(\[[^\s\]]+)(\s+[^\]]+?)?(\])',
             bygroups(Keyword, String, Keyword)),  # Link
            (r'^----+$', Keyword),  # Horizontal rules
            # plain text up to the next character that may start markup
            (r'[^\n\'\[{!_~^,|]+', Text),
            (r'\n', Text),
            # fallback: any single character not consumed by a rule above
            (r'.', Text),
        ],
        'codeblock': [
            (r'\}\}\}', Name.Builtin, '#pop'),
            # these blocks are allowed to be nested in Trac, but not MoinMoin
            (r'\{\{\{', Text, '#push'),
            (r'[^{}]+', Comment.Preproc),  # slurp boring text
            (r'.', Comment.Preproc),  # allow loose { or }
        ],
    }
class RstLexer(RegexLexer):
    """
    For reStructuredText markup.

    .. versionadded:: 0.7

    Additional options accepted:

    `handlecodeblocks`
        Highlight the contents of ``.. sourcecode:: language``,
        ``.. code:: language`` and ``.. code-block:: language``
        directives with a lexer for the given language (default:
        ``True``).

        .. versionadded:: 0.8
    """
    name = 'reStructuredText'
    url = 'https://docutils.sourceforge.io/rst.html'
    aliases = ['restructuredtext', 'rst', 'rest']
    filenames = ['*.rst', '*.rest']
    mimetypes = ["text/x-rst", "text/prs.fallenstein.rst"]
    flags = re.MULTILINE

    def _handle_sourcecode(self, match):
        """
        Callback for ``sourcecode``/``code``/``code-block`` directives.

        Yields the directive header tokens, then the indented body.  The
        body is highlighted with the lexer named in the directive when
        ``handlecodeblocks`` is enabled and such a lexer exists; otherwise
        it is emitted as a single ``String`` token.

        All yielded indices are positions in the text being lexed.
        """
        from pygments.lexers import get_lexer_by_name

        # section header
        yield match.start(1), Punctuation, match.group(1)
        yield match.start(2), Text, match.group(2)
        yield match.start(3), Operator.Word, match.group(3)
        yield match.start(4), Punctuation, match.group(4)
        yield match.start(5), Text, match.group(5)
        yield match.start(6), Keyword, match.group(6)
        yield match.start(7), Text, match.group(7)

        # lookup lexer if wanted and existing
        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name(match.group(6).strip())
            except ClassNotFound:
                pass

        indention = match.group(8)
        indention_size = len(indention)
        # groups 8-11 are contiguous, so the body starts at match.start(8)
        code = (indention + match.group(9) + match.group(10) + match.group(11))
        code_start = match.start(8)

        # no lexer for this language. handle it like it was a code block
        if lexer is None:
            yield code_start, String, code
            return

        # Strip the block indentation before handing the code to the
        # delegated lexer and remember where each stripped prefix has to be
        # re-inserted (as Text) into the resulting token stream.
        ins = []
        dedented = []
        dedented_len = 0
        for line in code.splitlines(True):
            if len(line) > indention_size:
                ins.append((dedented_len, [(0, Text, line[:indention_size])]))
                line = line[indention_size:]
            dedented.append(line)
            dedented_len += len(line)

        # do_insertions() numbers tokens relative to the start of the code
        # body; shift them so they index into the full input text.
        highlighted = lexer.get_tokens_unprocessed(''.join(dedented))
        for index, token, value in do_insertions(ins, highlighted):
            yield code_start + index, token, value

    # from docutils.parsers.rst.states
    closers = '\'")]}>\u2019\u201d\xbb!?'
    unicode_delimiters = '\u2010\u2011\u2012\u2013\u2014\u00a0'
    # lookahead that must follow the closing `` of an inline literal
    end_string_suffix = (r'((?=$)|(?=[-/:.,; \n\x00%s%s]))'
                         % (re.escape(unicode_delimiters),
                            re.escape(closers)))

    tokens = {
        'root': [
            # Heading with overline
            (r'^(=+|-+|`+|:+|\.+|\'+|"+|~+|\^+|_+|\*+|\++|#+)([ \t]*\n)'
             r'(.+)(\n)(\1)(\n)',
             bygroups(Generic.Heading, Text, Generic.Heading,
                      Text, Generic.Heading, Text)),
            # Plain heading
            (r'^(\S.*)(\n)(={3,}|-{3,}|`{3,}|:{3,}|\.{3,}|\'{3,}|"{3,}|'
             r'~{3,}|\^{3,}|_{3,}|\*{3,}|\+{3,}|#{3,})(\n)',
             bygroups(Generic.Heading, Text, Generic.Heading, Text)),
            # Bulleted lists
            (r'^(\s*)([-*+])( .+\n(?:\1  .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Numbered lists
            (r'^(\s*)([0-9#ivxlcmIVXLCM]+\.)( .+\n(?:\1  .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            (r'^(\s*)(\(?[0-9#ivxlcmIVXLCM]+\))( .+\n(?:\1  .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Numbered, but keep words at BOL from becoming lists
            (r'^(\s*)([A-Z]+\.)( .+\n(?:\1  .+\n)+)',
             bygroups(Text, Number, using(this, state='inline'))),
            (r'^(\s*)(\(?[A-Za-z]+\))( .+\n(?:\1  .+\n)+)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Line blocks
            (r'^(\s*)(\|)( .+\n(?:\|  .+\n)*)',
             bygroups(Text, Operator, using(this, state='inline'))),
            # Sourcecode directives
            (r'^( *\.\.)(\s*)((?:source)?code(?:-block)?)(::)([ \t]*)([^\n]+)'
             r'(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\8.*)?\n)+)',
             _handle_sourcecode),
            # A directive
            (r'^( *\.\.)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))',
             bygroups(Punctuation, Text, Operator.Word, Punctuation, Text,
                      using(this, state='inline'))),
            # A reference target
            (r'^( *\.\.)(\s*)(_(?:[^:\\]|\\.)+:)(.*?)$',
             bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))),
            # A footnote/citation target
            (r'^( *\.\.)(\s*)(\[.+\])(.*?)$',
             bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))),
            # A substitution def
            (r'^( *\.\.)(\s*)(\|.+\|)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))',
             bygroups(Punctuation, Text, Name.Tag, Text, Operator.Word,
                      Punctuation, Text, using(this, state='inline'))),
            # Comments
            (r'^ *\.\..*(\n( +.*\n|\n)+)?', Comment.Preproc),
            # Field list marker
            (r'^( *)(:(?:\\\\|\\:|[^:\n])+:(?=\s))([ \t]*)',
             bygroups(Text, Name.Class, Text)),
            # Definition list
            (r'^(\S.*(?<!::)\n)((?:(?:  +.*)\n)+)',
             bygroups(using(this, state='inline'), using(this, state='inline'))),
            # Code blocks
            (r'(::)(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\3.*)?\n)+)',
             bygroups(String.Escape, Text, String, String, Text, String)),
            include('inline'),
        ],
        'inline': [
            (r'\\.', Text),  # escape
            (r'``', String, 'literal'),  # code
            (r'(`.+?)(<.+?>)(`__?)',  # reference with inline target
             bygroups(String, String.Interpol, String)),
            (r'`.+?`__?', String),  # reference
            (r'(`.+?`)(:[a-zA-Z0-9:-]+?:)?',
             bygroups(Name.Variable, Name.Attribute)),  # role
            (r'(:[a-zA-Z0-9:-]+?:)(`.+?`)',
             bygroups(Name.Attribute, Name.Variable)),  # role (content first)
            (r'\*\*.+?\*\*', Generic.Strong),  # Strong emphasis
            (r'\*.+?\*', Generic.Emph),  # Emphasis
            (r'\[.*?\]_', String),  # Footnote or citation
            (r'<.+?>', Name.Tag),   # Hyperlink
            (r'[^\\\n\[*`:]+', Text),
            (r'.', Text),
        ],
        'literal': [
            (r'[^`]+', String),
            (r'``' + end_string_suffix, String, '#pop'),
            (r'`', String),
        ]
    }

    def __init__(self, **options):
        """Read the ``handlecodeblocks`` option, then initialise the lexer."""
        self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
        RegexLexer.__init__(self, **options)

    def analyse_text(text):
        """
        Guess whether *text* is reStructuredText.

        Returns 0.3 if the text starts with ``..`` (but not ``...``), 0.5 if
        the first two lines look like a title with a ``-``/``=`` underline
        of the same length, and ``None`` otherwise.
        """
        if text[:2] == '..' and text[2:3] != '.':
            return 0.3
        p1 = text.find("\n")
        p2 = text.find("\n", p1 + 1)
        if (p2 > -1 and              # has two lines
                p1 * 2 + 1 == p2 and     # they are the same length
                text[p1+1] in '-=' and   # the next line both starts and ends with
                text[p1+1] == text[p2-1]):  # ...a sufficiently high header
            return 0.5
class TexLexer(RegexLexer):
    """
    Lexer for the TeX and LaTeX typesetting languages.

    Commands, comments and grouping characters are highlighted in running
    text; inline (``$...$``, ``\\(...\\)``) and display (``$$...$$``,
    ``\\[...\\]``) math are lexed in dedicated states.
    """

    name = 'TeX'
    aliases = ['tex', 'latex']
    filenames = ['*.tex', '*.aux', '*.toc']
    mimetypes = ['text/x-tex', 'text/x-latex']

    tokens = {
        # rules shared by running text and math mode
        'general': [
            (r'%.*?\n', Comment),
            (r'[{}]', Name.Builtin),
            (r'[&_^]', Name.Builtin),
        ],
        'root': [
            (r'\\\[', String.Backtick, 'displaymath'),
            (r'\\\(', String, 'inlinemath'),
            (r'\$\$', String.Backtick, 'displaymath'),
            (r'\$', String, 'inlinemath'),
            (r'\\([a-zA-Z]+|.)', Keyword, 'command'),
            (r'\\$', Keyword),
            include('general'),
            (r'[^\\$%&_^{}]+', Text),
        ],
        'math': [
            (r'\\([a-zA-Z]+|.)', Name.Variable),
            include('general'),
            (r'[0-9]+', Number),
            (r'[-=!+*/()\[\]]', Operator),
            (r'[^=!+*/()\[\]\\$%&_^{}0-9-]+', Name.Builtin),
        ],
        'inlinemath': [
            (r'\\\)', String, '#pop'),
            (r'\$', String, '#pop'),
            include('math'),
        ],
        'displaymath': [
            (r'\\\]', String, '#pop'),
            (r'\$\$', String, '#pop'),
            (r'\$', Name.Builtin),
            include('math'),
        ],
        # entered right after a command name: optional [..] argument / star
        'command': [
            (r'\[.*?\]', Name.Attribute),
            (r'\*', Keyword),
            default('#pop'),
        ],
    }

    def analyse_text(text):
        """Return ``True`` if *text* begins with a typical TeX preamble command."""
        preamble_commands = ("\\documentclass", "\\input", "\\documentstyle",
                             "\\relax")
        # str.startswith accepts a tuple and succeeds on the first match
        if text.startswith(preamble_commands):
            return True
class GroffLexer(RegexLexer):
    """
    Lexer for the (g)roff typesetting language, supporting groff
    extensions. Mainly useful for highlighting manpage sources.

    .. versionadded:: 0.6
    """

    name = 'Groff'
    aliases = ['groff', 'nroff', 'man']
    filenames = ['*.[1-9]', '*.man', '*.1p', '*.3pm']
    mimetypes = ['application/x-troff', 'text/troff']

    tokens = {
        'root': [
            # a leading dot starts a request line
            (r'(\.)(\w+)', bygroups(Text, Keyword), 'request'),
            (r'\.', Punctuation, 'request'),
            # Regular characters, slurp till we find a backslash or newline
            (r'[^\\\n]+', Text, 'textline'),
            default('textline'),
        ],
        'textline': [
            include('escapes'),
            (r'[^\\\n]+', Text),
            (r'\n', Text, '#pop'),
        ],
        'escapes': [
            # groff has many ways to write escapes.
            (r'\\"[^\n]*', Comment),
            (r'\\[fn]\w', String.Escape),
            (r'\\\(.{2}', String.Escape),
            (r'\\.\[.*\]', String.Escape),
            (r'\\.', String.Escape),
            (r'\\\n', Text, 'request'),
        ],
        'request': [
            (r'\n', Text, '#pop'),
            include('escapes'),
            (r'"[^\n"]+"', String.Double),
            (r'\d+', Number),
            (r'\S+', String),
            (r'\s+', Text),
        ],
    }

    def analyse_text(text):
        """
        Guess whether *text* is (g)roff source.

        Returns ``False`` if the text does not start with a dot, ``True``
        if it starts with a roff comment (``.\\"``) or a ``.TH`` request,
        0.9 if it starts with a dot, two alphanumeric characters and a
        whitespace character, and ``None`` otherwise.
        """
        if text[:1] != '.':
            return False
        if text[:3] == '.\\"':
            return True
        if text[:4] == '.TH ':
            return True
        # Slice instead of indexing so that input shorter than four
        # characters (e.g. ".ab") does not raise IndexError.
        if text[1:3].isalnum() and text[3:4].isspace():
            return 0.9
class MozPreprocHashLexer(RegexLexer):
    """
    Lexer for Mozilla Preprocessor files (with '#' as the marker).

    Other data is left untouched.

    .. versionadded:: 2.0
    """
    name = 'mozhashpreproc'
    aliases = [name]
    filenames = []
    mimetypes = []

    tokens = {
        'root': [
            # a marker at the start of a line begins a directive: lex the
            # directive keyword ('exprstart') and then its expression ('expr')
            (r'^#', Comment.Preproc, ('expr', 'exprstart')),
            # everything else is emitted as Other, i.e. left unlexed here
            (r'.+', Other),
        ],
        'exprstart': [
            # 'literal' takes the rest of the line verbatim, so leave both
            # 'exprstart' and 'expr'
            (r'(literal)(.*)', bygroups(Comment.Preproc, Text), '#pop:2'),
            (words((
                'define', 'undef', 'if', 'ifdef', 'ifndef', 'else', 'elif',
                'elifdef', 'elifndef', 'endif', 'expand', 'filter', 'unfilter',
                'include', 'includesubst', 'error')),
             Comment.Preproc, '#pop'),
        ],
        'expr': [
            (words(('!', '!=', '==', '&&', '||')), Operator),
            (r'(defined)(\()', bygroups(Keyword, Punctuation)),
            (r'\)', Punctuation),
            (r'[0-9]+', Number.Decimal),
            (r'__\w+?__', Name.Variable),
            (r'@\w+?@', Name.Class),
            (r'\w+', Name),
            # the directive ends at the end of the line
            (r'\n', Text, '#pop'),
            (r'\s+', Text),
            (r'\S', Punctuation),
        ],
    }
class MozPreprocPercentLexer(MozPreprocHashLexer):
    """
    Lexer for Mozilla Preprocessor files (with '%' as the marker).

    Other data is left untouched.

    .. versionadded:: 2.0
    """
    name = 'mozpercentpreproc'
    aliases = [name]
    filenames = []
    mimetypes = []

    tokens = {
        # Only the 'root' state is redefined here; it uses '%' instead of
        # '#' as the directive marker and pushes the same 'expr' and
        # 'exprstart' states as MozPreprocHashLexer.
        'root': [
            (r'^%', Comment.Preproc, ('expr', 'exprstart')),
            (r'.+', Other),
        ],
    }
class MozPreprocXulLexer(DelegatingLexer):
    """
    A `DelegatingLexer` that combines the `MozPreprocHashLexer` with the
    `XmlLexer`: data left unlexed by the preprocessor lexer is highlighted
    with the `XmlLexer`.

    .. versionadded:: 2.0
    """
    name = "XUL+mozpreproc"
    aliases = ['xul+mozpreproc']
    filenames = ['*.xul.in']
    mimetypes = []

    def __init__(self, **options):
        """Set up delegation between `XmlLexer` and `MozPreprocHashLexer`."""
        super().__init__(XmlLexer, MozPreprocHashLexer, **options)
class MozPreprocJavascriptLexer(DelegatingLexer):
    """
    A `DelegatingLexer` that combines the `MozPreprocHashLexer` with the
    `JavascriptLexer`: data left unlexed by the preprocessor lexer is
    highlighted with the `JavascriptLexer`.

    .. versionadded:: 2.0
    """
    name = "Javascript+mozpreproc"
    aliases = ['javascript+mozpreproc']
    filenames = ['*.js.in']
    mimetypes = []

    def __init__(self, **options):
        """Set up delegation between `JavascriptLexer` and `MozPreprocHashLexer`."""
        super().__init__(JavascriptLexer, MozPreprocHashLexer, **options)
class MozPreprocCssLexer(DelegatingLexer):
    """
    A `DelegatingLexer` that combines the `MozPreprocPercentLexer` with the
    `CssLexer`: data left unlexed by the preprocessor lexer is highlighted
    with the `CssLexer`.

    Note that this lexer uses the '%' marker variant of the preprocessor
    lexer, not the '#' one.

    .. versionadded:: 2.0
    """
    name = "CSS+mozpreproc"
    aliases = ['css+mozpreproc']
    filenames = ['*.css.in']
    mimetypes = []

    def __init__(self, **options):
        """Set up delegation between `CssLexer` and `MozPreprocPercentLexer`."""
        super().__init__(CssLexer, MozPreprocPercentLexer, **options)
class MarkdownLexer(RegexLexer):
    """
    For Markdown markup.

    .. versionadded:: 2.2

    Additional options accepted:

    `handlecodeblocks`
        Highlight the contents of fenced code blocks that name a language
        with a lexer for that language (default: ``True``).
    """
    name = 'Markdown'
    url = 'https://daringfireball.net/projects/markdown/'
    aliases = ['markdown', 'md']
    filenames = ['*.md', '*.markdown']
    mimetypes = ["text/x-markdown"]
    flags = re.MULTILINE

    def _handle_codeblock(self, match):
        """
        Callback for fenced code blocks that name a language.

        Yields the opening fence, language name and optional trailing
        text, then the code (highlighted with the named lexer when
        ``handlecodeblocks`` is enabled and the lexer exists, otherwise as
        one ``String`` token), and finally the closing fence.

        All yielded indices are positions in the text being lexed.
        """
        from pygments.lexers import get_lexer_by_name

        yield match.start('initial'), String.Backtick, match.group('initial')
        yield match.start('lang'), String.Backtick, match.group('lang')
        if match.group('afterlang') is not None:
            yield match.start('whitespace'), Whitespace, match.group('whitespace')
            yield match.start('extra'), Text, match.group('extra')
        yield match.start('newline'), Whitespace, match.group('newline')

        # lookup lexer if wanted and existing
        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name(match.group('lang').strip())
            except ClassNotFound:
                pass

        code = match.group('code')
        code_start = match.start('code')
        if lexer is None:
            # no lexer for this language. handle it like it was a code block
            yield code_start, String, code
        else:
            # The delegated lexer numbers its tokens from the start of
            # ``code``; shift them so they index into the full input text.
            for index, token, value in lexer.get_tokens_unprocessed(code):
                yield code_start + index, token, value

        yield match.start('terminator'), String.Backtick, match.group('terminator')

    tokens = {
        'root': [
            # heading with '#' prefix (atx-style)
            (r'(^#[^#].+)(\n)', bygroups(Generic.Heading, Text)),
            # subheading with '#' prefix (atx-style)
            (r'(^#{2,6}[^#].+)(\n)', bygroups(Generic.Subheading, Text)),
            # heading with '=' underlines (Setext-style)
            (r'^(.+)(\n)(=+)(\n)', bygroups(Generic.Heading, Text, Generic.Heading, Text)),
            # subheading with '-' underlines (Setext-style)
            (r'^(.+)(\n)(-+)(\n)', bygroups(Generic.Subheading, Text, Generic.Subheading, Text)),
            # task list
            (r'^(\s*)([*-] )(\[[ xX]\])( .+\n)',
             bygroups(Whitespace, Keyword, Keyword, using(this, state='inline'))),
            # bulleted list
            (r'^(\s*)([*-])(\s)(.+\n)',
             bygroups(Whitespace, Keyword, Whitespace, using(this, state='inline'))),
            # numbered list
            (r'^(\s*)([0-9]+\.)( .+\n)',
             bygroups(Whitespace, Keyword, using(this, state='inline'))),
            # quote
            (r'^(\s*>\s)(.+\n)', bygroups(Keyword, Generic.Emph)),
            # code block fenced by 3 backticks
            (r'^(\s*```\n[\w\W]*?^\s*```$\n)', String.Backtick),
            # code block with language
            # Some tools include extra stuff after the language name, just
            # highlight that as text. For example: https://docs.enola.dev/use/execmd
            (r'''(?x)
              ^(?P<initial>\s*```)
              (?P<lang>[\w\-]+)
              (?P<afterlang>
                 (?P<whitespace>[^\S\n]+)
                 (?P<extra>.*))?
              (?P<newline>\n)
              (?P<code>(.|\n)*?)
              (?P<terminator>^\s*```$\n)
              ''',
             _handle_codeblock),

            include('inline'),
        ],
        'inline': [
            # escape
            (r'\\.', Text),
            # inline code
            (r'([^`]?)(`[^`\n]+`)', bygroups(Text, String.Backtick)),
            # warning: the following rules eat outer tags.
            # eg. **foo _bar_ baz** => foo and baz are not recognized as bold
            # bold fenced by '**'
            (r'([^\*]?)(\*\*[^* \n][^*\n]*\*\*)', bygroups(Text, Generic.Strong)),
            # bold fenced by '__'
            (r'([^_]?)(__[^_ \n][^_\n]*__)', bygroups(Text, Generic.Strong)),
            # italics fenced by '*'
            (r'([^\*]?)(\*[^* \n][^*\n]*\*)', bygroups(Text, Generic.Emph)),
            # italics fenced by '_'
            (r'([^_]?)(_[^_ \n][^_\n]*_)', bygroups(Text, Generic.Emph)),
            # strikethrough
            (r'([^~]?)(~~[^~ \n][^~\n]*~~)', bygroups(Text, Generic.Deleted)),
            # mentions and topics (twitter and github stuff)
            (r'[@#][\w/:]+', Name.Entity),
            # (image?) links, e.g. ![alt text](https://example.com/image.png)
            (r'(!?\[)([^]]+)(\])(\()([^)]+)(\))',
             bygroups(Text, Name.Tag, Text, Text, Name.Attribute, Text)),
            # reference-style links, e.g.:
            #   [an example][id]
            #   [id]: http://example.com/
            (r'(\[)([^]]+)(\])(\[)([^]]*)(\])',
             bygroups(Text, Name.Tag, Text, Text, Name.Label, Text)),
            (r'^(\s*\[)([^]]*)(\]:\s*)(.+)',
             bygroups(Text, Name.Label, Text, Name.Attribute)),

            # general text, must come last!
            (r'[^\\\s]+', Text),
            (r'.', Text),
        ],
    }

    def __init__(self, **options):
        """Read the ``handlecodeblocks`` option, then initialise the lexer."""
        self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
        RegexLexer.__init__(self, **options)
class TiddlyWiki5Lexer(RegexLexer):
    """
    For TiddlyWiki5 markup.

    .. versionadded:: 2.7

    Additional options accepted:

    `handlecodeblocks`
        Highlight the contents of fenced code blocks that name a language,
        and of ``<style>`` blocks, with the matching lexer (default:
        ``True``).
    """
    name = 'tiddler'
    url = 'https://tiddlywiki.com/#TiddlerFiles'
    aliases = ['tid']
    filenames = ['*.tid']
    mimetypes = ["text/vnd.tiddlywiki"]
    flags = re.MULTILINE

    def _lex_embedded(self, match, group, lang):
        """
        Yield the tokens for match group *group*, treated as *lang* code.

        The group is highlighted with the lexer registered under the name
        *lang* when ``handlecodeblocks`` is enabled and such a lexer
        exists; otherwise it is emitted as a single ``String`` token.
        Yielded indices are positions in the text being lexed.
        """
        from pygments.lexers import get_lexer_by_name

        # lookup lexer if wanted and existing
        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name(lang)
            except ClassNotFound:
                pass

        code = match.group(group)
        code_start = match.start(group)

        # no lexer for this language. handle it like it was a code block
        if lexer is None:
            yield code_start, String, code
            return

        # The delegated lexer numbers its tokens from the start of
        # ``code``; shift them so they index into the full input text.
        for index, token, value in lexer.get_tokens_unprocessed(code):
            yield code_start + index, token, value

    def _handle_codeblock(self, match):
        """
        match args: 1:backticks, 2:lang_name, 3:newline, 4:code, 5:backticks
        """
        # section header
        yield match.start(1), String, match.group(1)
        yield match.start(2), String, match.group(2)
        yield match.start(3), Text, match.group(3)

        yield from self._lex_embedded(match, 4, match.group(2).strip())

        # The closing fence is emitted whether or not a lexer was found, so
        # that no matched text is dropped from the token stream.
        yield match.start(5), String, match.group(5)

    def _handle_cssblock(self, match):
        """
        match args: 1:style tag 2:newline, 3:code, 4:closing style tag
        """
        # section header
        yield match.start(1), String, match.group(1)
        yield match.start(2), String, match.group(2)

        yield from self._lex_embedded(match, 3, 'css')

        # The closing tag is emitted whether or not a lexer was found, so
        # that no matched text is dropped from the token stream.
        yield match.start(4), String, match.group(4)

    tokens = {
        'root': [
            # title in metadata section
            (r'^(title)(:\s)(.+\n)', bygroups(Keyword, Text, Generic.Heading)),
            # headings
            (r'^(!)([^!].+\n)', bygroups(Generic.Heading, Text)),
            (r'^(!{2,6})(.+\n)', bygroups(Generic.Subheading, Text)),
            # bulleted or numbered lists or single-line block quotes
            # (can be mixed)
            (r'^(\s*)([*#>]+)(\s*)(.+\n)',
             bygroups(Text, Keyword, Text, using(this, state='inline'))),
            # multi-line block quotes
            (r'^(<<<.*\n)([\w\W]*?)(^<<<.*$)', bygroups(String, Text, String)),
            # table header
            (r'^(\|.*?\|h)$', bygroups(Generic.Strong)),
            # table footer or caption
            (r'^(\|.*?\|[cf])$', bygroups(Generic.Emph)),
            # table class
            (r'^(\|.*?\|k)$', bygroups(Name.Tag)),
            # definitions
            (r'^(;.*)$', bygroups(Generic.Strong)),
            # text block
            (r'^(```\n)([\w\W]*?)(^```$)', bygroups(String, Text, String)),
            # code block with language
            (r'^(```)(\w+)(\n)([\w\W]*?)(^```$)', _handle_codeblock),
            # CSS style block
            (r'^(<style>)(\n)([\w\W]*?)(^</style>$)', _handle_cssblock),

            include('keywords'),
            include('inline'),
        ],
        'keywords': [
            (words((
                '\\define', '\\end', 'caption', 'created', 'modified', 'tags',
                'title', 'type'), prefix=r'^', suffix=r'\b'),
             Keyword),
        ],
        'inline': [
            # escape
            (r'\\.', Text),
            # created or modified date
            (r'\d{17}', Number.Integer),
            # italics
            (r'(\s)(//[^/]+//)((?=\W|\n))',
             bygroups(Text, Generic.Emph, Text)),
            # superscript
            (r'(\s)(\^\^[^\^]+\^\^)', bygroups(Text, Generic.Emph)),
            # subscript
            (r'(\s)(,,[^,]+,,)', bygroups(Text, Generic.Emph)),
            # underscore
            (r'(\s)(__[^_]+__)', bygroups(Text, Generic.Strong)),
            # bold
            (r"(\s)(''[^']+'')((?=\W|\n))",
             bygroups(Text, Generic.Strong, Text)),
            # strikethrough
            (r'(\s)(~~[^~]+~~)((?=\W|\n))',
             bygroups(Text, Generic.Deleted, Text)),
            # TiddlyWiki variables
            (r'<<[^>]+>>', Name.Tag),
            (r'\$\$[^$]+\$\$', Name.Tag),
            (r'\$\([^)]+\)\$', Name.Tag),
            # TiddlyWiki style or class
            (r'^@@.*$', Name.Tag),
            # HTML tags
            (r'</?[^>]+>', Name.Tag),
            # inline code
            (r'`[^`]+`', String.Backtick),
            # HTML escaped symbols
            (r'&\S*?;', String.Regex),
            # Wiki links
            (r'(\[{2})([^]\|]+)(\]{2})', bygroups(Text, Name.Tag, Text)),
            # External links
            (r'(\[{2})([^]\|]+)(\|)([^]\|]+)(\]{2})',
             bygroups(Text, Name.Tag, Text, Name.Attribute, Text)),
            # Transclusion
            (r'(\{{2})([^}]+)(\}{2})', bygroups(Text, Name.Tag, Text)),
            # URLs
            (r'(\b.?.?tps?://[^\s"]+)', bygroups(Name.Attribute)),

            # general text, must come last!
            (r'[\w]+', Text),
            (r'.', Text)
        ],
    }

    def __init__(self, **options):
        """Read the ``handlecodeblocks`` option, then initialise the lexer."""
        self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
        RegexLexer.__init__(self, **options)
- class WikitextLexer(RegexLexer):
- """
- For MediaWiki Wikitext.
- Parsing Wikitext is tricky, and results vary between different MediaWiki
- installations, so we only highlight common syntaxes (built-in or from
- popular extensions), and also assume templates produce no unbalanced
- syntaxes.
- .. versionadded:: 2.15
- """
- name = 'Wikitext'
- url = 'https://www.mediawiki.org/wiki/Wikitext'
- aliases = ['wikitext', 'mediawiki']
- filenames = []
- mimetypes = ['text/x-wiki']
- flags = re.MULTILINE
- def nowiki_tag_rules(tag_name):
- return [
- (r'(?i)(</)({})(\s*)(>)'.format(tag_name), bygroups(Punctuation,
- Name.Tag, Whitespace, Punctuation), '#pop'),
- include('entity'),
- include('text'),
- ]
- def plaintext_tag_rules(tag_name):
- return [
- (r'(?si)(.*?)(</)({})(\s*)(>)'.format(tag_name), bygroups(Text,
- Punctuation, Name.Tag, Whitespace, Punctuation), '#pop'),
- ]
- def delegate_tag_rules(tag_name, lexer):
- return [
- (r'(?i)(</)({})(\s*)(>)'.format(tag_name), bygroups(Punctuation,
- Name.Tag, Whitespace, Punctuation), '#pop'),
- (r'(?si).+?(?=</{}\s*>)'.format(tag_name), using(lexer)),
- ]
- def text_rules(token):
- return [
- (r'\w+', token),
- (r'[^\S\n]+', token),
- (r'(?s).', token),
- ]
- def handle_syntaxhighlight(self, match, ctx):
- from pygments.lexers import get_lexer_by_name
- attr_content = match.group()
- start = 0
- index = 0
- while True:
- index = attr_content.find('>', start)
- # Exclude comment end (-->)
- if attr_content[index-2:index] != '--':
- break
- start = index + 1
- if index == -1:
- # No tag end
- yield from self.get_tokens_unprocessed(attr_content, stack=['root', 'attr'])
- return
- attr = attr_content[:index]
- yield from self.get_tokens_unprocessed(attr, stack=['root', 'attr'])
- yield match.start(3) + index, Punctuation, '>'
- lexer = None
- content = attr_content[index+1:]
- lang_match = re.findall(r'\blang=("|\'|)(\w+)(\1)', attr)
- if len(lang_match) >= 1:
- # Pick the last match in case of multiple matches
- lang = lang_match[-1][1]
- try:
- lexer = get_lexer_by_name(lang)
- except ClassNotFound:
- pass
- if lexer is None:
- yield match.start() + index + 1, Text, content
- else:
- yield from lexer.get_tokens_unprocessed(content)
- def handle_score(self, match, ctx):
- attr_content = match.group()
- start = 0
- index = 0
- while True:
- index = attr_content.find('>', start)
- # Exclude comment end (-->)
- if attr_content[index-2:index] != '--':
- break
- start = index + 1
- if index == -1:
- # No tag end
- yield from self.get_tokens_unprocessed(attr_content, stack=['root', 'attr'])
- return
- attr = attr_content[:index]
- content = attr_content[index+1:]
- yield from self.get_tokens_unprocessed(attr, stack=['root', 'attr'])
- yield match.start(3) + index, Punctuation, '>'
- lang_match = re.findall(r'\blang=("|\'|)(\w+)(\1)', attr)
- # Pick the last match in case of multiple matches
- lang = lang_match[-1][1] if len(lang_match) >= 1 else 'lilypond'
- if lang == 'lilypond': # Case sensitive
- yield from LilyPondLexer().get_tokens_unprocessed(content)
- else: # ABC
- # FIXME: Use ABC lexer in the future
- yield match.start() + index + 1, Text, content
- # a-z removed to prevent linter from complaining, REMEMBER to use (?i)
- title_char = r' %!"$&\'()*,\-./0-9:;=?@A-Z\\\^_`~+\u0080-\uFFFF'
- nbsp_char = r'(?:\t| |&\#0*160;|&\#[Xx]0*[Aa]0;|[ \xA0\u1680\u2000-\u200A\u202F\u205F\u3000])'
- link_address = r'(?:[0-9.]+|\[[0-9a-f:.]+\]|[^\x00-\x20"<>\[\]\x7F\xA0\u1680\u2000-\u200A\u202F\u205F\u3000\uFFFD])'
- link_char_class = r'[^\x00-\x20"<>\[\]\x7F\xA0\u1680\u2000-\u200A\u202F\u205F\u3000\uFFFD]'
- double_slashes_i = {
- '__FORCETOC__', '__NOCONTENTCONVERT__', '__NOCC__', '__NOEDITSECTION__', '__NOGALLERY__',
- '__NOTITLECONVERT__', '__NOTC__', '__NOTOC__', '__TOC__',
- }
- double_slashes = {
- '__EXPECTUNUSEDCATEGORY__', '__HIDDENCAT__', '__INDEX__', '__NEWSECTIONLINK__',
- '__NOINDEX__', '__NONEWSECTIONLINK__', '__STATICREDIRECT__', '__NOGLOBAL__',
- '__DISAMBIG__', '__EXPECTED_UNCONNECTED_PAGE__',
- }
- protocols = {
- 'bitcoin:', 'ftp://', 'ftps://', 'geo:', 'git://', 'gopher://', 'http://', 'https://',
- 'irc://', 'ircs://', 'magnet:', 'mailto:', 'mms://', 'news:', 'nntp://', 'redis://',
- 'sftp://', 'sip:', 'sips:', 'sms:', 'ssh://', 'svn://', 'tel:', 'telnet://', 'urn:',
- 'worldwind://', 'xmpp:', '//',
- }
- non_relative_protocols = protocols - {'//'}
# Tag names handled by the generic "HTML tags" rules of the lexer.
html_tags = {
    'abbr',
    'b', 'bdi', 'bdo', 'big', 'blockquote', 'br',
    'caption', 'center', 'cite', 'code',
    'data', 'dd', 'del', 'dfn', 'div', 'dl', 'dt',
    'em',
    'font',
    'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr',
    'i', 'ins',
    'kbd',
    'li', 'link',
    'mark', 'meta',
    'ol',
    'p',
    'q',
    'rb', 'rp', 'rt', 'rtc', 'ruby',
    's', 'samp', 'small', 'span', 'strike', 'strong', 'sub', 'sup',
    'table', 'td', 'th', 'time', 'tr', 'tt',
    'u', 'ul',
    'var',
    'wbr',
}
# Tag names handled by the generic "other parser tags" rules of the lexer.
parser_tags = {
    'categorytree', 'ce', 'charinsert', 'chem',
    'dynamicpagelist',
    'gallery', 'graph',
    'hiero',
    'imagemap', 'includeonly', 'inputbox',
    'langconvert', 'languages',
    'mapframe', 'maplink', 'math',
    'noinclude', 'nowiki',
    'onlyinclude',
    'poem', 'pre',
    'ref', 'references', 'rss',
    'score', 'section', 'syntaxhighlight',
    'templatedata', 'templatestyles', 'timeline', 'translate', 'tvar',
}
# Language variant codes accepted in LanguageConverter markup (``-{ ... }-``),
# grouped by the MediaWiki converter class each group is taken from.
variant_langs = {
    # BanConverter.php
    'ban', 'ban-bali', 'ban-x-dharma', 'ban-x-palmleaf', 'ban-x-pku',
    # CrhConverter.php
    'crh', 'crh-cyrl', 'crh-latn',
    # EnConverter.php
    'en', 'en-x-piglatin',
    # GanConverter.php
    'gan', 'gan-hans', 'gan-hant',
    # IuConverter.php
    'iu', 'ike-cans', 'ike-latn',
    # KkConverter.php
    'kk', 'kk-cyrl', 'kk-latn', 'kk-arab', 'kk-kz', 'kk-tr', 'kk-cn',
    # KuConverter.php
    'ku', 'ku-arab', 'ku-latn',
    # ShConverter.php
    'sh-latn', 'sh-cyrl',
    # ShiConverter.php
    'shi', 'shi-tfng', 'shi-latn',
    # SrConverter.php
    'sr', 'sr-ec', 'sr-el',
    # TgConverter.php
    'tg', 'tg-latn',
    # TlyConverter.php
    'tly', 'tly-cyrl',
    # UzConverter.php
    'uz', 'uz-latn', 'uz-cyrl',
    # ZhConverter.php
    'zh', 'zh-hans', 'zh-hant', 'zh-cn', 'zh-hk', 'zh-mo', 'zh-my', 'zh-sg', 'zh-tw',
}
# Magic variable names that the lexer matches case-insensitively
# inside ``{{ ... }}``.
magic_vars_i = {
    'ARTICLEPATH',
    'INT',
    'PAGEID',
    'SCRIPTPATH',
    'SERVER',
    'SERVERNAME',
    'STYLEPATH',
}
# Magic variable names that the lexer matches with exact case
# inside ``{{ ... }}``.
magic_vars = {
    # Single-character names
    '!', '=',
    # Site / content related names
    'CASCADINGSOURCES', 'CONTENTLANG', 'CONTENTLANGUAGE', 'DIRECTIONMARK', 'DIRMARK',
    'PAGELANGUAGE', 'SITENAME',
    # CURRENT* family
    'CURRENTDAY', 'CURRENTDAY2', 'CURRENTDAYNAME', 'CURRENTDOW', 'CURRENTHOUR',
    'CURRENTMONTH', 'CURRENTMONTH1', 'CURRENTMONTH2', 'CURRENTMONTHABBREV',
    'CURRENTMONTHNAME', 'CURRENTMONTHNAMEGEN', 'CURRENTTIME', 'CURRENTTIMESTAMP',
    'CURRENTVERSION', 'CURRENTWEEK', 'CURRENTYEAR',
    # LOCAL* family
    'LOCALDAY', 'LOCALDAY2', 'LOCALDAYNAME', 'LOCALDOW', 'LOCALHOUR', 'LOCALMONTH',
    'LOCALMONTH1', 'LOCALMONTH2', 'LOCALMONTHABBREV', 'LOCALMONTHNAME',
    'LOCALMONTHNAMEGEN', 'LOCALTIME', 'LOCALTIMESTAMP', 'LOCALWEEK', 'LOCALYEAR',
    # NUMBEROF* family
    'NUMBEROFACTIVEUSERS', 'NUMBEROFADMINS', 'NUMBEROFARTICLES', 'NUMBEROFEDITS',
    'NUMBEROFFILES', 'NUMBEROFPAGES', 'NUMBEROFUSERS',
    # REVISION* family
    'REVISIONDAY', 'REVISIONDAY2', 'REVISIONID', 'REVISIONMONTH', 'REVISIONMONTH1',
    'REVISIONSIZE', 'REVISIONTIMESTAMP', 'REVISIONUSER', 'REVISIONYEAR',
    # Page name / namespace names
    'ARTICLEPAGENAME', 'ARTICLEPAGENAMEE', 'ARTICLESPACE', 'ARTICLESPACEE',
    'BASEPAGENAME', 'BASEPAGENAMEE', 'FULLPAGENAME', 'FULLPAGENAMEE',
    'NAMESPACE', 'NAMESPACEE', 'NAMESPACENUMBER', 'PAGENAME', 'PAGENAMEE',
    'ROOTPAGENAME', 'ROOTPAGENAMEE', 'SUBJECTPAGENAME', 'SUBJECTPAGENAMEE',
    'SUBJECTSPACE', 'SUBJECTSPACEE', 'SUBPAGENAME', 'SUBPAGENAMEE',
    'TALKPAGENAME', 'TALKPAGENAMEE', 'TALKSPACE', 'TALKSPACEE',
}
# Parser function names that the lexer matches case-insensitively before a
# colon. The entries are joined into a regex alternation, hence the escaped
# ``#`` in ``\#LANGUAGE``.
parser_functions_i = {
    r'\#LANGUAGE',
    'ANCHORENCODE', 'BIDI',
    'CANONICALURL', 'CANONICALURLE',
    'FILEPATH', 'FORMATNUM',
    'FULLURL', 'FULLURLE',
    'GENDER', 'GRAMMAR',
    'INT',
    'LC', 'LCFIRST',
    'LOCALURL', 'LOCALURLE',
    'NS', 'NSE',
    'PADLEFT', 'PADRIGHT',
    'PAGEID', 'PLURAL',
    'UC', 'UCFIRST',
    'URLENCODE',
}
# Parser function names that the lexer matches with exact case before a colon.
parser_functions = {
    # Sorting / title related names
    'DEFAULTCATEGORYSORT', 'DEFAULTSORT', 'DEFAULTSORTKEY', 'DISPLAYTITLE',
    # Miscellaneous
    'INT',
    'CASCADINGSOURCES', 'PAGESIZE', 'PROTECTIONEXPIRY', 'PROTECTIONLEVEL',
    # Counting names
    'NUMBERINGROUP', 'NUMINGROUP', 'NUMBEROFACTIVEUSERS', 'NUMBEROFADMINS',
    'NUMBEROFARTICLES', 'NUMBEROFEDITS', 'NUMBEROFFILES', 'NUMBEROFPAGES',
    'NUMBEROFUSERS', 'PAGESINCAT', 'PAGESINCATEGORY', 'PAGESINNAMESPACE', 'PAGESINNS',
    # REVISION* family
    'REVISIONDAY', 'REVISIONDAY2', 'REVISIONID', 'REVISIONMONTH', 'REVISIONMONTH1',
    'REVISIONTIMESTAMP', 'REVISIONUSER', 'REVISIONYEAR',
    # Page name / namespace names
    'ARTICLEPAGENAME', 'ARTICLEPAGENAMEE', 'ARTICLESPACE', 'ARTICLESPACEE',
    'BASEPAGENAME', 'BASEPAGENAMEE', 'FULLPAGENAME', 'FULLPAGENAMEE',
    'NAMESPACE', 'NAMESPACEE', 'NAMESPACENUMBER', 'PAGENAME', 'PAGENAMEE',
    'ROOTPAGENAME', 'ROOTPAGENAMEE', 'SUBJECTPAGENAME', 'SUBJECTPAGENAMEE',
    'SUBJECTSPACE', 'SUBJECTSPACEE', 'SUBPAGENAME', 'SUBPAGENAMEE',
    'TALKPAGENAME', 'TALKPAGENAMEE', 'TALKSPACE', 'TALKSPACEE',
}
# State table of the lexer. Each key is a state name; each value is the ordered
# list of rules tried in that state. Rule order matters: the first rule whose
# regex matches at the current position wins, so specific rules must stay
# ahead of the generic ones that would also match.
tokens = {
    'root': [
        # Redirects
        (r"""(?xi)
            (\A\s*?)(\#REDIRECT:?) # may contain a colon
            (\s+)(\[\[) (?=[^\]\n]* \]\]$)
            """,
         bygroups(Whitespace, Keyword, Whitespace, Punctuation), 'redirect-inner'),
        # Subheadings
        (r'^(={2,6})(.+?)(\1)(\s*$\n)',
         bygroups(Generic.Subheading, Generic.Subheading, Generic.Subheading, Whitespace)),
        # Headings
        (r'^(=.+?=)(\s*$\n)',
         bygroups(Generic.Heading, Whitespace)),
        # Double-underscore magic words
        (words(double_slashes_i, prefix=r'(?i)'), Name.Function.Magic),
        (words(double_slashes), Name.Function.Magic),
        # Raw URLs
        (r'(?i)\b(?:{}){}{}*'.format('|'.join(protocols),
                                    link_address, link_char_class), Name.Label),
        # Magic links
        (r'\b(?:RFC|PMID){}+[0-9]+\b'.format(nbsp_char),
         Name.Function.Magic),
        (r"""(?x)
            \bISBN {nbsp_char}
            (?: 97[89] {nbsp_dash}? )?
            (?: [0-9] {nbsp_dash}? ){{9}} # escape format()
            [0-9Xx]\b
            """.format(nbsp_char=nbsp_char, nbsp_dash=f'(?:-|{nbsp_char})'), Name.Function.Magic),
        include('list'),
        include('inline'),
        include('text'),
    ],
    # Target of a redirect, entered right after the opening '[['
    'redirect-inner': [
        (r'(\]\])(\s*?\n)', bygroups(Punctuation, Whitespace), '#pop'),
        (r'(\#)([^#]*?)', bygroups(Punctuation, Name.Label)),
        (r'(?i)[{}]+'.format(title_char), Name.Tag),
    ],
    # Line-start markup
    'list': [
        # Description lists
        (r'^;', Keyword, 'dt'),
        # Ordered lists, unordered lists and indents
        (r'^[#:*]+', Keyword),
        # Horizontal rules
        (r'^-{4,}', Keyword),
    ],
    # Markup that may appear anywhere inside running text
    'inline': [
        # Signatures
        (r'~{3,5}', Keyword),
        # Entities
        include('entity'),
        # Bold & italic
        (r"('')(''')(?!')", bygroups(Generic.Emph,
                                     Generic.EmphStrong), 'inline-italic-bold'),
        (r"'''(?!')", Generic.Strong, 'inline-bold'),
        (r"''(?!')", Generic.Emph, 'inline-italic'),
        # Comments & parameters & templates
        include('replaceable'),
        # Media links
        (
            r"""(?xi)
            (\[\[)
                (File|Image) (:)
                ((?: [%s] | \{{2,3}[^{}]*?\}{2,3} | <!--[\s\S]*?--> )*)
                (?: (\#) ([%s]*?) )?
            """ % (title_char, f'{title_char}#'),
            bygroups(Punctuation, Name.Namespace, Punctuation,
                     using(this, state=['wikilink-name']), Punctuation, Name.Label),
            'medialink-inner'
        ),
        # Wikilinks
        (
            r"""(?xi)
            (\[\[)(?!%s) # Should not contain URLs
                (?: ([%s]*) (:))?
                ((?: [%s] | \{{2,3}[^{}]*?\}{2,3} | <!--[\s\S]*?--> )*?)
                (?: (\#) ([%s]*?) )?
            (\]\])
            """ % ('|'.join(protocols), title_char.replace('/', ''),
                   title_char, f'{title_char}#'),
            bygroups(Punctuation, Name.Namespace, Punctuation,
                     using(this, state=['wikilink-name']), Punctuation, Name.Label, Punctuation)
        ),
        (
            r"""(?xi)
            (\[\[)(?!%s)
                (?: ([%s]*) (:))?
                ((?: [%s] | \{{2,3}[^{}]*?\}{2,3} | <!--[\s\S]*?--> )*?)
                (?: (\#) ([%s]*?) )?
                (\|)
            """ % ('|'.join(protocols), title_char.replace('/', ''),
                   title_char, f'{title_char}#'),
            bygroups(Punctuation, Name.Namespace, Punctuation,
                     using(this, state=['wikilink-name']), Punctuation, Name.Label, Punctuation),
            'wikilink-inner'
        ),
        # External links
        (
            r"""(?xi)
            (\[)
                ((?:{}) {} {}*)
                (\s*)
            """.format('|'.join(protocols), link_address, link_char_class),
            bygroups(Punctuation, Name.Label, Whitespace),
            'extlink-inner'
        ),
        # Tables
        (r'^(:*)(\s*?)(\{\|)([^\n]*)$', bygroups(Keyword,
                                                 Whitespace, Punctuation, using(this, state=['root', 'attr'])), 'table'),
        # HTML tags
        (r'(?i)(<)({})\b'.format('|'.join(html_tags)),
         bygroups(Punctuation, Name.Tag), 'tag-inner-ordinary'),
        (r'(?i)(</)({})\b(\s*)(>)'.format('|'.join(html_tags)),
         bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)),
        # The tag-specific rules below push two states: the content state
        # first and 'tag-inner' on top of it, so the attributes of the opening
        # tag are consumed before the tag content.
        # <nowiki>
        (r'(?i)(<)(nowiki)\b', bygroups(Punctuation,
                                        Name.Tag), ('tag-nowiki', 'tag-inner')),
        # <pre>
        (r'(?i)(<)(pre)\b', bygroups(Punctuation,
                                     Name.Tag), ('tag-pre', 'tag-inner')),
        # <categorytree>
        (r'(?i)(<)(categorytree)\b', bygroups(
            Punctuation, Name.Tag), ('tag-categorytree', 'tag-inner')),
        # <hiero>
        (r'(?i)(<)(hiero)\b', bygroups(Punctuation,
                                       Name.Tag), ('tag-hiero', 'tag-inner')),
        # <math>
        (r'(?i)(<)(math)\b', bygroups(Punctuation,
                                      Name.Tag), ('tag-math', 'tag-inner')),
        # <chem>
        (r'(?i)(<)(chem)\b', bygroups(Punctuation,
                                      Name.Tag), ('tag-chem', 'tag-inner')),
        # <ce>
        (r'(?i)(<)(ce)\b', bygroups(Punctuation,
                                    Name.Tag), ('tag-ce', 'tag-inner')),
        # <charinsert>
        (r'(?i)(<)(charinsert)\b', bygroups(
            Punctuation, Name.Tag), ('tag-charinsert', 'tag-inner')),
        # <templatedata>
        (r'(?i)(<)(templatedata)\b', bygroups(
            Punctuation, Name.Tag), ('tag-templatedata', 'tag-inner')),
        # <gallery>
        (r'(?i)(<)(gallery)\b', bygroups(
            Punctuation, Name.Tag), ('tag-gallery', 'tag-inner')),
        # <graph>
        # This rule must match "graph"; matching "gallery" here would be dead
        # code behind the rule above and leave 'tag-graph' unreachable.
        (r'(?i)(<)(graph)\b', bygroups(
            Punctuation, Name.Tag), ('tag-graph', 'tag-inner')),
        # <dynamicpagelist>
        (r'(?i)(<)(dynamicpagelist)\b', bygroups(
            Punctuation, Name.Tag), ('tag-dynamicpagelist', 'tag-inner')),
        # <inputbox>
        (r'(?i)(<)(inputbox)\b', bygroups(
            Punctuation, Name.Tag), ('tag-inputbox', 'tag-inner')),
        # <rss>
        (r'(?i)(<)(rss)\b', bygroups(
            Punctuation, Name.Tag), ('tag-rss', 'tag-inner')),
        # <imagemap>
        (r'(?i)(<)(imagemap)\b', bygroups(
            Punctuation, Name.Tag), ('tag-imagemap', 'tag-inner')),
        # <timeline>
        # Enters the otherwise unreachable 'tag-timeline' state.
        (r'(?i)(<)(timeline)\b', bygroups(
            Punctuation, Name.Tag), ('tag-timeline', 'tag-inner')),
        # <syntaxhighlight>
        (r'(?i)(</)(syntaxhighlight)\b(\s*)(>)',
         bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)),
        (r'(?si)(<)(syntaxhighlight)\b([^>]*?(?<!/)>.*?)(?=</\2\s*>)',
         bygroups(Punctuation, Name.Tag, handle_syntaxhighlight)),
        # <syntaxhighlight>: Fallback case for self-closing tags
        (r'(?i)(<)(syntaxhighlight)\b(\s*?)((?:[^>]|-->)*?)(/\s*?(?<!--)>)', bygroups(
            Punctuation, Name.Tag, Whitespace, using(this, state=['root', 'attr']), Punctuation)),
        # <source>
        (r'(?i)(</)(source)\b(\s*)(>)',
         bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)),
        (r'(?si)(<)(source)\b([^>]*?(?<!/)>.*?)(?=</\2\s*>)',
         bygroups(Punctuation, Name.Tag, handle_syntaxhighlight)),
        # <source>: Fallback case for self-closing tags
        (r'(?i)(<)(source)\b(\s*?)((?:[^>]|-->)*?)(/\s*?(?<!--)>)', bygroups(
            Punctuation, Name.Tag, Whitespace, using(this, state=['root', 'attr']), Punctuation)),
        # <score>
        (r'(?i)(</)(score)\b(\s*)(>)',
         bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)),
        (r'(?si)(<)(score)\b([^>]*?(?<!/)>.*?)(?=</\2\s*>)',
         bygroups(Punctuation, Name.Tag, handle_score)),
        # <score>: Fallback case for self-closing tags
        (r'(?i)(<)(score)\b(\s*?)((?:[^>]|-->)*?)(/\s*?(?<!--)>)', bygroups(
            Punctuation, Name.Tag, Whitespace, using(this, state=['root', 'attr']), Punctuation)),
        # Other parser tags
        (r'(?i)(<)({})\b'.format('|'.join(parser_tags)),
         bygroups(Punctuation, Name.Tag), 'tag-inner-ordinary'),
        (r'(?i)(</)({})\b(\s*)(>)'.format('|'.join(parser_tags)),
         bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)),
        # LanguageConverter markups
        (
            r"""(?xi)
            (-\{{) # Escape format()
                (?: ([^|]) (\|))?
                (?: (\s* (?:{variants}) \s*) (=>))?
                (\s* (?:{variants}) \s*) (:)
            """.format(variants='|'.join(variant_langs)),
            bygroups(Punctuation, Keyword, Punctuation,
                     Name.Label, Operator, Name.Label, Punctuation),
            'lc-inner'
        ),
        (r'-\{(?!\{)', Punctuation, 'lc-raw'),
    ],
    # Used via using(this, state=[...]) to highlight the page-name part of links
    'wikilink-name': [
        include('replaceable'),
        (r'[^{<]+', Name.Tag),
        (r'(?s).', Name.Tag),
    ],
    'wikilink-inner': [
        # Quit in case of another wikilink
        (r'(?=\[\[)', Punctuation, '#pop'),
        (r'\]\]', Punctuation, '#pop'),
        include('inline'),
        include('text'),
    ],
    'medialink-inner': [
        (r'\]\]', Punctuation, '#pop'),
        (r'(\|)([^\n=|]*)(=)',
         bygroups(Punctuation, Name.Attribute, Operator)),
        (r'\|', Punctuation),
        include('inline'),
        include('text'),
    ],
    # Shared exit rules for the bold/italic states below
    'quote-common': [
        # Quit in case of link/template endings
        (r'(?=\]\]|\{\{|\}\})', Punctuation, '#pop'),
        (r'\n', Text, '#pop'),
    ],
    'inline-italic': [
        include('quote-common'),
        (r"('')(''')(?!')", bygroups(Generic.Emph,
                                     Generic.Strong), ('#pop', 'inline-bold')),
        (r"'''(?!')", Generic.EmphStrong, ('#pop', 'inline-italic-bold')),
        (r"''(?!')", Generic.Emph, '#pop'),
        include('inline'),
        include('text-italic'),
    ],
    'inline-bold': [
        include('quote-common'),
        (r"(''')('')(?!')", bygroups(
            Generic.Strong, Generic.Emph), ('#pop', 'inline-italic')),
        (r"'''(?!')", Generic.Strong, '#pop'),
        (r"''(?!')", Generic.EmphStrong, ('#pop', 'inline-bold-italic')),
        include('inline'),
        include('text-bold'),
    ],
    'inline-bold-italic': [
        include('quote-common'),
        (r"('')(''')(?!')", bygroups(Generic.EmphStrong,
                                     Generic.Strong), '#pop'),
        (r"'''(?!')", Generic.EmphStrong, ('#pop', 'inline-italic')),
        (r"''(?!')", Generic.EmphStrong, ('#pop', 'inline-bold')),
        include('inline'),
        include('text-bold-italic'),
    ],
    'inline-italic-bold': [
        include('quote-common'),
        (r"(''')('')(?!')", bygroups(
            Generic.EmphStrong, Generic.Emph), '#pop'),
        (r"'''(?!')", Generic.EmphStrong, ('#pop', 'inline-italic')),
        (r"''(?!')", Generic.EmphStrong, ('#pop', 'inline-bold')),
        include('inline'),
        include('text-bold-italic'),
    ],
    # Inside -{ variant: text; ... }-
    'lc-inner': [
        (
            r"""(?xi)
            (;)
            (?: (\s* (?:{variants}) \s*) (=>))?
            (\s* (?:{variants}) \s*) (:)
            """.format(variants='|'.join(variant_langs)),
            bygroups(Punctuation, Name.Label,
                     Operator, Name.Label, Punctuation)
        ),
        (r';?\s*?\}-', Punctuation, '#pop'),
        include('inline'),
        include('text'),
    ],
    # Inside -{ ... }- without a recognised variant prefix
    'lc-raw': [
        (r'\}-', Punctuation, '#pop'),
        include('inline'),
        include('text'),
    ],
    'replaceable': [
        # Comments
        (r'<!--[\s\S]*?(?:-->|\Z)', Comment.Multiline),
        # Parameters
        (
            r"""(?x)
            (\{{3})
                ([^|]*?)
                (?=\}{3}|\|)
            """,
            bygroups(Punctuation, Name.Variable),
            'parameter-inner',
        ),
        # Magic variables
        (r'(?i)(\{\{)(\s*)(%s)(\s*)(\}\})' % '|'.join(magic_vars_i),
         bygroups(Punctuation, Whitespace, Name.Function, Whitespace, Punctuation)),
        (r'(\{\{)(\s*)(%s)(\s*)(\}\})' % '|'.join(magic_vars),
         bygroups(Punctuation, Whitespace, Name.Function, Whitespace, Punctuation)),
        # Parser functions & templates
        (r'\{\{', Punctuation, 'template-begin-space'),
        # <tvar> legacy syntax
        (r'(?i)(<)(tvar)\b(\|)([^>]*?)(>)', bygroups(Punctuation,
                                                     Name.Tag, Punctuation, String, Punctuation)),
        (r'</>', Punctuation, '#pop'),
        # <tvar>
        (r'(?i)(<)(tvar)\b', bygroups(Punctuation, Name.Tag), 'tag-inner-ordinary'),
        (r'(?i)(</)(tvar)\b(\s*)(>)',
         bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)),
    ],
    'parameter-inner': [
        (r'\}{3}', Punctuation, '#pop'),
        (r'\|', Punctuation),
        include('inline'),
        include('text'),
    ],
    'template-begin-space': [
        # Templates allow line breaks at the beginning, and due to how MediaWiki handles
        # comments, an extra state is required to handle things like {{\n<!---->\n name}}
        (r'<!--[\s\S]*?(?:-->|\Z)', Comment.Multiline),
        (r'\s+', Whitespace),
        # Parser functions
        (
            r'(?i)(\#[%s]*?|%s)(:)' % (title_char,
                                       '|'.join(parser_functions_i)),
            bygroups(Name.Function, Punctuation), ('#pop', 'template-inner')
        ),
        (
            r'(%s)(:)' % ('|'.join(parser_functions)),
            bygroups(Name.Function, Punctuation), ('#pop', 'template-inner')
        ),
        # Templates
        (
            r'(?i)([%s]*?)(:)' % title_char,
            bygroups(Name.Namespace, Punctuation), ('#pop', 'template-name')
        ),
        default(('#pop', 'template-name'),),
    ],
    'template-name': [
        (r'(\s*?)(\|)', bygroups(Text, Punctuation), ('#pop', 'template-inner')),
        (r'\}\}', Punctuation, '#pop'),
        (r'\n', Text, '#pop'),
        include('replaceable'),
        *text_rules(Name.Tag),
    ],
    'template-inner': [
        (r'\}\}', Punctuation, '#pop'),
        (r'\|', Punctuation),
        (
            r"""(?x)
                (?<=\|)
                ( (?: (?! \{\{ | \}\} )[^=\|<])*? ) # Exclude templates and tags
                (=)
            """,
            bygroups(Name.Label, Operator)
        ),
        include('inline'),
        include('text'),
    ],
    'table': [
        # Use [ \t\n\r\0\x0B] instead of \s to follow PHP trim() behavior
        # Endings
        (r'^([ \t\n\r\0\x0B]*?)(\|\})',
         bygroups(Whitespace, Punctuation), '#pop'),
        # Table rows
        (r'^([ \t\n\r\0\x0B]*?)(\|-+)(.*)$', bygroups(Whitespace, Punctuation,
                                                      using(this, state=['root', 'attr']))),
        # Captions
        (
            r"""(?x)
            ^([ \t\n\r\0\x0B]*?)(\|\+)
            # Exclude links, template and tags
            (?: ( (?: (?! \[\[ | \{\{ )[^|\n<] )*? )(\|) )?
            (.*?)$
            """,
            bygroups(Whitespace, Punctuation, using(this, state=[
                'root', 'attr']), Punctuation, Generic.Heading),
        ),
        # Table data
        (
            r"""(?x)
            ( ^(?:[ \t\n\r\0\x0B]*?)\| | \|\| )
            (?: ( (?: (?! \[\[ | \{\{ )[^|\n<] )*? )(\|)(?!\|) )?
            """,
            bygroups(Punctuation, using(this, state=[
                'root', 'attr']), Punctuation),
        ),
        # Table headers
        (
            r"""(?x)
            ( ^(?:[ \t\n\r\0\x0B]*?)! )
            (?: ( (?: (?! \[\[ | \{\{ )[^|\n<] )*? )(\|)(?!\|) )?
            """,
            bygroups(Punctuation, using(this, state=[
                'root', 'attr']), Punctuation),
            'table-header',
        ),
        include('list'),
        include('inline'),
        include('text'),
    ],
    'table-header': [
        # Requires another state for || handling inside headers
        (r'\n', Text, '#pop'),
        (
            r"""(?x)
            (!!|\|\|)
            (?:
                ( (?: (?! \[\[ | \{\{ )[^|\n<] )*? )
                (\|)(?!\|)
            )?
            """,
            bygroups(Punctuation, using(this, state=[
                'root', 'attr']), Punctuation)
        ),
        *text_rules(Generic.Subheading),
    ],
    'entity': [
        (r'&\S*?;', Name.Entity),
    ],
    # Term part of a description list line (after a leading ';')
    'dt': [
        (r'\n', Text, '#pop'),
        include('inline'),
        (r':', Keyword, '#pop'),
        include('text'),
    ],
    'extlink-inner': [
        (r'\]', Punctuation, '#pop'),
        include('inline'),
        include('text'),
    ],
    'nowiki-ish': [
        include('entity'),
        include('text'),
    ],
    # Attribute lists outside of tags (used via using(this, state=['root', 'attr']))
    'attr': [
        include('replaceable'),
        (r'\s+', Whitespace),
        (r'(=)(\s*)(")', bygroups(Operator, Whitespace, String.Double), 'attr-val-2'),
        (r"(=)(\s*)(')", bygroups(Operator, Whitespace, String.Single), 'attr-val-1'),
        (r'(=)(\s*)', bygroups(Operator, Whitespace), 'attr-val-0'),
        (r'[\w:-]+', Name.Attribute),
    ],
    # Unquoted attribute value
    'attr-val-0': [
        (r'\s', Whitespace, '#pop'),
        include('replaceable'),
        *text_rules(String),
    ],
    # Single-quoted attribute value
    'attr-val-1': [
        (r"'", String.Single, '#pop'),
        include('replaceable'),
        *text_rules(String.Single),
    ],
    # Double-quoted attribute value
    'attr-val-2': [
        (r'"', String.Double, '#pop'),
        include('replaceable'),
        *text_rules(String.Double),
    ],
    'tag-inner-ordinary': [
        (r'/?\s*>', Punctuation, '#pop'),
        include('tag-attr'),
    ],
    'tag-inner': [
        # Return to root state for self-closing tags
        (r'/\s*>', Punctuation, '#pop:2'),
        (r'\s*>', Punctuation, '#pop'),
        include('tag-attr'),
    ],
    # These states below are just like their non-tag variants, the key difference is
    # they forcibly quit when encountering tag closing markup
    'tag-attr': [
        include('replaceable'),
        (r'\s+', Whitespace),
        (r'(=)(\s*)(")', bygroups(Operator,
                                  Whitespace, String.Double), 'tag-attr-val-2'),
        (r"(=)(\s*)(')", bygroups(Operator,
                                  Whitespace, String.Single), 'tag-attr-val-1'),
        (r'(=)(\s*)', bygroups(Operator, Whitespace), 'tag-attr-val-0'),
        (r'[\w:-]+', Name.Attribute),
    ],
    'tag-attr-val-0': [
        (r'\s', Whitespace, '#pop'),
        (r'/?>', Punctuation, '#pop:2'),
        include('replaceable'),
        *text_rules(String),
    ],
    'tag-attr-val-1': [
        (r"'", String.Single, '#pop'),
        (r'/?>', Punctuation, '#pop:2'),
        include('replaceable'),
        *text_rules(String.Single),
    ],
    'tag-attr-val-2': [
        (r'"', String.Double, '#pop'),
        (r'/?>', Punctuation, '#pop:2'),
        include('replaceable'),
        *text_rules(String.Double),
    ],
    # Content states for the tag-specific rules in 'inline'; each is built by
    # a helper defined elsewhere in this class.
    'tag-nowiki': nowiki_tag_rules('nowiki'),
    'tag-pre': nowiki_tag_rules('pre'),
    'tag-categorytree': plaintext_tag_rules('categorytree'),
    'tag-dynamicpagelist': plaintext_tag_rules('dynamicpagelist'),
    'tag-hiero': plaintext_tag_rules('hiero'),
    'tag-inputbox': plaintext_tag_rules('inputbox'),
    'tag-imagemap': plaintext_tag_rules('imagemap'),
    'tag-charinsert': plaintext_tag_rules('charinsert'),
    'tag-timeline': plaintext_tag_rules('timeline'),
    'tag-gallery': plaintext_tag_rules('gallery'),
    'tag-graph': plaintext_tag_rules('graph'),
    'tag-rss': plaintext_tag_rules('rss'),
    'tag-math': delegate_tag_rules('math', TexLexer),
    'tag-chem': delegate_tag_rules('chem', TexLexer),
    'tag-ce': delegate_tag_rules('ce', TexLexer),
    'tag-templatedata': delegate_tag_rules('templatedata', JsonLexer),
    # Plain-text fallbacks, one per emphasis level
    'text-italic': text_rules(Generic.Emph),
    'text-bold': text_rules(Generic.Strong),
    'text-bold-italic': text_rules(Generic.EmphStrong),
    'text': text_rules(Text),
}
|