# -*- coding: utf-8 -*-
"""
    pygments.lexers.markup
    ~~~~~~~~~~~~~~~~~~~~~~

    Lexers for non-HTML markup languages.

    :copyright: Copyright 2006-2019 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import re

from pygments.lexers.html import HtmlLexer, XmlLexer
from pygments.lexers.javascript import JavascriptLexer
from pygments.lexers.css import CssLexer

from pygments.lexer import RegexLexer, DelegatingLexer, include, bygroups, \
    using, this, do_insertions, default, words
from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
    Number, Punctuation, Generic, Other
from pygments.util import get_bool_opt, ClassNotFound

__all__ = ['BBCodeLexer', 'MoinWikiLexer', 'RstLexer', 'TexLexer', 'GroffLexer',
           'MozPreprocHashLexer', 'MozPreprocPercentLexer',
           'MozPreprocXulLexer', 'MozPreprocJavascriptLexer',
           'MozPreprocCssLexer', 'MarkdownLexer']


class BBCodeLexer(RegexLexer):
    """
    A lexer that highlights BBCode(-like) syntax.

    .. versionadded:: 0.6
    """

    name = 'BBCode'
    aliases = ['bbcode']
    mimetypes = ['text/x-bbcode']

    tokens = {
        'root': [
            (r'[^[]+', Text),
            # tag/end tag begin
            (r'\[/?\w+', Keyword, 'tag'),
            # stray bracket
            (r'\[', Text),
        ],
        'tag': [
            (r'\s+', Text),
            # attribute with value
            (r'(\w+)(=)("?[^\s"\]]+"?)',
             bygroups(Name.Attribute, Operator, String)),
            # tag argument (a la [color=green])
            (r'(=)("?[^\s"\]]+"?)',
             bygroups(Operator, String)),
            # tag end
            (r'\]', Keyword, '#pop'),
        ],
    }


class MoinWikiLexer(RegexLexer):
    """
    For MoinMoin (and Trac) Wiki markup.

    .. versionadded:: 0.7
    """

    name = 'MoinMoin/Trac Wiki markup'
    aliases = ['trac-wiki', 'moin']
    filenames = []
    mimetypes = ['text/x-trac-wiki']
    flags = re.MULTILINE | re.IGNORECASE

    tokens = {
        'root': [
            (r'^#.*$', Comment),
            (r'(!)(\S+)', bygroups(Keyword, Text)),  # Ignore-next
            # Titles
            (r'^(=+)([^=]+)(=+)(\s*#.+)?$',
             bygroups(Generic.Heading, using(this), Generic.Heading, String)),
            # Literal code blocks, with optional shebang
            (r'(\{\{\{)(\n#!.+)?', bygroups(Name.Builtin, Name.Namespace), 'codeblock'),
            (r'(\'\'\'?|\|\||`|__|~~|\^|,,|::)', Comment),  # Formatting
            # Lists
            (r'^( +)([.*-])( )', bygroups(Text, Name.Builtin, Text)),
            (r'^( +)([a-z]{1,5}\.)( )', bygroups(Text, Name.Builtin, Text)),
            # Other Formatting
            (r'\[\[\w+.*?\]\]', Keyword),  # Macro
            (r'(\[[^\s\]]+)(\s+[^\]]+?)?(\])',
             bygroups(Keyword, String, Keyword)),  # Link
            (r'^----+$', Keyword),  # Horizontal rules
            (r'[^\n\'\[{!_~^,|]+', Text),
            (r'\n', Text),
            (r'.', Text),
        ],
        'codeblock': [
            (r'\}\}\}', Name.Builtin, '#pop'),
            # these blocks are allowed to be nested in Trac, but not MoinMoin
            (r'\{\{\{', Text, '#push'),
            (r'[^{}]+', Comment.Preproc),  # slurp boring text
            (r'.', Comment.Preproc),  # allow loose { or }
        ],
    }


class RstLexer(RegexLexer):
    """
    For `reStructuredText <http://docutils.sf.net/rst.html>`_ markup.

    .. versionadded:: 0.7

    Additional options accepted:

    `handlecodeblocks`
        Highlight the contents of ``.. sourcecode:: language``,
        ``.. code:: language`` and ``.. code-block:: language``
        directives with a lexer for the given language (default:
        ``True``).

        .. versionadded:: 0.8
    """

    name = 'reStructuredText'
    aliases = ['rst', 'rest', 'restructuredtext']
    filenames = ['*.rst', '*.rest']
    mimetypes = ["text/x-rst", "text/prs.fallenstein.rst"]
    flags = re.MULTILINE
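
    # Callback for the ``sourcecode``/``code``/``code-block`` directive rule
    # below: it re-emits the directive itself token by token, then hands the
    # indented body to the lexer named in the directive, when
    # ``handlecodeblocks`` is enabled and such a lexer exists.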
    def _handle_sourcecode(self, match):
        from pygments.lexers import get_lexer_by_name

        # section header
        yield match.start(1), Punctuation, match.group(1)
        yield match.start(2), Text, match.group(2)
        yield match.start(3), Operator.Word, match.group(3)
        yield match.start(4), Punctuation, match.group(4)
        yield match.start(5), Text, match.group(5)
        yield match.start(6), Keyword, match.group(6)
        yield match.start(7), Text, match.group(7)

        # lookup lexer if wanted and existing
        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name(match.group(6).strip())
            except ClassNotFound:
                pass
        indention = match.group(8)
        indention_size = len(indention)
        code = (indention + match.group(9) + match.group(10) + match.group(11))

        # no lexer for this language. handle it like it was a code block
        if lexer is None:
            yield match.start(8), String, code
            return
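
        # Split each line into its leading indentation and its code: the
        # indentation is re-inserted as plain Text via ``do_insertions``,
        # so the delegated lexer only ever sees the dedented source.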
        # highlight the lines with the lexer.
        ins = []
        codelines = code.splitlines(True)
        code = ''
        for line in codelines:
            if len(line) > indention_size:
                ins.append((len(code), [(0, Text, line[:indention_size])]))
                code += line[indention_size:]
            else:
                code += line
        for item in do_insertions(ins, lexer.get_tokens_unprocessed(code)):
            yield item

    # from docutils.parsers.rst.states
    closers = u'\'")]}>\u2019\u201d\xbb!?'
    unicode_delimiters = u'\u2010\u2011\u2012\u2013\u2014\u00a0'
    end_string_suffix = (r'((?=$)|(?=[-/:.,; \n\x00%s%s]))'
                         % (re.escape(unicode_delimiters),
                            re.escape(closers)))

    tokens = {
        'root': [
            # Heading with overline
            (r'^(=+|-+|`+|:+|\.+|\'+|"+|~+|\^+|_+|\*+|\++|#+)([ \t]*\n)'
             r'(.+)(\n)(\1)(\n)',
             bygroups(Generic.Heading, Text, Generic.Heading,
                      Text, Generic.Heading, Text)),
            # Plain heading
            (r'^(\S.*)(\n)(={3,}|-{3,}|`{3,}|:{3,}|\.{3,}|\'{3,}|"{3,}|'
             r'~{3,}|\^{3,}|_{3,}|\*{3,}|\+{3,}|#{3,})(\n)',
             bygroups(Generic.Heading, Text, Generic.Heading, Text)),
            # Bulleted lists
            (r'^(\s*)([-*+])( .+\n(?:\1 .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Numbered lists
            (r'^(\s*)([0-9#ivxlcmIVXLCM]+\.)( .+\n(?:\1 .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            (r'^(\s*)(\(?[0-9#ivxlcmIVXLCM]+\))( .+\n(?:\1 .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Numbered, but keep words at BOL from becoming lists
            (r'^(\s*)([A-Z]+\.)( .+\n(?:\1 .+\n)+)',
             bygroups(Text, Number, using(this, state='inline'))),
            (r'^(\s*)(\(?[A-Za-z]+\))( .+\n(?:\1 .+\n)+)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Line blocks
            (r'^(\s*)(\|)( .+\n(?:\| .+\n)*)',
             bygroups(Text, Operator, using(this, state='inline'))),
            # Sourcecode directives
            (r'^( *\.\.)(\s*)((?:source)?code(?:-block)?)(::)([ \t]*)([^\n]+)'
             r'(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\8.*|)\n)+)',
             _handle_sourcecode),
            # A directive
            (r'^( *\.\.)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))',
             bygroups(Punctuation, Text, Operator.Word, Punctuation, Text,
                      using(this, state='inline'))),
            # A reference target
            (r'^( *\.\.)(\s*)(_(?:[^:\\]|\\.)+:)(.*?)$',
             bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))),
            # A footnote/citation target
            (r'^( *\.\.)(\s*)(\[.+\])(.*?)$',
             bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))),
            # A substitution def
            (r'^( *\.\.)(\s*)(\|.+\|)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))',
             bygroups(Punctuation, Text, Name.Tag, Text, Operator.Word,
                      Punctuation, Text, using(this, state='inline'))),
            # Comments
            (r'^ *\.\..*(\n( +.*\n|\n)+)?', Comment.Preproc),
            # Field list marker
            (r'^( *)(:(?:\\\\|\\:|[^:\n])+:(?=\s))([ \t]*)',
             bygroups(Text, Name.Class, Text)),
            # Definition list
            (r'^(\S.*(?<!::)\n)((?:(?: +.*)\n)+)',
             bygroups(using(this, state='inline'), using(this, state='inline'))),
            # Code blocks
            (r'(::)(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\3.*|)\n)+)',
             bygroups(String.Escape, Text, String, String, Text, String)),
            include('inline'),
        ],
        'inline': [
            (r'\\.', Text),  # escape
            (r'``', String, 'literal'),  # code
            (r'(`.+?)(<.+?>)(`__?)',  # reference with inline target
             bygroups(String, String.Interpol, String)),
            (r'`.+?`__?', String),  # reference
            (r'(`.+?`)(:[a-zA-Z0-9:-]+?:)?',
             bygroups(Name.Variable, Name.Attribute)),  # role
            (r'(:[a-zA-Z0-9:-]+?:)(`.+?`)',
             bygroups(Name.Attribute, Name.Variable)),  # role (content first)
            (r'\*\*.+?\*\*', Generic.Strong),  # Strong emphasis
            (r'\*.+?\*', Generic.Emph),  # Emphasis
            (r'\[.*?\]_', String),  # Footnote or citation
            (r'<.+?>', Name.Tag),   # Hyperlink
            (r'[^\\\n\[*`:]+', Text),
            (r'.', Text),
        ],
        'literal': [
            (r'[^`]+', String),
            (r'``' + end_string_suffix, String, '#pop'),
            (r'`', String),
        ]
    }

    def __init__(self, **options):
        self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
        RegexLexer.__init__(self, **options)

    def analyse_text(text):
        if text[:2] == '..' and text[2:3] != '.':
            return 0.3
        p1 = text.find("\n")
        p2 = text.find("\n", p1 + 1)
        if (p2 > -1 and                     # has two lines
                p1 * 2 + 1 == p2 and        # they are the same length
                text[p1+1] in '-=' and      # the next line both starts and ends with
                text[p1+1] == text[p2-1]):  # ...a sufficiently high header
            return 0.5


class TexLexer(RegexLexer):
    """
    Lexer for the TeX and LaTeX typesetting languages.
    """

    name = 'TeX'
    aliases = ['tex', 'latex']
    filenames = ['*.tex', '*.aux', '*.toc']
    mimetypes = ['text/x-tex', 'text/x-latex']

    tokens = {
        'general': [
            (r'%.*?\n', Comment),
            (r'[{}]', Name.Builtin),
            (r'[&_^]', Name.Builtin),
        ],
        'root': [
            (r'\\\[', String.Backtick, 'displaymath'),
            (r'\\\(', String, 'inlinemath'),
            (r'\$\$', String.Backtick, 'displaymath'),
            (r'\$', String, 'inlinemath'),
            (r'\\([a-zA-Z]+|.)', Keyword, 'command'),
            (r'\\$', Keyword),
            include('general'),
            (r'[^\\$%&_^{}]+', Text),
        ],
        'math': [
            (r'\\([a-zA-Z]+|.)', Name.Variable),
            include('general'),
            (r'[0-9]+', Number),
            (r'[-=!+*/()\[\]]', Operator),
            (r'[^=!+*/()\[\]\\$%&_^{}0-9-]+', Name.Builtin),
        ],
        'inlinemath': [
            (r'\\\)', String, '#pop'),
            (r'\$', String, '#pop'),
            include('math'),
        ],
        'displaymath': [
            (r'\\\]', String, '#pop'),
            (r'\$\$', String, '#pop'),
            (r'\$', Name.Builtin),
            include('math'),
        ],
        'command': [
            (r'\[.*?\]', Name.Attribute),
            (r'\*', Keyword),
            default('#pop'),
        ],
    }
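
    # A document that opens with one of these common preamble commands is
    # almost certainly (La)TeX.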
    def analyse_text(text):
        for start in ("\\documentclass", "\\input", "\\documentstyle",
                      "\\relax"):
            if text[:len(start)] == start:
                return True


class GroffLexer(RegexLexer):
    """
    Lexer for the (g)roff typesetting language, supporting groff
    extensions. Mainly useful for highlighting manpage sources.

    .. versionadded:: 0.6
    """

    name = 'Groff'
    aliases = ['groff', 'nroff', 'man']
    filenames = ['*.[1234567]', '*.man']
    mimetypes = ['application/x-troff', 'text/troff']

    tokens = {
        'root': [
            (r'(\.)(\w+)', bygroups(Text, Keyword), 'request'),
            (r'\.', Punctuation, 'request'),
            # Regular characters, slurp till we find a backslash or newline
            (r'[^\\\n]+', Text, 'textline'),
            default('textline'),
        ],
        'textline': [
            include('escapes'),
            (r'[^\\\n]+', Text),
            (r'\n', Text, '#pop'),
        ],
        'escapes': [
            # groff has many ways to write escapes.
            (r'\\"[^\n]*', Comment),
            (r'\\[fn]\w', String.Escape),
            (r'\\\(.{2}', String.Escape),
            (r'\\.\[.*\]', String.Escape),
            (r'\\.', String.Escape),
            (r'\\\n', Text, 'request'),
        ],
        'request': [
            (r'\n', Text, '#pop'),
            include('escapes'),
            (r'"[^\n"]+"', String.Double),
            (r'\d+', Number),
            (r'\S+', String),
            (r'\s+', Text),
        ],
    }
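
    # Heuristic: manpage sources almost always start with a troff comment
    # (.\") or the .TH title macro; a leading two-character request is also
    # a strong signal.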
    def analyse_text(text):
        if text[:1] != '.':
            return False
        if text[:3] == '.\\"':
            return True
        if text[:4] == '.TH ':
            return True
        if text[1:3].isalnum() and text[3].isspace():
            return 0.9


class MozPreprocHashLexer(RegexLexer):
    """
    Lexer for Mozilla Preprocessor files (with '#' as the marker).

    Other data is left untouched.

    .. versionadded:: 2.0
    """
    name = 'mozhashpreproc'
    aliases = [name]
    filenames = []
    mimetypes = []

    tokens = {
        'root': [
            (r'^#', Comment.Preproc, ('expr', 'exprstart')),
            (r'.+', Other),
        ],
        'exprstart': [
            (r'(literal)(.*)', bygroups(Comment.Preproc, Text), '#pop:2'),
            (words((
                'define', 'undef', 'if', 'ifdef', 'ifndef', 'else', 'elif',
                'elifdef', 'elifndef', 'endif', 'expand', 'filter', 'unfilter',
                'include', 'includesubst', 'error')),
             Comment.Preproc, '#pop'),
        ],
        'expr': [
            (words(('!', '!=', '==', '&&', '||')), Operator),
            (r'(defined)(\()', bygroups(Keyword, Punctuation)),
            (r'\)', Punctuation),
            (r'[0-9]+', Number.Decimal),
            (r'__\w+?__', Name.Variable),
            (r'@\w+?@', Name.Class),
            (r'\w+', Name),
            (r'\n', Text, '#pop'),
            (r'\s+', Text),
            (r'\S', Punctuation),
        ],
    }


class MozPreprocPercentLexer(MozPreprocHashLexer):
    """
    Lexer for Mozilla Preprocessor files (with '%' as the marker).

    Other data is left untouched.

    .. versionadded:: 2.0
    """
    name = 'mozpercentpreproc'
    aliases = [name]
    filenames = []
    mimetypes = []

    tokens = {
        'root': [
            (r'^%', Comment.Preproc, ('expr', 'exprstart')),
            (r'.+', Other),
        ],
    }
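

# The three delegating lexers below pair a preprocessor lexer with a content
# lexer: the preprocessor lexer runs first, and everything it emits as
# ``Other`` (i.e. non-directive lines) is re-lexed by the delegated root
# lexer (XML, Javascript or CSS).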
class MozPreprocXulLexer(DelegatingLexer):
    """
    Subclass of the `MozPreprocHashLexer` that highlights unlexed data with the
    `XmlLexer`.

    .. versionadded:: 2.0
    """
    name = "XUL+mozpreproc"
    aliases = ['xul+mozpreproc']
    filenames = ['*.xul.in']
    mimetypes = []

    def __init__(self, **options):
        super(MozPreprocXulLexer, self).__init__(
            XmlLexer, MozPreprocHashLexer, **options)


class MozPreprocJavascriptLexer(DelegatingLexer):
    """
    Subclass of the `MozPreprocHashLexer` that highlights unlexed data with the
    `JavascriptLexer`.

    .. versionadded:: 2.0
    """
    name = "Javascript+mozpreproc"
    aliases = ['javascript+mozpreproc']
    filenames = ['*.js.in']
    mimetypes = []

    def __init__(self, **options):
        super(MozPreprocJavascriptLexer, self).__init__(
            JavascriptLexer, MozPreprocHashLexer, **options)


class MozPreprocCssLexer(DelegatingLexer):
    """
    Subclass of the `MozPreprocPercentLexer` that highlights unlexed data with
    the `CssLexer`.

    .. versionadded:: 2.0
    """
    name = "CSS+mozpreproc"
    aliases = ['css+mozpreproc']
    filenames = ['*.css.in']
    mimetypes = []

    def __init__(self, **options):
        super(MozPreprocCssLexer, self).__init__(
            CssLexer, MozPreprocPercentLexer, **options)


class MarkdownLexer(RegexLexer):
    """
    For `Markdown <https://help.github.com/categories/writing-on-github/>`_ markup.

    .. versionadded:: 2.2
    """
    name = 'markdown'
    aliases = ['md']
    filenames = ['*.md']
    mimetypes = ["text/x-markdown"]
    flags = re.MULTILINE
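
    # Callback for fenced code blocks that carry a language name: the fences
    # and the language are emitted as String tokens, and the body is handed
    # to the matching lexer when ``handlecodeblocks`` is enabled and such a
    # lexer exists.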
    def _handle_codeblock(self, match):
        """
        match args: 1:backticks, 2:lang_name, 3:newline, 4:code, 5:backticks
        """
        from pygments.lexers import get_lexer_by_name

        # section header
        yield match.start(1), String, match.group(1)
        yield match.start(2), String, match.group(2)
        yield match.start(3), Text, match.group(3)

        # lookup lexer if wanted and existing
        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name(match.group(2).strip())
            except ClassNotFound:
                pass
        code = match.group(4)

        # no lexer for this language. handle it like it was a code block
        if lexer is None:
            yield match.start(4), String, code
        else:
            for item in do_insertions([], lexer.get_tokens_unprocessed(code)):
                yield item

        yield match.start(5), String, match.group(5)

    tokens = {
        'root': [
            # heading with pound prefix
            (r'^(#)([^#].+\n)', bygroups(Generic.Heading, Text)),
            (r'^(#{2,6})(.+\n)', bygroups(Generic.Subheading, Text)),
            # task list
            (r'^(\s*)([*-] )(\[[ xX]\])( .+\n)',
             bygroups(Text, Keyword, Keyword, using(this, state='inline'))),
            # bulleted lists
            (r'^(\s*)([*-])(\s)(.+\n)',
             bygroups(Text, Keyword, Text, using(this, state='inline'))),
            # numbered lists
            (r'^(\s*)([0-9]+\.)( .+\n)',
             bygroups(Text, Keyword, using(this, state='inline'))),
            # quote
            (r'^(\s*>\s)(.+\n)', bygroups(Keyword, Generic.Emph)),
            # text block
            (r'^(```\n)([\w\W]*?)(^```$)', bygroups(String, Text, String)),
            # code block with language
            (r'^(```)(\w+)(\n)([\w\W]*?)(^```$)', _handle_codeblock),
            include('inline'),
        ],
        'inline': [
            # escape
            (r'\\.', Text),
            # italics
            (r'(\s)([*_][^*_]+[*_])(\W|\n)', bygroups(Text, Generic.Emph, Text)),
            # bold
            # warning: the following rule eats internal tags, e.g. in
            # **foo _bar_ baz** the _bar_ part is not highlighted as italics
            (r'(\s)((\*\*|__).*\3)((?=\W|\n))',
             bygroups(Text, Generic.Strong, None, Text)),
            # "proper way" (r'(\s)([*_]{2}[^*_]+[*_]{2})((?=\W|\n))', bygroups(Text, Generic.Strong, Text)),
            # strikethrough
            (r'(\s)(~~[^~]+~~)((?=\W|\n))', bygroups(Text, Generic.Deleted, Text)),
            # inline code
            (r'`[^`]+`', String.Backtick),
            # mentions and topics (twitter and github stuff)
            (r'[@#][\w/:]+', Name.Entity),
            # (image?) links, e.g.:
            # ![Image of Yaktocat](https://octodex.github.com/images/yaktocat.png)
            (r'(!?\[)([^]]+)(\])(\()([^)]+)(\))',
             bygroups(Text, Name.Tag, Text, Text, Name.Attribute, Text)),
            # reference-style links, e.g.:
            # [an example][id]
            # [id]: http://example.com/
            (r'(\[)([^]]+)(\])(\[)([^]]*)(\])',
             bygroups(Text, Name.Tag, Text, Text, Name.Label, Text)),
            (r'^(\s*\[)([^]]*)(\]:\s*)(.+)',
             bygroups(Text, Name.Label, Text, Name.Attribute)),

            # general text, must come last!
            (r'[^\\\s]+', Text),
            (r'.', Text),
        ],
    }

    def __init__(self, **options):
        self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
        RegexLexer.__init__(self, **options)
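

if __name__ == '__main__':
    # A minimal smoke test, not part of the original module: run a few of
    # the lexers defined above over tiny, made-up samples and print the
    # highlighted output. Assumes only the public pygments API
    # (``highlight`` plus ``TerminalFormatter``).
    from pygments import highlight
    from pygments.formatters import TerminalFormatter

    samples = [
        (BBCodeLexer(), '[b]bold[/b] and [color=green]green[/color] text'),
        (RstLexer(), 'Title\n=====\n\nSome *emphasis* and ``literal`` text.\n'),
        (MarkdownLexer(), '# Heading\n\n* item with `code`\n'),
    ]
    for lexer, source in samples:
        print('---', lexer.name)
        print(highlight(source, lexer, TerminalFormatter()))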