  1. """
  2. pygments.lexers.html
  3. ~~~~~~~~~~~~~~~~~~~~
  4. Lexers for HTML, XML and related markup.
  5. :copyright: Copyright 2006-2024 by the Pygments team, see AUTHORS.
  6. :license: BSD, see LICENSE for details.
  7. """
  8. import re
  9. from pygments.lexer import RegexLexer, ExtendedRegexLexer, include, bygroups, \
  10. default, using
  11. from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
  12. Punctuation, Whitespace
  13. from pygments.util import looks_like_xml, html_doctype_matches
  14. from pygments.lexers.javascript import JavascriptLexer
  15. from pygments.lexers.jvm import ScalaLexer
  16. from pygments.lexers.css import CssLexer, _indentation, _starts_block
  17. from pygments.lexers.ruby import RubyLexer
  18. __all__ = ['HtmlLexer', 'DtdLexer', 'XmlLexer', 'XsltLexer', 'HamlLexer',
  19. 'ScamlLexer', 'PugLexer', 'UrlEncodedLexer']
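
# Minimal usage sketch (illustrative, not part of the module): these lexers
# are normally driven through the high-level Pygments API, e.g.
#
#     from pygments import highlight
#     from pygments.formatters import HtmlFormatter
#     print(highlight('<p>hi</p>', HtmlLexer(), HtmlFormatter()))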


class HtmlLexer(RegexLexer):
    """
    For HTML 4 and XHTML 1 markup. Nested JavaScript and CSS are highlighted
    by the appropriate lexer.
    """
    name = 'HTML'
    url = 'https://html.spec.whatwg.org/'
    aliases = ['html']
    filenames = ['*.html', '*.htm', '*.xhtml', '*.xslt']
    mimetypes = ['text/html', 'application/xhtml+xml']
    version_added = ''

    flags = re.IGNORECASE | re.DOTALL
    tokens = {
        'root': [
            ('[^<&]+', Text),
            (r'&\S*?;', Name.Entity),
            (r'\<\!\[CDATA\[.*?\]\]\>', Comment.Preproc),
            (r'<!--.*?-->', Comment.Multiline),
            (r'<\?.*?\?>', Comment.Preproc),
            ('<![^>]*>', Comment.Preproc),
            (r'(<)(\s*)(script)(\s*)',
             bygroups(Punctuation, Text, Name.Tag, Text),
             ('script-content', 'tag')),
            (r'(<)(\s*)(style)(\s*)',
             bygroups(Punctuation, Text, Name.Tag, Text),
             ('style-content', 'tag')),
            # note: this allows tag names not used in HTML like <x:with-dash>,
            # this is to support yet-unknown template engines and the like
            (r'(<)(\s*)([\w:.-]+)',
             bygroups(Punctuation, Text, Name.Tag), 'tag'),
            (r'(<)(\s*)(/)(\s*)([\w:.-]+)(\s*)(>)',
             bygroups(Punctuation, Text, Punctuation, Text, Name.Tag, Text,
                      Punctuation)),
        ],
        'tag': [
            (r'\s+', Text),
            (r'([\w:-]+\s*)(=)(\s*)', bygroups(Name.Attribute, Operator, Text),
             'attr'),
            (r'[\w:-]+', Name.Attribute),
            (r'(/?)(\s*)(>)', bygroups(Punctuation, Text, Punctuation), '#pop'),
        ],
        'script-content': [
            (r'(<)(\s*)(/)(\s*)(script)(\s*)(>)',
             bygroups(Punctuation, Text, Punctuation, Text, Name.Tag, Text,
                      Punctuation), '#pop'),
            (r'.+?(?=<\s*/\s*script\s*>)', using(JavascriptLexer)),
            # fallback cases for when there is no closing script tag
            # first look for newline and then go back into root state
            # if that fails just read the rest of the file
            # this is similar to the error handling logic in lexer.py
            (r'.+?\n', using(JavascriptLexer), '#pop'),
            (r'.+', using(JavascriptLexer), '#pop'),
        ],
        'style-content': [
            (r'(<)(\s*)(/)(\s*)(style)(\s*)(>)',
             bygroups(Punctuation, Text, Punctuation, Text, Name.Tag, Text,
                      Punctuation), '#pop'),
            (r'.+?(?=<\s*/\s*style\s*>)', using(CssLexer)),
            # fallback cases for when there is no closing style tag
            # first look for newline and then go back into root state
            # if that fails just read the rest of the file
            # this is similar to the error handling logic in lexer.py
            (r'.+?\n', using(CssLexer), '#pop'),
            (r'.+', using(CssLexer), '#pop'),
        ],
        'attr': [
            ('".*?"', String, '#pop'),
            ("'.*?'", String, '#pop'),
            (r'[^\s>]+', String, '#pop'),
        ],
    }
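
    # analyse_text returns a confidence score in [0.0, 1.0] that guess_lexer()
    # uses to pick a lexer; a doctype match is strong but not conclusive.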
    def analyse_text(text):
        if html_doctype_matches(text):
            return 0.5


class DtdLexer(RegexLexer):
    """
    A lexer for DTDs (Document Type Definitions).
    """

    flags = re.MULTILINE | re.DOTALL

    name = 'DTD'
    aliases = ['dtd']
    filenames = ['*.dtd']
    mimetypes = ['application/xml-dtd']
    url = 'https://en.wikipedia.org/wiki/Document_type_definition'
    version_added = '1.5'

    tokens = {
        'root': [
            include('common'),

            (r'(<!ELEMENT)(\s+)(\S+)',
             bygroups(Keyword, Text, Name.Tag), 'element'),
            (r'(<!ATTLIST)(\s+)(\S+)',
             bygroups(Keyword, Text, Name.Tag), 'attlist'),
            (r'(<!ENTITY)(\s+)(\S+)',
             bygroups(Keyword, Text, Name.Entity), 'entity'),
            (r'(<!NOTATION)(\s+)(\S+)',
             bygroups(Keyword, Text, Name.Tag), 'notation'),
            (r'(<!\[)([^\[\s]+)(\s*)(\[)',  # conditional sections
             bygroups(Keyword, Name.Entity, Text, Keyword)),

            (r'(<!DOCTYPE)(\s+)([^>\s]+)',
             bygroups(Keyword, Text, Name.Tag)),
            (r'PUBLIC|SYSTEM', Keyword.Constant),
            (r'[\[\]>]', Keyword),
        ],

        'common': [
            (r'\s+', Text),
            (r'(%|&)[^;]*;', Name.Entity),
            ('<!--', Comment, 'comment'),
            (r'[(|)*,?+]', Operator),
            (r'"[^"]*"', String.Double),
            (r'\'[^\']*\'', String.Single),
        ],

        'comment': [
            ('[^-]+', Comment),
            ('-->', Comment, '#pop'),
            ('-', Comment),
        ],

        'element': [
            include('common'),
            (r'EMPTY|ANY|#PCDATA', Keyword.Constant),
            (r'[^>\s|()?+*,]+', Name.Tag),
            (r'>', Keyword, '#pop'),
        ],

        'attlist': [
            include('common'),
            (r'CDATA|IDREFS|IDREF|ID|NMTOKENS|NMTOKEN|ENTITIES|ENTITY|NOTATION',
             Keyword.Constant),
            (r'#REQUIRED|#IMPLIED|#FIXED', Keyword.Constant),
            (r'xml:space|xml:lang', Keyword.Reserved),
            (r'[^>\s|()?+*,]+', Name.Attribute),
            (r'>', Keyword, '#pop'),
        ],

        'entity': [
            include('common'),
            (r'SYSTEM|PUBLIC|NDATA', Keyword.Constant),
            (r'[^>\s|()?+*,]+', Name.Entity),
            (r'>', Keyword, '#pop'),
        ],

        'notation': [
            include('common'),
            (r'SYSTEM|PUBLIC', Keyword.Constant),
            (r'[^>\s|()?+*,]+', Name.Attribute),
            (r'>', Keyword, '#pop'),
        ],
    }
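
    # A document that already looks like well-formed XML is left to XmlLexer;
    # otherwise the presence of DTD declarations is a strong signal.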
    def analyse_text(text):
        if not looks_like_xml(text) and \
                ('<!ELEMENT' in text or '<!ATTLIST' in text or '<!ENTITY' in text):
            return 0.8


class XmlLexer(RegexLexer):
    """
    Generic lexer for XML (eXtensible Markup Language).
    """

    flags = re.MULTILINE | re.DOTALL

    name = 'XML'
    aliases = ['xml']
    filenames = ['*.xml', '*.xsl', '*.rss', '*.xslt', '*.xsd',
                 '*.wsdl', '*.wsf']
    mimetypes = ['text/xml', 'application/xml', 'image/svg+xml',
                 'application/rss+xml', 'application/atom+xml']
    url = 'https://www.w3.org/XML'
    version_added = ''

    tokens = {
        'root': [
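            # note the \s/\S pair below: the first rule matches character data
            # containing no whitespace, the second matches whitespace only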
            (r'[^<&\s]+', Text),
            (r'[^<&\S]+', Whitespace),
            (r'&\S*?;', Name.Entity),
            (r'\<\!\[CDATA\[.*?\]\]\>', Comment.Preproc),
            (r'<!--.*?-->', Comment.Multiline),
            (r'<\?.*?\?>', Comment.Preproc),
            ('<![^>]*>', Comment.Preproc),
            (r'<\s*[\w:.-]+', Name.Tag, 'tag'),
            (r'<\s*/\s*[\w:.-]+\s*>', Name.Tag),
        ],
        'tag': [
            (r'\s+', Whitespace),
            (r'[\w.:-]+\s*=', Name.Attribute, 'attr'),
            (r'/?\s*>', Name.Tag, '#pop'),
        ],
        'attr': [
            (r'\s+', Whitespace),
            ('".*?"', String, '#pop'),
            ("'.*?'", String, '#pop'),
            (r'[^\s>]+', String, '#pop'),
        ],
    }
    def analyse_text(text):
        if looks_like_xml(text):
            return 0.45  # less than HTML


class XsltLexer(XmlLexer):
    """
    A lexer for XSLT.
    """

    name = 'XSLT'
    aliases = ['xslt']
    filenames = ['*.xsl', '*.xslt', '*.xpl']  # xpl is XProc
    mimetypes = ['application/xsl+xml', 'application/xslt+xml']
    url = 'https://www.w3.org/TR/xslt-30'
    version_added = '0.10'

    EXTRA_KEYWORDS = {
        'apply-imports', 'apply-templates', 'attribute',
        'attribute-set', 'call-template', 'choose', 'comment',
        'copy', 'copy-of', 'decimal-format', 'element', 'fallback',
        'for-each', 'if', 'import', 'include', 'key', 'message',
        'namespace-alias', 'number', 'otherwise', 'output', 'param',
        'preserve-space', 'processing-instruction', 'sort',
        'strip-space', 'stylesheet', 'template', 'text', 'transform',
        'value-of', 'variable', 'when', 'with-param'
    }
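
    # Post-process the XML token stream: tags in the xsl: namespace whose
    # local name is a known XSLT instruction are re-emitted as Keyword.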
    def get_tokens_unprocessed(self, text):
        for index, token, value in XmlLexer.get_tokens_unprocessed(self, text):
            m = re.match('</?xsl:([^>]*)/?>?', value)

            if token is Name.Tag and m and m.group(1) in self.EXTRA_KEYWORDS:
                yield index, Keyword, value
            else:
                yield index, token, value

    def analyse_text(text):
        if looks_like_xml(text) and '<xsl' in text:
            return 0.8


class HamlLexer(ExtendedRegexLexer):
    """
    For Haml markup.
    """

    name = 'Haml'
    aliases = ['haml']
    filenames = ['*.haml']
    mimetypes = ['text/x-haml']
    url = 'https://haml.info'
    version_added = '1.3'

    flags = re.IGNORECASE
    # Haml can include " |\n" anywhere,
    # which is ignored and used to wrap long lines.
    # To accommodate this, use this custom faux dot instead.
    _dot = r'(?: \|\n(?=.* \|)|.)'
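    # i.e. a " |" line break is swallowed only when the following line is
    # itself pipe-continued, so wrapped text lexes as one logical line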
    # In certain places, a comma at the end of the line
    # allows line wrapping as well.
    _comma_dot = r'(?:,\s*\n|' + _dot + ')'

    tokens = {
        'root': [
            (r'[ \t]*\n', Text),
            (r'[ \t]*', _indentation),
        ],

        'css': [
            (r'\.[\w:-]+', Name.Class, 'tag'),
            (r'\#[\w:-]+', Name.Function, 'tag'),
        ],

        'eval-or-plain': [
            (r'[&!]?==', Punctuation, 'plain'),
            (r'([&!]?[=~])(' + _comma_dot + r'*\n)',
             bygroups(Punctuation, using(RubyLexer)),
             'root'),
            default('plain'),
        ],

        'content': [
            include('css'),
            (r'%[\w:-]+', Name.Tag, 'tag'),
            (r'!!!' + _dot + r'*\n', Name.Namespace, '#pop'),
            (r'(/)(\[' + _dot + r'*?\])(' + _dot + r'*\n)',
             bygroups(Comment, Comment.Special, Comment),
             '#pop'),
            (r'/' + _dot + r'*\n', _starts_block(Comment, 'html-comment-block'),
             '#pop'),
            (r'-#' + _dot + r'*\n', _starts_block(Comment.Preproc,
                                                  'haml-comment-block'), '#pop'),
            (r'(-)(' + _comma_dot + r'*\n)',
             bygroups(Punctuation, using(RubyLexer)),
             '#pop'),
            (r':' + _dot + r'*\n', _starts_block(Name.Decorator, 'filter-block'),
             '#pop'),
            include('eval-or-plain'),
        ],

        'tag': [
            include('css'),
            (r'\{(,\n|' + _dot + r')*?\}', using(RubyLexer)),
            (r'\[' + _dot + r'*?\]', using(RubyLexer)),
            (r'\(', Text, 'html-attributes'),
            (r'/[ \t]*\n', Punctuation, '#pop:2'),
            (r'[<>]{1,2}(?=[ \t=])', Punctuation),
            include('eval-or-plain'),
        ],

        'plain': [
            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Text),
            (r'(#\{)(' + _dot + r'*?)(\})',
             bygroups(String.Interpol, using(RubyLexer), String.Interpol)),
            (r'\n', Text, 'root'),
        ],

        'html-attributes': [
            (r'\s+', Text),
            (r'[\w:-]+[ \t]*=', Name.Attribute, 'html-attribute-value'),
            (r'[\w:-]+', Name.Attribute),
            (r'\)', Text, '#pop'),
        ],

        'html-attribute-value': [
            (r'[ \t]+', Text),
            (r'\w+', Name.Variable, '#pop'),
            (r'@\w+', Name.Variable.Instance, '#pop'),
            (r'\$\w+', Name.Variable.Global, '#pop'),
            (r"'(\\\\|\\[^\\]|[^'\\\n])*'", String, '#pop'),
            (r'"(\\\\|\\[^\\]|[^"\\\n])*"', String, '#pop'),
        ],

        'html-comment-block': [
            (_dot + '+', Comment),
            (r'\n', Text, 'root'),
        ],

        'haml-comment-block': [
            (_dot + '+', Comment.Preproc),
            (r'\n', Text, 'root'),
        ],

        'filter-block': [
            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Name.Decorator),
            (r'(#\{)(' + _dot + r'*?)(\})',
             bygroups(String.Interpol, using(RubyLexer), String.Interpol)),
            (r'\n', Text, 'root'),
        ],
    }


class ScamlLexer(ExtendedRegexLexer):
    """
    For Scaml markup. Scaml is Haml for Scala.
    """

    name = 'Scaml'
    aliases = ['scaml']
    filenames = ['*.scaml']
    mimetypes = ['text/x-scaml']
    url = 'https://scalate.github.io/scalate/'
    version_added = '1.4'

    flags = re.IGNORECASE
    # Scaml does not yet support the " |\n" notation to
    # wrap long lines.  Once it does, use the custom faux
    # dot instead.
    # _dot = r'(?: \|\n(?=.* \|)|.)'
    _dot = r'.'

    tokens = {
        'root': [
            (r'[ \t]*\n', Text),
            (r'[ \t]*', _indentation),
        ],

        'css': [
            (r'\.[\w:-]+', Name.Class, 'tag'),
            (r'\#[\w:-]+', Name.Function, 'tag'),
        ],

        'eval-or-plain': [
            (r'[&!]?==', Punctuation, 'plain'),
            (r'([&!]?[=~])(' + _dot + r'*\n)',
             bygroups(Punctuation, using(ScalaLexer)),
             'root'),
            default('plain'),
        ],

        'content': [
            include('css'),
            (r'%[\w:-]+', Name.Tag, 'tag'),
            (r'!!!' + _dot + r'*\n', Name.Namespace, '#pop'),
            (r'(/)(\[' + _dot + r'*?\])(' + _dot + r'*\n)',
             bygroups(Comment, Comment.Special, Comment),
             '#pop'),
            (r'/' + _dot + r'*\n', _starts_block(Comment, 'html-comment-block'),
             '#pop'),
            (r'-#' + _dot + r'*\n', _starts_block(Comment.Preproc,
                                                  'scaml-comment-block'), '#pop'),
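            # "-@" lines hold Scaml attribute declarations, with an optional
            # leading "import" highlighted as a keyword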
            (r'(-@\s*)(import)?(' + _dot + r'*\n)',
             bygroups(Punctuation, Keyword, using(ScalaLexer)),
             '#pop'),
            (r'(-)(' + _dot + r'*\n)',
             bygroups(Punctuation, using(ScalaLexer)),
             '#pop'),
            (r':' + _dot + r'*\n', _starts_block(Name.Decorator, 'filter-block'),
             '#pop'),
            include('eval-or-plain'),
        ],

        'tag': [
            include('css'),
            (r'\{(,\n|' + _dot + r')*?\}', using(ScalaLexer)),
            (r'\[' + _dot + r'*?\]', using(ScalaLexer)),
            (r'\(', Text, 'html-attributes'),
            (r'/[ \t]*\n', Punctuation, '#pop:2'),
            (r'[<>]{1,2}(?=[ \t=])', Punctuation),
            include('eval-or-plain'),
        ],

        'plain': [
            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Text),
            (r'(#\{)(' + _dot + r'*?)(\})',
             bygroups(String.Interpol, using(ScalaLexer), String.Interpol)),
            (r'\n', Text, 'root'),
        ],

        'html-attributes': [
            (r'\s+', Text),
            (r'[\w:-]+[ \t]*=', Name.Attribute, 'html-attribute-value'),
            (r'[\w:-]+', Name.Attribute),
            (r'\)', Text, '#pop'),
        ],

        'html-attribute-value': [
            (r'[ \t]+', Text),
            (r'\w+', Name.Variable, '#pop'),
            (r'@\w+', Name.Variable.Instance, '#pop'),
            (r'\$\w+', Name.Variable.Global, '#pop'),
            (r"'(\\\\|\\[^\\]|[^'\\\n])*'", String, '#pop'),
            (r'"(\\\\|\\[^\\]|[^"\\\n])*"', String, '#pop'),
        ],

        'html-comment-block': [
            (_dot + '+', Comment),
            (r'\n', Text, 'root'),
        ],

        'scaml-comment-block': [
            (_dot + '+', Comment.Preproc),
            (r'\n', Text, 'root'),
        ],

        'filter-block': [
            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Name.Decorator),
            (r'(#\{)(' + _dot + r'*?)(\})',
             bygroups(String.Interpol, using(ScalaLexer), String.Interpol)),
            (r'\n', Text, 'root'),
        ],
    }


class PugLexer(ExtendedRegexLexer):
    """
    For Pug markup.
    Pug is a variant of Scaml, see:
    http://scalate.fusesource.org/documentation/scaml-reference.html
    """

    name = 'Pug'
    aliases = ['pug', 'jade']
    filenames = ['*.pug', '*.jade']
    mimetypes = ['text/x-pug', 'text/x-jade']
    url = 'https://pugjs.org'
    version_added = '1.4'

    flags = re.IGNORECASE
    _dot = r'.'

    tokens = {
        'root': [
            (r'[ \t]*\n', Text),
            (r'[ \t]*', _indentation),
        ],

        'css': [
            (r'\.[\w:-]+', Name.Class, 'tag'),
            (r'\#[\w:-]+', Name.Function, 'tag'),
        ],

        'eval-or-plain': [
            (r'[&!]?==', Punctuation, 'plain'),
            (r'([&!]?[=~])(' + _dot + r'*\n)',
             bygroups(Punctuation, using(ScalaLexer)), 'root'),
            default('plain'),
        ],

        'content': [
            include('css'),
            (r'!!!' + _dot + r'*\n', Name.Namespace, '#pop'),
            (r'(/)(\[' + _dot + r'*?\])(' + _dot + r'*\n)',
             bygroups(Comment, Comment.Special, Comment),
             '#pop'),
            (r'/' + _dot + r'*\n', _starts_block(Comment, 'html-comment-block'),
             '#pop'),
            (r'-#' + _dot + r'*\n', _starts_block(Comment.Preproc,
                                                  'scaml-comment-block'), '#pop'),
            (r'(-@\s*)(import)?(' + _dot + r'*\n)',
             bygroups(Punctuation, Keyword, using(ScalaLexer)),
             '#pop'),
            (r'(-)(' + _dot + r'*\n)',
             bygroups(Punctuation, using(ScalaLexer)),
             '#pop'),
            (r':' + _dot + r'*\n', _starts_block(Name.Decorator, 'filter-block'),
             '#pop'),
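            # unlike Haml/Scaml, Pug tags are bare words and '|' introduces
            # a plain-text line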
            (r'[\w:-]+', Name.Tag, 'tag'),
            (r'\|', Text, 'eval-or-plain'),
        ],

        'tag': [
            include('css'),
            (r'\{(,\n|' + _dot + r')*?\}', using(ScalaLexer)),
            (r'\[' + _dot + r'*?\]', using(ScalaLexer)),
            (r'\(', Text, 'html-attributes'),
            (r'/[ \t]*\n', Punctuation, '#pop:2'),
            (r'[<>]{1,2}(?=[ \t=])', Punctuation),
            include('eval-or-plain'),
        ],

        'plain': [
            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Text),
            (r'(#\{)(' + _dot + r'*?)(\})',
             bygroups(String.Interpol, using(ScalaLexer), String.Interpol)),
            (r'\n', Text, 'root'),
        ],

        'html-attributes': [
            (r'\s+', Text),
            (r'[\w:-]+[ \t]*=', Name.Attribute, 'html-attribute-value'),
            (r'[\w:-]+', Name.Attribute),
            (r'\)', Text, '#pop'),
        ],

        'html-attribute-value': [
            (r'[ \t]+', Text),
            (r'\w+', Name.Variable, '#pop'),
            (r'@\w+', Name.Variable.Instance, '#pop'),
            (r'\$\w+', Name.Variable.Global, '#pop'),
            (r"'(\\\\|\\[^\\]|[^'\\\n])*'", String, '#pop'),
            (r'"(\\\\|\\[^\\]|[^"\\\n])*"', String, '#pop'),
        ],

        'html-comment-block': [
            (_dot + '+', Comment),
            (r'\n', Text, 'root'),
        ],

        'scaml-comment-block': [
            (_dot + '+', Comment.Preproc),
            (r'\n', Text, 'root'),
        ],

        'filter-block': [
            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Name.Decorator),
            (r'(#\{)(' + _dot + r'*?)(\})',
             bygroups(String.Interpol, using(ScalaLexer), String.Interpol)),
            (r'\n', Text, 'root'),
        ],
    }


JadeLexer = PugLexer  # compat


class UrlEncodedLexer(RegexLexer):
    """
    Lexer for URL-encoded (application/x-www-form-urlencoded) data.
    """

    name = 'urlencoded'
    aliases = ['urlencoded']
    mimetypes = ['application/x-www-form-urlencoded']
    url = 'https://en.wikipedia.org/wiki/Percent-encoding'
    version_added = '2.16'
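
    # each key=value pair is lexed as Name.Tag / Operator / String, with the
    # joining '&' as Punctuation; percent escapes are left undecoded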
    tokens = {
        'root': [
            ('([^&=]*)(=)([^=&]*)(&?)',
             bygroups(Name.Tag, Operator, String, Punctuation)),
        ],
    }