# haskell.py -- Pygments lexers for Haskell and related languages.
  1. # -*- coding: utf-8 -*-
  2. """
  3. pygments.lexers.haskell
  4. ~~~~~~~~~~~~~~~~~~~~~~~
  5. Lexers for Haskell and related languages.
  6. :copyright: Copyright 2006-2019 by the Pygments team, see AUTHORS.
  7. :license: BSD, see LICENSE for details.
  8. """
  9. import re
  10. from pygments.lexer import Lexer, RegexLexer, bygroups, do_insertions, \
  11. default, include, inherit
  12. from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
  13. Number, Punctuation, Generic
  14. from pygments import unistring as uni
  15. __all__ = ['HaskellLexer', 'HspecLexer', 'IdrisLexer', 'AgdaLexer', 'CryptolLexer',
  16. 'LiterateHaskellLexer', 'LiterateIdrisLexer', 'LiterateAgdaLexer',
  17. 'LiterateCryptolLexer', 'KokaLexer']
  18. line_re = re.compile('.*?\n')
  19. class HaskellLexer(RegexLexer):
  20. """
  21. A Haskell lexer based on the lexemes defined in the Haskell 98 Report.
  22. .. versionadded:: 0.8
  23. """
  24. name = 'Haskell'
  25. aliases = ['haskell', 'hs']
  26. filenames = ['*.hs']
  27. mimetypes = ['text/x-haskell']
  28. flags = re.MULTILINE | re.UNICODE
  29. reserved = ('case', 'class', 'data', 'default', 'deriving', 'do', 'else',
  30. 'family', 'if', 'in', 'infix[lr]?', 'instance',
  31. 'let', 'newtype', 'of', 'then', 'type', 'where', '_')
  32. ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
  33. 'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
  34. 'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
  35. 'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')
  36. tokens = {
  37. 'root': [
  38. # Whitespace:
  39. (r'\s+', Text),
  40. # (r'--\s*|.*$', Comment.Doc),
  41. (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
  42. (r'\{-', Comment.Multiline, 'comment'),
  43. # Lexemes:
  44. # Identifiers
  45. (r'\bimport\b', Keyword.Reserved, 'import'),
  46. (r'\bmodule\b', Keyword.Reserved, 'module'),
  47. (r'\berror\b', Name.Exception),
  48. (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
  49. (r"'[^\\]'", String.Char), # this has to come before the TH quote
  50. (r'^[_' + uni.Ll + r'][\w\']*', Name.Function),
  51. (r"'?[_" + uni.Ll + r"][\w']*", Name),
  52. (r"('')?[" + uni.Lu + r"][\w\']*", Keyword.Type),
  53. (r"(')[" + uni.Lu + r"][\w\']*", Keyword.Type),
  54. (r"(')\[[^\]]*\]", Keyword.Type), # tuples and lists get special treatment in GHC
  55. (r"(')\([^)]*\)", Keyword.Type), # ..
  56. # Operators
  57. (r'\\(?![:!#$%&*+.\\/<=>?@^|~-]+)', Name.Function), # lambda operator
  58. (r'(<-|::|->|=>|=)(?![:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word), # specials
  59. (r':[:!#$%&*+.\\/<=>?@^|~-]*', Keyword.Type), # Constructor operators
  60. (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator), # Other operators
  61. # Numbers
  62. (r'0[xX]_*[\da-fA-F](_*[\da-fA-F])*_*[pP][+-]?\d(_*\d)*', Number.Float),
  63. (r'0[xX]_*[\da-fA-F](_*[\da-fA-F])*\.[\da-fA-F](_*[\da-fA-F])*'
  64. r'(_*[pP][+-]?\d(_*\d)*)?', Number.Float),
  65. (r'\d(_*\d)*_*[eE][+-]?\d(_*\d)*', Number.Float),
  66. (r'\d(_*\d)*\.\d(_*\d)*(_*[eE][+-]?\d(_*\d)*)?', Number.Float),
  67. (r'0[bB]_*[01](_*[01])*', Number.Bin),
  68. (r'0[oO]_*[0-7](_*[0-7])*', Number.Oct),
  69. (r'0[xX]_*[\da-fA-F](_*[\da-fA-F])*', Number.Hex),
  70. (r'\d(_*\d)*', Number.Integer),
  71. # Character/String Literals
  72. (r"'", String.Char, 'character'),
  73. (r'"', String, 'string'),
  74. # Special
  75. (r'\[\]', Keyword.Type),
  76. (r'\(\)', Name.Builtin),
  77. (r'[][(),;`{}]', Punctuation),
  78. ],
  79. 'import': [
  80. # Import statements
  81. (r'\s+', Text),
  82. (r'"', String, 'string'),
  83. # after "funclist" state
  84. (r'\)', Punctuation, '#pop'),
  85. (r'qualified\b', Keyword),
  86. # import X as Y
  87. (r'([' + uni.Lu + r'][\w.]*)(\s+)(as)(\s+)([' + uni.Lu + r'][\w.]*)',
  88. bygroups(Name.Namespace, Text, Keyword, Text, Name), '#pop'),
  89. # import X hiding (functions)
  90. (r'([' + uni.Lu + r'][\w.]*)(\s+)(hiding)(\s+)(\()',
  91. bygroups(Name.Namespace, Text, Keyword, Text, Punctuation), 'funclist'),
  92. # import X (functions)
  93. (r'([' + uni.Lu + r'][\w.]*)(\s+)(\()',
  94. bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
  95. # import X
  96. (r'[\w.]+', Name.Namespace, '#pop'),
  97. ],
  98. 'module': [
  99. (r'\s+', Text),
  100. (r'([' + uni.Lu + r'][\w.]*)(\s+)(\()',
  101. bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
  102. (r'[' + uni.Lu + r'][\w.]*', Name.Namespace, '#pop'),
  103. ],
  104. 'funclist': [
  105. (r'\s+', Text),
  106. (r'[' + uni.Lu + r']\w*', Keyword.Type),
  107. (r'(_[\w\']+|[' + uni.Ll + r'][\w\']*)', Name.Function),
  108. (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
  109. (r'\{-', Comment.Multiline, 'comment'),
  110. (r',', Punctuation),
  111. (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
  112. # (HACK, but it makes sense to push two instances, believe me)
  113. (r'\(', Punctuation, ('funclist', 'funclist')),
  114. (r'\)', Punctuation, '#pop:2'),
  115. ],
  116. # NOTE: the next four states are shared in the AgdaLexer; make sure
  117. # any change is compatible with Agda as well or copy over and change
  118. 'comment': [
  119. # Multiline Comments
  120. (r'[^-{}]+', Comment.Multiline),
  121. (r'\{-', Comment.Multiline, '#push'),
  122. (r'-\}', Comment.Multiline, '#pop'),
  123. (r'[-{}]', Comment.Multiline),
  124. ],
  125. 'character': [
  126. # Allows multi-chars, incorrectly.
  127. (r"[^\\']'", String.Char, '#pop'),
  128. (r"\\", String.Escape, 'escape'),
  129. ("'", String.Char, '#pop'),
  130. ],
  131. 'string': [
  132. (r'[^\\"]+', String),
  133. (r"\\", String.Escape, 'escape'),
  134. ('"', String, '#pop'),
  135. ],
  136. 'escape': [
  137. (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
  138. (r'\^[][' + uni.Lu + r'@^_]', String.Escape, '#pop'),
  139. ('|'.join(ascii), String.Escape, '#pop'),
  140. (r'o[0-7]+', String.Escape, '#pop'),
  141. (r'x[\da-fA-F]+', String.Escape, '#pop'),
  142. (r'\d+', String.Escape, '#pop'),
  143. (r'\s+\\', String.Escape, '#pop'),
  144. ],
  145. }
  146. class HspecLexer(HaskellLexer):
  147. """
  148. A Haskell lexer with support for Hspec constructs.
  149. .. versionadded:: 2.4.0
  150. """
  151. name = 'Hspec'
  152. aliases = ['hspec']
  153. filenames = []
  154. mimetypes = []
  155. tokens = {
  156. 'root': [
  157. (r'(it\s*)("[^"]*")', bygroups(Text, String.Doc)),
  158. (r'(describe\s*)("[^"]*")', bygroups(Text, String.Doc)),
  159. (r'(context\s*)("[^"]*")', bygroups(Text, String.Doc)),
  160. inherit,
  161. ],
  162. }
  163. class IdrisLexer(RegexLexer):
  164. """
  165. A lexer for the dependently typed programming language Idris.
  166. Based on the Haskell and Agda Lexer.
  167. .. versionadded:: 2.0
  168. """
  169. name = 'Idris'
  170. aliases = ['idris', 'idr']
  171. filenames = ['*.idr']
  172. mimetypes = ['text/x-idris']
  173. reserved = ('case', 'class', 'data', 'default', 'using', 'do', 'else',
  174. 'if', 'in', 'infix[lr]?', 'instance', 'rewrite', 'auto',
  175. 'namespace', 'codata', 'mutual', 'private', 'public', 'abstract',
  176. 'total', 'partial',
  177. 'let', 'proof', 'of', 'then', 'static', 'where', '_', 'with',
  178. 'pattern', 'term', 'syntax', 'prefix',
  179. 'postulate', 'parameters', 'record', 'dsl', 'impossible', 'implicit',
  180. 'tactics', 'intros', 'intro', 'compute', 'refine', 'exact', 'trivial')
  181. ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
  182. 'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
  183. 'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
  184. 'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')
  185. directives = ('lib', 'link', 'flag', 'include', 'hide', 'freeze', 'access',
  186. 'default', 'logging', 'dynamic', 'name', 'error_handlers', 'language')
  187. tokens = {
  188. 'root': [
  189. # Comments
  190. (r'^(\s*)(%%%s)' % '|'.join(directives),
  191. bygroups(Text, Keyword.Reserved)),
  192. (r'(\s*)(--(?![!#$%&*+./<=>?@^|_~:\\]).*?)$', bygroups(Text, Comment.Single)),
  193. (r'(\s*)(\|{3}.*?)$', bygroups(Text, Comment.Single)),
  194. (r'(\s*)(\{-)', bygroups(Text, Comment.Multiline), 'comment'),
  195. # Declaration
  196. (r'^(\s*)([^\s(){}]+)(\s*)(:)(\s*)',
  197. bygroups(Text, Name.Function, Text, Operator.Word, Text)),
  198. # Identifiers
  199. (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
  200. (r'(import|module)(\s+)', bygroups(Keyword.Reserved, Text), 'module'),
  201. (r"('')?[A-Z][\w\']*", Keyword.Type),
  202. (r'[a-z][\w\']*', Text),
  203. # Special Symbols
  204. (r'(<-|::|->|=>|=)', Operator.Word), # specials
  205. (r'([(){}\[\]:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word), # specials
  206. # Numbers
  207. (r'\d+[eE][+-]?\d+', Number.Float),
  208. (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
  209. (r'0[xX][\da-fA-F]+', Number.Hex),
  210. (r'\d+', Number.Integer),
  211. # Strings
  212. (r"'", String.Char, 'character'),
  213. (r'"', String, 'string'),
  214. (r'[^\s(){}]+', Text),
  215. (r'\s+?', Text), # Whitespace
  216. ],
  217. 'module': [
  218. (r'\s+', Text),
  219. (r'([A-Z][\w.]*)(\s+)(\()',
  220. bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
  221. (r'[A-Z][\w.]*', Name.Namespace, '#pop'),
  222. ],
  223. 'funclist': [
  224. (r'\s+', Text),
  225. (r'[A-Z]\w*', Keyword.Type),
  226. (r'(_[\w\']+|[a-z][\w\']*)', Name.Function),
  227. (r'--.*$', Comment.Single),
  228. (r'\{-', Comment.Multiline, 'comment'),
  229. (r',', Punctuation),
  230. (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
  231. # (HACK, but it makes sense to push two instances, believe me)
  232. (r'\(', Punctuation, ('funclist', 'funclist')),
  233. (r'\)', Punctuation, '#pop:2'),
  234. ],
  235. # NOTE: the next four states are shared in the AgdaLexer; make sure
  236. # any change is compatible with Agda as well or copy over and change
  237. 'comment': [
  238. # Multiline Comments
  239. (r'[^-{}]+', Comment.Multiline),
  240. (r'\{-', Comment.Multiline, '#push'),
  241. (r'-\}', Comment.Multiline, '#pop'),
  242. (r'[-{}]', Comment.Multiline),
  243. ],
  244. 'character': [
  245. # Allows multi-chars, incorrectly.
  246. (r"[^\\']", String.Char),
  247. (r"\\", String.Escape, 'escape'),
  248. ("'", String.Char, '#pop'),
  249. ],
  250. 'string': [
  251. (r'[^\\"]+', String),
  252. (r"\\", String.Escape, 'escape'),
  253. ('"', String, '#pop'),
  254. ],
  255. 'escape': [
  256. (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
  257. (r'\^[][A-Z@^_]', String.Escape, '#pop'),
  258. ('|'.join(ascii), String.Escape, '#pop'),
  259. (r'o[0-7]+', String.Escape, '#pop'),
  260. (r'x[\da-fA-F]+', String.Escape, '#pop'),
  261. (r'\d+', String.Escape, '#pop'),
  262. (r'\s+\\', String.Escape, '#pop')
  263. ],
  264. }
  265. class AgdaLexer(RegexLexer):
  266. """
  267. For the `Agda <http://wiki.portal.chalmers.se/agda/pmwiki.php>`_
  268. dependently typed functional programming language and proof assistant.
  269. .. versionadded:: 2.0
  270. """
  271. name = 'Agda'
  272. aliases = ['agda']
  273. filenames = ['*.agda']
  274. mimetypes = ['text/x-agda']
  275. reserved = ['abstract', 'codata', 'coinductive', 'constructor', 'data',
  276. 'field', 'forall', 'hiding', 'in', 'inductive', 'infix',
  277. 'infixl', 'infixr', 'instance', 'let', 'mutual', 'open',
  278. 'pattern', 'postulate', 'primitive', 'private',
  279. 'quote', 'quoteGoal', 'quoteTerm',
  280. 'record', 'renaming', 'rewrite', 'syntax', 'tactic',
  281. 'unquote', 'unquoteDecl', 'using', 'where', 'with']
  282. tokens = {
  283. 'root': [
  284. # Declaration
  285. (r'^(\s*)([^\s(){}]+)(\s*)(:)(\s*)',
  286. bygroups(Text, Name.Function, Text, Operator.Word, Text)),
  287. # Comments
  288. (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
  289. (r'\{-', Comment.Multiline, 'comment'),
  290. # Holes
  291. (r'\{!', Comment.Directive, 'hole'),
  292. # Lexemes:
  293. # Identifiers
  294. (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
  295. (r'(import|module)(\s+)', bygroups(Keyword.Reserved, Text), 'module'),
  296. (u'\\b(Set|Prop)[\u2080-\u2089]*\\b', Keyword.Type),
  297. # Special Symbols
  298. (r'(\(|\)|\{|\})', Operator),
  299. (u'(\\.{1,3}|\\||\u03BB|\u2200|\u2192|:|=|->)', Operator.Word),
  300. # Numbers
  301. (r'\d+[eE][+-]?\d+', Number.Float),
  302. (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
  303. (r'0[xX][\da-fA-F]+', Number.Hex),
  304. (r'\d+', Number.Integer),
  305. # Strings
  306. (r"'", String.Char, 'character'),
  307. (r'"', String, 'string'),
  308. (r'[^\s(){}]+', Text),
  309. (r'\s+?', Text), # Whitespace
  310. ],
  311. 'hole': [
  312. # Holes
  313. (r'[^!{}]+', Comment.Directive),
  314. (r'\{!', Comment.Directive, '#push'),
  315. (r'!\}', Comment.Directive, '#pop'),
  316. (r'[!{}]', Comment.Directive),
  317. ],
  318. 'module': [
  319. (r'\{-', Comment.Multiline, 'comment'),
  320. (r'[a-zA-Z][\w.]*', Name, '#pop'),
  321. (r'[\W0-9_]+', Text)
  322. ],
  323. 'comment': HaskellLexer.tokens['comment'],
  324. 'character': HaskellLexer.tokens['character'],
  325. 'string': HaskellLexer.tokens['string'],
  326. 'escape': HaskellLexer.tokens['escape']
  327. }
  328. class CryptolLexer(RegexLexer):
  329. """
  330. FIXME: A Cryptol2 lexer based on the lexemes defined in the Haskell 98 Report.
  331. .. versionadded:: 2.0
  332. """
  333. name = 'Cryptol'
  334. aliases = ['cryptol', 'cry']
  335. filenames = ['*.cry']
  336. mimetypes = ['text/x-cryptol']
  337. reserved = ('Arith', 'Bit', 'Cmp', 'False', 'Inf', 'True', 'else',
  338. 'export', 'extern', 'fin', 'if', 'import', 'inf', 'lg2',
  339. 'max', 'min', 'module', 'newtype', 'pragma', 'property',
  340. 'then', 'type', 'where', 'width')
  341. ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
  342. 'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
  343. 'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
  344. 'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')
  345. tokens = {
  346. 'root': [
  347. # Whitespace:
  348. (r'\s+', Text),
  349. # (r'--\s*|.*$', Comment.Doc),
  350. (r'//.*$', Comment.Single),
  351. (r'/\*', Comment.Multiline, 'comment'),
  352. # Lexemes:
  353. # Identifiers
  354. (r'\bimport\b', Keyword.Reserved, 'import'),
  355. (r'\bmodule\b', Keyword.Reserved, 'module'),
  356. (r'\berror\b', Name.Exception),
  357. (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
  358. (r'^[_a-z][\w\']*', Name.Function),
  359. (r"'?[_a-z][\w']*", Name),
  360. (r"('')?[A-Z][\w\']*", Keyword.Type),
  361. # Operators
  362. (r'\\(?![:!#$%&*+.\\/<=>?@^|~-]+)', Name.Function), # lambda operator
  363. (r'(<-|::|->|=>|=)(?![:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word), # specials
  364. (r':[:!#$%&*+.\\/<=>?@^|~-]*', Keyword.Type), # Constructor operators
  365. (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator), # Other operators
  366. # Numbers
  367. (r'\d+[eE][+-]?\d+', Number.Float),
  368. (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
  369. (r'0[oO][0-7]+', Number.Oct),
  370. (r'0[xX][\da-fA-F]+', Number.Hex),
  371. (r'\d+', Number.Integer),
  372. # Character/String Literals
  373. (r"'", String.Char, 'character'),
  374. (r'"', String, 'string'),
  375. # Special
  376. (r'\[\]', Keyword.Type),
  377. (r'\(\)', Name.Builtin),
  378. (r'[][(),;`{}]', Punctuation),
  379. ],
  380. 'import': [
  381. # Import statements
  382. (r'\s+', Text),
  383. (r'"', String, 'string'),
  384. # after "funclist" state
  385. (r'\)', Punctuation, '#pop'),
  386. (r'qualified\b', Keyword),
  387. # import X as Y
  388. (r'([A-Z][\w.]*)(\s+)(as)(\s+)([A-Z][\w.]*)',
  389. bygroups(Name.Namespace, Text, Keyword, Text, Name), '#pop'),
  390. # import X hiding (functions)
  391. (r'([A-Z][\w.]*)(\s+)(hiding)(\s+)(\()',
  392. bygroups(Name.Namespace, Text, Keyword, Text, Punctuation), 'funclist'),
  393. # import X (functions)
  394. (r'([A-Z][\w.]*)(\s+)(\()',
  395. bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
  396. # import X
  397. (r'[\w.]+', Name.Namespace, '#pop'),
  398. ],
  399. 'module': [
  400. (r'\s+', Text),
  401. (r'([A-Z][\w.]*)(\s+)(\()',
  402. bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
  403. (r'[A-Z][\w.]*', Name.Namespace, '#pop'),
  404. ],
  405. 'funclist': [
  406. (r'\s+', Text),
  407. (r'[A-Z]\w*', Keyword.Type),
  408. (r'(_[\w\']+|[a-z][\w\']*)', Name.Function),
  409. # TODO: these don't match the comments in docs, remove.
  410. # (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
  411. # (r'{-', Comment.Multiline, 'comment'),
  412. (r',', Punctuation),
  413. (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
  414. # (HACK, but it makes sense to push two instances, believe me)
  415. (r'\(', Punctuation, ('funclist', 'funclist')),
  416. (r'\)', Punctuation, '#pop:2'),
  417. ],
  418. 'comment': [
  419. # Multiline Comments
  420. (r'[^/*]+', Comment.Multiline),
  421. (r'/\*', Comment.Multiline, '#push'),
  422. (r'\*/', Comment.Multiline, '#pop'),
  423. (r'[*/]', Comment.Multiline),
  424. ],
  425. 'character': [
  426. # Allows multi-chars, incorrectly.
  427. (r"[^\\']'", String.Char, '#pop'),
  428. (r"\\", String.Escape, 'escape'),
  429. ("'", String.Char, '#pop'),
  430. ],
  431. 'string': [
  432. (r'[^\\"]+', String),
  433. (r"\\", String.Escape, 'escape'),
  434. ('"', String, '#pop'),
  435. ],
  436. 'escape': [
  437. (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
  438. (r'\^[][A-Z@^_]', String.Escape, '#pop'),
  439. ('|'.join(ascii), String.Escape, '#pop'),
  440. (r'o[0-7]+', String.Escape, '#pop'),
  441. (r'x[\da-fA-F]+', String.Escape, '#pop'),
  442. (r'\d+', String.Escape, '#pop'),
  443. (r'\s+\\', String.Escape, '#pop'),
  444. ],
  445. }
  446. EXTRA_KEYWORDS = {'join', 'split', 'reverse', 'transpose', 'width',
  447. 'length', 'tail', '<<', '>>', '<<<', '>>>', 'const',
  448. 'reg', 'par', 'seq', 'ASSERT', 'undefined', 'error',
  449. 'trace'}
  450. def get_tokens_unprocessed(self, text):
  451. stack = ['root']
  452. for index, token, value in \
  453. RegexLexer.get_tokens_unprocessed(self, text, stack):
  454. if token is Name and value in self.EXTRA_KEYWORDS:
  455. yield index, Name.Builtin, value
  456. else:
  457. yield index, token, value
  458. class LiterateLexer(Lexer):
  459. """
  460. Base class for lexers of literate file formats based on LaTeX or Bird-style
  461. (prefixing each code line with ">").
  462. Additional options accepted:
  463. `litstyle`
  464. If given, must be ``"bird"`` or ``"latex"``. If not given, the style
  465. is autodetected: if the first non-whitespace character in the source
  466. is a backslash or percent character, LaTeX is assumed, else Bird.
  467. """
  468. bird_re = re.compile(r'(>[ \t]*)(.*\n)')
  469. def __init__(self, baselexer, **options):
  470. self.baselexer = baselexer
  471. Lexer.__init__(self, **options)
  472. def get_tokens_unprocessed(self, text):
  473. style = self.options.get('litstyle')
  474. if style is None:
  475. style = (text.lstrip()[0:1] in '%\\') and 'latex' or 'bird'
  476. code = ''
  477. insertions = []
  478. if style == 'bird':
  479. # bird-style
  480. for match in line_re.finditer(text):
  481. line = match.group()
  482. m = self.bird_re.match(line)
  483. if m:
  484. insertions.append((len(code),
  485. [(0, Comment.Special, m.group(1))]))
  486. code += m.group(2)
  487. else:
  488. insertions.append((len(code), [(0, Text, line)]))
  489. else:
  490. # latex-style
  491. from pygments.lexers.markup import TexLexer
  492. lxlexer = TexLexer(**self.options)
  493. codelines = 0
  494. latex = ''
  495. for match in line_re.finditer(text):
  496. line = match.group()
  497. if codelines:
  498. if line.lstrip().startswith('\\end{code}'):
  499. codelines = 0
  500. latex += line
  501. else:
  502. code += line
  503. elif line.lstrip().startswith('\\begin{code}'):
  504. codelines = 1
  505. latex += line
  506. insertions.append((len(code),
  507. list(lxlexer.get_tokens_unprocessed(latex))))
  508. latex = ''
  509. else:
  510. latex += line
  511. insertions.append((len(code),
  512. list(lxlexer.get_tokens_unprocessed(latex))))
  513. for item in do_insertions(insertions, self.baselexer.get_tokens_unprocessed(code)):
  514. yield item
  515. class LiterateHaskellLexer(LiterateLexer):
  516. """
  517. For Literate Haskell (Bird-style or LaTeX) source.
  518. Additional options accepted:
  519. `litstyle`
  520. If given, must be ``"bird"`` or ``"latex"``. If not given, the style
  521. is autodetected: if the first non-whitespace character in the source
  522. is a backslash or percent character, LaTeX is assumed, else Bird.
  523. .. versionadded:: 0.9
  524. """
  525. name = 'Literate Haskell'
  526. aliases = ['lhs', 'literate-haskell', 'lhaskell']
  527. filenames = ['*.lhs']
  528. mimetypes = ['text/x-literate-haskell']
  529. def __init__(self, **options):
  530. hslexer = HaskellLexer(**options)
  531. LiterateLexer.__init__(self, hslexer, **options)
  532. class LiterateIdrisLexer(LiterateLexer):
  533. """
  534. For Literate Idris (Bird-style or LaTeX) source.
  535. Additional options accepted:
  536. `litstyle`
  537. If given, must be ``"bird"`` or ``"latex"``. If not given, the style
  538. is autodetected: if the first non-whitespace character in the source
  539. is a backslash or percent character, LaTeX is assumed, else Bird.
  540. .. versionadded:: 2.0
  541. """
  542. name = 'Literate Idris'
  543. aliases = ['lidr', 'literate-idris', 'lidris']
  544. filenames = ['*.lidr']
  545. mimetypes = ['text/x-literate-idris']
  546. def __init__(self, **options):
  547. hslexer = IdrisLexer(**options)
  548. LiterateLexer.__init__(self, hslexer, **options)
  549. class LiterateAgdaLexer(LiterateLexer):
  550. """
  551. For Literate Agda source.
  552. Additional options accepted:
  553. `litstyle`
  554. If given, must be ``"bird"`` or ``"latex"``. If not given, the style
  555. is autodetected: if the first non-whitespace character in the source
  556. is a backslash or percent character, LaTeX is assumed, else Bird.
  557. .. versionadded:: 2.0
  558. """
  559. name = 'Literate Agda'
  560. aliases = ['lagda', 'literate-agda']
  561. filenames = ['*.lagda']
  562. mimetypes = ['text/x-literate-agda']
  563. def __init__(self, **options):
  564. agdalexer = AgdaLexer(**options)
  565. LiterateLexer.__init__(self, agdalexer, litstyle='latex', **options)
  566. class LiterateCryptolLexer(LiterateLexer):
  567. """
  568. For Literate Cryptol (Bird-style or LaTeX) source.
  569. Additional options accepted:
  570. `litstyle`
  571. If given, must be ``"bird"`` or ``"latex"``. If not given, the style
  572. is autodetected: if the first non-whitespace character in the source
  573. is a backslash or percent character, LaTeX is assumed, else Bird.
  574. .. versionadded:: 2.0
  575. """
  576. name = 'Literate Cryptol'
  577. aliases = ['lcry', 'literate-cryptol', 'lcryptol']
  578. filenames = ['*.lcry']
  579. mimetypes = ['text/x-literate-cryptol']
  580. def __init__(self, **options):
  581. crylexer = CryptolLexer(**options)
  582. LiterateLexer.__init__(self, crylexer, **options)
  583. class KokaLexer(RegexLexer):
  584. """
  585. Lexer for the `Koka <http://koka.codeplex.com>`_
  586. language.
  587. .. versionadded:: 1.6
  588. """
  589. name = 'Koka'
  590. aliases = ['koka']
  591. filenames = ['*.kk', '*.kki']
  592. mimetypes = ['text/x-koka']
  593. keywords = [
  594. 'infix', 'infixr', 'infixl',
  595. 'type', 'cotype', 'rectype', 'alias',
  596. 'struct', 'con',
  597. 'fun', 'function', 'val', 'var',
  598. 'external',
  599. 'if', 'then', 'else', 'elif', 'return', 'match',
  600. 'private', 'public', 'private',
  601. 'module', 'import', 'as',
  602. 'include', 'inline',
  603. 'rec',
  604. 'try', 'yield', 'enum',
  605. 'interface', 'instance',
  606. ]
  607. # keywords that are followed by a type
  608. typeStartKeywords = [
  609. 'type', 'cotype', 'rectype', 'alias', 'struct', 'enum',
  610. ]
  611. # keywords valid in a type
  612. typekeywords = [
  613. 'forall', 'exists', 'some', 'with',
  614. ]
  615. # builtin names and special names
  616. builtin = [
  617. 'for', 'while', 'repeat',
  618. 'foreach', 'foreach-indexed',
  619. 'error', 'catch', 'finally',
  620. 'cs', 'js', 'file', 'ref', 'assigned',
  621. ]
  622. # symbols that can be in an operator
  623. symbols = r'[$%&*+@!/\\^~=.:\-?|<>]+'
  624. # symbol boundary: an operator keyword should not be followed by any of these
  625. sboundary = '(?!' + symbols + ')'
  626. # name boundary: a keyword should not be followed by any of these
  627. boundary = r'(?![\w/])'
  628. # koka token abstractions
  629. tokenType = Name.Attribute
  630. tokenTypeDef = Name.Class
  631. tokenConstructor = Generic.Emph
  632. # main lexer
  633. tokens = {
  634. 'root': [
  635. include('whitespace'),
  636. # go into type mode
  637. (r'::?' + sboundary, tokenType, 'type'),
  638. (r'(alias)(\s+)([a-z]\w*)?', bygroups(Keyword, Text, tokenTypeDef),
  639. 'alias-type'),
  640. (r'(struct)(\s+)([a-z]\w*)?', bygroups(Keyword, Text, tokenTypeDef),
  641. 'struct-type'),
  642. ((r'(%s)' % '|'.join(typeStartKeywords)) +
  643. r'(\s+)([a-z]\w*)?', bygroups(Keyword, Text, tokenTypeDef),
  644. 'type'),
  645. # special sequences of tokens (we use ?: for non-capturing group as
  646. # required by 'bygroups')
  647. (r'(module)(\s+)(interface\s+)?((?:[a-z]\w*/)*[a-z]\w*)',
  648. bygroups(Keyword, Text, Keyword, Name.Namespace)),
  649. (r'(import)(\s+)((?:[a-z]\w*/)*[a-z]\w*)'
  650. r'(?:(\s*)(=)(\s*)((?:qualified\s*)?)'
  651. r'((?:[a-z]\w*/)*[a-z]\w*))?',
  652. bygroups(Keyword, Text, Name.Namespace, Text, Keyword, Text,
  653. Keyword, Name.Namespace)),
  654. (r'(^(?:(?:public|private)\s*)?(?:function|fun|val))'
  655. r'(\s+)([a-z]\w*|\((?:' + symbols + r'|/)\))',
  656. bygroups(Keyword, Text, Name.Function)),
  657. (r'(^(?:(?:public|private)\s*)?external)(\s+)(inline\s+)?'
  658. r'([a-z]\w*|\((?:' + symbols + r'|/)\))',
  659. bygroups(Keyword, Text, Keyword, Name.Function)),
  660. # keywords
  661. (r'(%s)' % '|'.join(typekeywords) + boundary, Keyword.Type),
  662. (r'(%s)' % '|'.join(keywords) + boundary, Keyword),
  663. (r'(%s)' % '|'.join(builtin) + boundary, Keyword.Pseudo),
  664. (r'::?|:=|\->|[=.]' + sboundary, Keyword),
  665. # names
  666. (r'((?:[a-z]\w*/)*)([A-Z]\w*)',
  667. bygroups(Name.Namespace, tokenConstructor)),
  668. (r'((?:[a-z]\w*/)*)([a-z]\w*)', bygroups(Name.Namespace, Name)),
  669. (r'((?:[a-z]\w*/)*)(\((?:' + symbols + r'|/)\))',
  670. bygroups(Name.Namespace, Name)),
  671. (r'_\w*', Name.Variable),
  672. # literal string
  673. (r'@"', String.Double, 'litstring'),
  674. # operators
  675. (symbols + "|/(?![*/])", Operator),
  676. (r'`', Operator),
  677. (r'[{}()\[\];,]', Punctuation),
  678. # literals. No check for literal characters with len > 1
  679. (r'[0-9]+\.[0-9]+([eE][\-+]?[0-9]+)?', Number.Float),
  680. (r'0[xX][0-9a-fA-F]+', Number.Hex),
  681. (r'[0-9]+', Number.Integer),
  682. (r"'", String.Char, 'char'),
  683. (r'"', String.Double, 'string'),
  684. ],
  685. # type started by alias
  686. 'alias-type': [
  687. (r'=', Keyword),
  688. include('type')
  689. ],
  690. # type started by struct
  691. 'struct-type': [
  692. (r'(?=\((?!,*\)))', Punctuation, '#pop'),
  693. include('type')
  694. ],
  695. # type started by colon
  696. 'type': [
  697. (r'[(\[<]', tokenType, 'type-nested'),
  698. include('type-content')
  699. ],
  700. # type nested in brackets: can contain parameters, comma etc.
  701. 'type-nested': [
  702. (r'[)\]>]', tokenType, '#pop'),
  703. (r'[(\[<]', tokenType, 'type-nested'),
  704. (r',', tokenType),
  705. (r'([a-z]\w*)(\s*)(:)(?!:)',
  706. bygroups(Name, Text, tokenType)), # parameter name
  707. include('type-content')
  708. ],
  709. # shared contents of a type
  710. 'type-content': [
  711. include('whitespace'),
  712. # keywords
  713. (r'(%s)' % '|'.join(typekeywords) + boundary, Keyword),
  714. (r'(?=((%s)' % '|'.join(keywords) + boundary + '))',
  715. Keyword, '#pop'), # need to match because names overlap...
  716. # kinds
  717. (r'[EPHVX]' + boundary, tokenType),
  718. # type names
  719. (r'[a-z][0-9]*(?![\w/])', tokenType),
  720. (r'_\w*', tokenType.Variable), # Generic.Emph
  721. (r'((?:[a-z]\w*/)*)([A-Z]\w*)',
  722. bygroups(Name.Namespace, tokenType)),
  723. (r'((?:[a-z]\w*/)*)([a-z]\w+)',
  724. bygroups(Name.Namespace, tokenType)),
  725. # type keyword operators
  726. (r'::|->|[.:|]', tokenType),
  727. # catchall
  728. default('#pop')
  729. ],
  730. # comments and literals
  731. 'whitespace': [
  732. (r'\n\s*#.*$', Comment.Preproc),
  733. (r'\s+', Text),
  734. (r'/\*', Comment.Multiline, 'comment'),
  735. (r'//.*$', Comment.Single)
  736. ],
  737. 'comment': [
  738. (r'[^/*]+', Comment.Multiline),
  739. (r'/\*', Comment.Multiline, '#push'),
  740. (r'\*/', Comment.Multiline, '#pop'),
  741. (r'[*/]', Comment.Multiline),
  742. ],
  743. 'litstring': [
  744. (r'[^"]+', String.Double),
  745. (r'""', String.Escape),
  746. (r'"', String.Double, '#pop'),
  747. ],
  748. 'string': [
  749. (r'[^\\"\n]+', String.Double),
  750. include('escape-sequence'),
  751. (r'["\n]', String.Double, '#pop'),
  752. ],
  753. 'char': [
  754. (r'[^\\\'\n]+', String.Char),
  755. include('escape-sequence'),
  756. (r'[\'\n]', String.Char, '#pop'),
  757. ],
  758. 'escape-sequence': [
  759. (r'\\[nrt\\"\']', String.Escape),
  760. (r'\\x[0-9a-fA-F]{2}', String.Escape),
  761. (r'\\u[0-9a-fA-F]{4}', String.Escape),
  762. # Yes, \U literals are 6 hex digits.
  763. (r'\\U[0-9a-fA-F]{6}', String.Escape)
  764. ]
  765. }