haskell.py 32 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866
  1. """
  2. pygments.lexers.haskell
  3. ~~~~~~~~~~~~~~~~~~~~~~~
  4. Lexers for Haskell and related languages.
  5. :copyright: Copyright 2006-2024 by the Pygments team, see AUTHORS.
  6. :license: BSD, see LICENSE for details.
  7. """
  8. import re
  9. from pygments.lexer import Lexer, RegexLexer, bygroups, do_insertions, \
  10. default, include, inherit, line_re
  11. from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
  12. Number, Punctuation, Generic, Whitespace
  13. from pygments import unistring as uni
# Public API: the lexer classes this module contributes to Pygments.
__all__ = ['HaskellLexer', 'HspecLexer', 'IdrisLexer', 'AgdaLexer', 'CryptolLexer',
           'LiterateHaskellLexer', 'LiterateIdrisLexer', 'LiterateAgdaLexer',
           'LiterateCryptolLexer', 'KokaLexer']
class HaskellLexer(RegexLexer):
    """
    A Haskell lexer based on the lexemes defined in the Haskell 98 Report.
    """
    name = 'Haskell'
    url = 'https://www.haskell.org/'
    aliases = ['haskell', 'hs']
    filenames = ['*.hs']
    mimetypes = ['text/x-haskell']
    version_added = '0.8'

    # Reserved words; some entries are regex alternatives, e.g.
    # 'infix[lr]?' covers infix/infixl/infixr in one pattern.
    reserved = ('case', 'class', 'data', 'default', 'deriving', 'do', 'else',
                'family', 'if', 'in', 'infix[lr]?', 'instance',
                'let', 'newtype', 'of', 'then', 'type', 'where', '_')
    # Named ASCII control-character escapes (\NUL, \SOH, ...); again some
    # entries are regex alternatives ('[SE]TX' matches both STX and ETX).
    ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
             'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
             'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
             'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')

    # NOTE: rule order within each state is significant — earlier rules win.
    tokens = {
        'root': [
            # Whitespace:
            (r'\s+', Whitespace),
            # (r'--\s*|.*$', Comment.Doc),
            # A line comment only starts with '--' when not followed by an
            # operator symbol (otherwise '-->' etc. is an operator).
            (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            # Lexemes:
            #  Identifiers
            (r'\bimport\b', Keyword.Reserved, 'import'),
            (r'\bmodule\b', Keyword.Reserved, 'module'),
            (r'\berror\b', Name.Exception),
            (r'\b({})(?!\')\b'.format('|'.join(reserved)), Keyword.Reserved),
            (r"'[^\\]'", String.Char),  # this has to come before the TH quote
            (r'^[_' + uni.Ll + r'][\w\']*', Name.Function),
            (r"'?[_" + uni.Ll + r"][\w']*", Name),
            (r"('')?[" + uni.Lu + r"][\w\']*", Keyword.Type),
            (r"(')[" + uni.Lu + r"][\w\']*", Keyword.Type),
            (r"(')\[[^\]]*\]", Keyword.Type),  # tuples and lists get special treatment in GHC
            (r"(')\([^)]*\)", Keyword.Type),  # ..
            (r"(')[:!#$%&*+.\\/<=>?@^|~-]+", Keyword.Type),  # promoted type operators
            #  Operators
            (r'\\(?![:!#$%&*+.\\/<=>?@^|~-]+)', Name.Function),  # lambda operator
            (r'(<-|::|->|=>|=)(?![:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word),  # specials
            (r':[:!#$%&*+.\\/<=>?@^|~-]*', Keyword.Type),  # Constructor operators
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),  # Other operators
            #  Numbers
            # GHC NumericUnderscores: '_' separators are allowed in all
            # numeric literals; floats must be matched before integers.
            (r'0[xX]_*[\da-fA-F](_*[\da-fA-F])*_*[pP][+-]?\d(_*\d)*', Number.Float),
            (r'0[xX]_*[\da-fA-F](_*[\da-fA-F])*\.[\da-fA-F](_*[\da-fA-F])*'
             r'(_*[pP][+-]?\d(_*\d)*)?', Number.Float),
            (r'\d(_*\d)*_*[eE][+-]?\d(_*\d)*', Number.Float),
            (r'\d(_*\d)*\.\d(_*\d)*(_*[eE][+-]?\d(_*\d)*)?', Number.Float),
            (r'0[bB]_*[01](_*[01])*', Number.Bin),
            (r'0[oO]_*[0-7](_*[0-7])*', Number.Oct),
            (r'0[xX]_*[\da-fA-F](_*[\da-fA-F])*', Number.Hex),
            (r'\d(_*\d)*', Number.Integer),
            #  Character/String Literals
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            #  Special
            (r'\[\]', Keyword.Type),
            (r'\(\)', Name.Builtin),
            (r'[][(),;`{}]', Punctuation),
        ],
        'import': [
            # Import statements
            (r'\s+', Whitespace),
            (r'"', String, 'string'),
            # after "funclist" state
            (r'\)', Punctuation, '#pop'),
            (r'qualified\b', Keyword),
            # import X as Y
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(as)(\s+)([' + uni.Lu + r'][\w.]*)',
             bygroups(Name.Namespace, Whitespace, Keyword, Whitespace, Name), '#pop'),
            # import X hiding (functions)
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(hiding)(\s+)(\()',
             bygroups(Name.Namespace, Whitespace, Keyword, Whitespace, Punctuation), 'funclist'),
            # import X (functions)
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Whitespace, Punctuation), 'funclist'),
            # import X
            (r'[\w.]+', Name.Namespace, '#pop'),
        ],
        'module': [
            (r'\s+', Whitespace),
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Whitespace, Punctuation), 'funclist'),
            (r'[' + uni.Lu + r'][\w.]*', Name.Namespace, '#pop'),
        ],
        'funclist': [
            (r'\s+', Whitespace),
            (r'[' + uni.Lu + r']\w*', Keyword.Type),
            (r'(_[\w\']+|[' + uni.Ll + r'][\w\']*)', Name.Function),
            (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            (r',', Punctuation),
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            (r'\(', Punctuation, ('funclist', 'funclist')),
            (r'\)', Punctuation, '#pop:2'),
        ],
        # NOTE: the next four states are shared in the AgdaLexer; make sure
        # any change is compatible with Agda as well or copy over and change
        'comment': [
            # Multiline Comments (nested, hence '#push'/'#pop')
            (r'[^-{}]+', Comment.Multiline),
            (r'\{-', Comment.Multiline, '#push'),
            (r'-\}', Comment.Multiline, '#pop'),
            (r'[-{}]', Comment.Multiline),
        ],
        'character': [
            # Allows multi-chars, incorrectly.
            (r"[^\\']'", String.Char, '#pop'),
            (r"\\", String.Escape, 'escape'),
            ("'", String.Char, '#pop'),
        ],
        'string': [
            (r'[^\\"]+', String),
            (r"\\", String.Escape, 'escape'),
            ('"', String, '#pop'),
        ],
        'escape': [
            (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
            (r'\^[][' + uni.Lu + r'@^_]', String.Escape, '#pop'),
            ('|'.join(ascii), String.Escape, '#pop'),
            (r'o[0-7]+', String.Escape, '#pop'),
            (r'x[\da-fA-F]+', String.Escape, '#pop'),
            (r'\d+', String.Escape, '#pop'),
            # String gap: backslash, whitespace, backslash.
            (r'(\s+)(\\)', bygroups(Whitespace, String.Escape), '#pop'),
        ],
    }
  145. class HspecLexer(HaskellLexer):
  146. """
  147. A Haskell lexer with support for Hspec constructs.
  148. """
  149. name = 'Hspec'
  150. aliases = ['hspec']
  151. filenames = ['*Spec.hs']
  152. mimetypes = []
  153. version_added = '2.4'
  154. tokens = {
  155. 'root': [
  156. (r'(it)(\s*)("[^"]*")', bygroups(Text, Whitespace, String.Doc)),
  157. (r'(describe)(\s*)("[^"]*")', bygroups(Text, Whitespace, String.Doc)),
  158. (r'(context)(\s*)("[^"]*")', bygroups(Text, Whitespace, String.Doc)),
  159. inherit,
  160. ],
  161. }
class IdrisLexer(RegexLexer):
    """
    A lexer for the dependently typed programming language Idris.

    Based on the Haskell and Agda Lexer.
    """
    name = 'Idris'
    url = 'https://www.idris-lang.org/'
    aliases = ['idris', 'idr']
    filenames = ['*.idr']
    mimetypes = ['text/x-idris']
    version_added = '2.0'

    # Reserved words; 'infix[lr]?' is a regex alternative covering
    # infix/infixl/infixr.
    reserved = ('case', 'class', 'data', 'default', 'using', 'do', 'else',
                'if', 'in', 'infix[lr]?', 'instance', 'rewrite', 'auto',
                'namespace', 'codata', 'mutual', 'private', 'public', 'abstract',
                'total', 'partial',
                'interface', 'implementation', 'export', 'covering', 'constructor',
                'let', 'proof', 'of', 'then', 'static', 'where', '_', 'with',
                'pattern', 'term', 'syntax', 'prefix',
                'postulate', 'parameters', 'record', 'dsl', 'impossible', 'implicit',
                'tactics', 'intros', 'intro', 'compute', 'refine', 'exact', 'trivial')

    # Named ASCII control-character escapes, as in HaskellLexer.
    ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
             'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
             'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
             'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')

    # Compiler directives, written as '%name' etc. in source.
    directives = ('lib', 'link', 'flag', 'include', 'hide', 'freeze', 'access',
                  'default', 'logging', 'dynamic', 'name', 'error_handlers', 'language')

    # NOTE: rule order within each state is significant — earlier rules win.
    tokens = {
        'root': [
            # Comments
            (r'^(\s*)(%({}))'.format('|'.join(directives)),
             bygroups(Whitespace, Keyword.Reserved)),
            (r'(\s*)(--(?![!#$%&*+./<=>?@^|_~:\\]).*?)$', bygroups(Whitespace, Comment.Single)),
            (r'(\s*)(\|{3}.*?)$', bygroups(Whitespace, Comment.Single)),
            (r'(\s*)(\{-)', bygroups(Whitespace, Comment.Multiline), 'comment'),
            # Declaration
            (r'^(\s*)([^\s(){}]+)(\s*)(:)(\s*)',
             bygroups(Whitespace, Name.Function, Whitespace, Operator.Word, Whitespace)),
            #  Identifiers
            (r'\b({})(?!\')\b'.format('|'.join(reserved)), Keyword.Reserved),
            (r'(import|module)(\s+)', bygroups(Keyword.Reserved, Whitespace), 'module'),
            (r"('')?[A-Z][\w\']*", Keyword.Type),
            (r'[a-z][\w\']*', Text),
            #  Special Symbols
            (r'(<-|::|->|=>|=)', Operator.Word),  # specials
            (r'([(){}\[\]:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word),  # specials
            #  Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            # Strings
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            (r'[^\s(){}]+', Text),
            (r'\s+?', Whitespace),  # Whitespace
        ],
        'module': [
            (r'\s+', Whitespace),
            (r'([A-Z][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Whitespace, Punctuation), 'funclist'),
            (r'[A-Z][\w.]*', Name.Namespace, '#pop'),
        ],
        'funclist': [
            (r'\s+', Whitespace),
            (r'[A-Z]\w*', Keyword.Type),
            (r'(_[\w\']+|[a-z][\w\']*)', Name.Function),
            (r'--.*$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            (r',', Punctuation),
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            (r'\(', Punctuation, ('funclist', 'funclist')),
            (r'\)', Punctuation, '#pop:2'),
        ],
        # NOTE: the next four states are shared in the AgdaLexer; make sure
        # any change is compatible with Agda as well or copy over and change
        'comment': [
            # Multiline Comments (nested, hence '#push'/'#pop')
            (r'[^-{}]+', Comment.Multiline),
            (r'\{-', Comment.Multiline, '#push'),
            (r'-\}', Comment.Multiline, '#pop'),
            (r'[-{}]', Comment.Multiline),
        ],
        'character': [
            # Allows multi-chars, incorrectly.
            (r"[^\\']", String.Char),
            (r"\\", String.Escape, 'escape'),
            ("'", String.Char, '#pop'),
        ],
        'string': [
            (r'[^\\"]+', String),
            (r"\\", String.Escape, 'escape'),
            ('"', String, '#pop'),
        ],
        'escape': [
            (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
            (r'\^[][A-Z@^_]', String.Escape, '#pop'),
            ('|'.join(ascii), String.Escape, '#pop'),
            (r'o[0-7]+', String.Escape, '#pop'),
            (r'x[\da-fA-F]+', String.Escape, '#pop'),
            (r'\d+', String.Escape, '#pop'),
            # String gap: backslash, whitespace, backslash.
            (r'(\s+)(\\)', bygroups(Whitespace, String.Escape), '#pop')
        ],
    }
class AgdaLexer(RegexLexer):
    """
    For the Agda dependently typed functional programming language and
    proof assistant.
    """
    name = 'Agda'
    url = 'http://wiki.portal.chalmers.se/agda/pmwiki.php'
    aliases = ['agda']
    filenames = ['*.agda']
    mimetypes = ['text/x-agda']
    version_added = '2.0'

    reserved = (
        'abstract', 'codata', 'coinductive', 'constructor', 'data', 'do',
        'eta-equality', 'field', 'forall', 'hiding', 'in', 'inductive', 'infix',
        'infixl', 'infixr', 'instance', 'interleaved', 'let', 'macro', 'mutual',
        'no-eta-equality', 'open', 'overlap', 'pattern', 'postulate', 'primitive',
        'private', 'quote', 'quoteTerm', 'record', 'renaming', 'rewrite',
        'syntax', 'tactic', 'unquote', 'unquoteDecl', 'unquoteDef', 'using',
        'variable', 'where', 'with',
    )

    # NOTE: rule order within each state is significant — earlier rules win.
    tokens = {
        'root': [
            # Declaration
            (r'^(\s*)([^\s(){}]+)(\s*)(:)(\s*)',
             bygroups(Whitespace, Name.Function, Whitespace,
                      Operator.Word, Whitespace)),
            # Comments
            (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            # Holes (interactive goal markers {! ... !})
            (r'\{!', Comment.Directive, 'hole'),
            # Lexemes:
            #  Identifiers
            (r'\b({})(?!\')\b'.format('|'.join(reserved)), Keyword.Reserved),
            (r'(import|module)(\s+)', bygroups(Keyword.Reserved, Whitespace),
             'module'),
            # Set/Prop optionally followed by subscript digits (₀-₉).
            (r'\b(Set|Prop)[\u2080-\u2089]*\b', Keyword.Type),
            #  Special Symbols
            (r'(\(|\)|\{|\})', Operator),
            # ellipses, lambda (λ), forall (∀), arrow (→) and ASCII forms
            (r'(\.{1,3}|\||\u03BB|\u2200|\u2192|:|=|->)', Operator.Word),
            #  Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            # Strings
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            (r'[^\s(){}]+', Text),
            (r'\s+?', Whitespace),  # Whitespace
        ],
        'hole': [
            # Holes (may nest, hence '#push'/'#pop')
            (r'[^!{}]+', Comment.Directive),
            (r'\{!', Comment.Directive, '#push'),
            (r'!\}', Comment.Directive, '#pop'),
            (r'[!{}]', Comment.Directive),
        ],
        'module': [
            (r'\{-', Comment.Multiline, 'comment'),
            (r'[a-zA-Z][\w.\']*', Name, '#pop'),
            (r'[\W0-9_]+', Text)
        ],
        # The comment/character/string/escape states are shared with the
        # Haskell lexer (see the NOTE in HaskellLexer.tokens).
        'comment': HaskellLexer.tokens['comment'],
        'character': HaskellLexer.tokens['character'],
        'string': HaskellLexer.tokens['string'],
        'escape': HaskellLexer.tokens['escape']
    }
class CryptolLexer(RegexLexer):
    """
    FIXME: A Cryptol2 lexer based on the lexemes defined in the Haskell 98 Report.
    """
    name = 'Cryptol'
    aliases = ['cryptol', 'cry']
    filenames = ['*.cry']
    mimetypes = ['text/x-cryptol']
    url = 'https://www.cryptol.net'
    version_added = '2.0'

    reserved = ('Arith', 'Bit', 'Cmp', 'False', 'Inf', 'True', 'else',
                'export', 'extern', 'fin', 'if', 'import', 'inf', 'lg2',
                'max', 'min', 'module', 'newtype', 'pragma', 'property',
                'then', 'type', 'where', 'width')

    # Named ASCII control-character escapes, as in HaskellLexer.
    ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
             'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
             'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
             'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')

    # NOTE: rule order within each state is significant — earlier rules win.
    tokens = {
        'root': [
            # Whitespace:
            (r'\s+', Whitespace),
            # (r'--\s*|.*$', Comment.Doc),
            (r'//.*$', Comment.Single),
            (r'/\*', Comment.Multiline, 'comment'),
            # Lexemes:
            #  Identifiers
            (r'\bimport\b', Keyword.Reserved, 'import'),
            (r'\bmodule\b', Keyword.Reserved, 'module'),
            (r'\berror\b', Name.Exception),
            (r'\b({})(?!\')\b'.format('|'.join(reserved)), Keyword.Reserved),
            (r'^[_a-z][\w\']*', Name.Function),
            (r"'?[_a-z][\w']*", Name),
            (r"('')?[A-Z][\w\']*", Keyword.Type),
            #  Operators
            (r'\\(?![:!#$%&*+.\\/<=>?@^|~-]+)', Name.Function),  # lambda operator
            (r'(<-|::|->|=>|=)(?![:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word),  # specials
            (r':[:!#$%&*+.\\/<=>?@^|~-]*', Keyword.Type),  # Constructor operators
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),  # Other operators
            #  Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[oO][0-7]+', Number.Oct),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            #  Character/String Literals
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            #  Special
            (r'\[\]', Keyword.Type),
            (r'\(\)', Name.Builtin),
            (r'[][(),;`{}]', Punctuation),
        ],
        'import': [
            # Import statements
            (r'\s+', Whitespace),
            (r'"', String, 'string'),
            # after "funclist" state
            (r'\)', Punctuation, '#pop'),
            (r'qualified\b', Keyword),
            # import X as Y
            (r'([A-Z][\w.]*)(\s+)(as)(\s+)([A-Z][\w.]*)',
             bygroups(Name.Namespace, Whitespace, Keyword, Whitespace, Name), '#pop'),
            # import X hiding (functions)
            (r'([A-Z][\w.]*)(\s+)(hiding)(\s+)(\()',
             bygroups(Name.Namespace, Whitespace, Keyword, Whitespace, Punctuation), 'funclist'),
            # import X (functions)
            (r'([A-Z][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Whitespace, Punctuation), 'funclist'),
            # import X
            (r'[\w.]+', Name.Namespace, '#pop'),
        ],
        'module': [
            (r'\s+', Whitespace),
            (r'([A-Z][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Whitespace, Punctuation), 'funclist'),
            (r'[A-Z][\w.]*', Name.Namespace, '#pop'),
        ],
        'funclist': [
            (r'\s+', Whitespace),
            (r'[A-Z]\w*', Keyword.Type),
            (r'(_[\w\']+|[a-z][\w\']*)', Name.Function),
            # TODO: these don't match the comments in docs, remove.
            # (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            # (r'{-', Comment.Multiline, 'comment'),
            (r',', Punctuation),
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            (r'\(', Punctuation, ('funclist', 'funclist')),
            (r'\)', Punctuation, '#pop:2'),
        ],
        'comment': [
            # Multiline Comments (nested, hence '#push'/'#pop')
            (r'[^/*]+', Comment.Multiline),
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[*/]', Comment.Multiline),
        ],
        'character': [
            # Allows multi-chars, incorrectly.
            (r"[^\\']'", String.Char, '#pop'),
            (r"\\", String.Escape, 'escape'),
            ("'", String.Char, '#pop'),
        ],
        'string': [
            (r'[^\\"]+', String),
            (r"\\", String.Escape, 'escape'),
            ('"', String, '#pop'),
        ],
        'escape': [
            (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
            (r'\^[][A-Z@^_]', String.Escape, '#pop'),
            ('|'.join(ascii), String.Escape, '#pop'),
            (r'o[0-7]+', String.Escape, '#pop'),
            (r'x[\da-fA-F]+', String.Escape, '#pop'),
            (r'\d+', String.Escape, '#pop'),
            # String gap: backslash, whitespace, backslash.
            (r'(\s+)(\\)', bygroups(Whitespace, String.Escape), '#pop'),
        ],
    }

    # Builtin function names (and a few operator spellings) that should be
    # re-tagged from plain Name to Name.Builtin after regex lexing.
    EXTRA_KEYWORDS = {'join', 'split', 'reverse', 'transpose', 'width',
                      'length', 'tail', '<<', '>>', '<<<', '>>>', 'const',
                      'reg', 'par', 'seq', 'ASSERT', 'undefined', 'error',
                      'trace'}

    def get_tokens_unprocessed(self, text):
        """Run the regex lexer, promoting EXTRA_KEYWORDS names to builtins."""
        stack = ['root']
        for index, token, value in \
                RegexLexer.get_tokens_unprocessed(self, text, stack):
            if token is Name and value in self.EXTRA_KEYWORDS:
                yield index, Name.Builtin, value
            else:
                yield index, token, value
class LiterateLexer(Lexer):
    """
    Base class for lexers of literate file formats based on LaTeX or Bird-style
    (prefixing each code line with ">").

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.
    """

    # Bird-style code line: '>' marker (plus trailing blanks) and the code.
    bird_re = re.compile(r'(>[ \t]*)(.*\n)')

    def __init__(self, baselexer, **options):
        # `baselexer` lexes the extracted code portions (e.g. a HaskellLexer).
        self.baselexer = baselexer
        Lexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        style = self.options.get('litstyle')
        if style is None:
            # Autodetect: LaTeX sources start with '\' or '%'.
            style = (text.lstrip()[0:1] in '%\\') and 'latex' or 'bird'

        code = ''
        insertions = []  # (offset into `code`, token list) pairs for do_insertions
        if style == 'bird':
            # bird-style
            for match in line_re.finditer(text):
                line = match.group()
                m = self.bird_re.match(line)
                if m:
                    insertions.append((len(code),
                                       [(0, Comment.Special, m.group(1))]))
                    code += m.group(2)
                else:
                    insertions.append((len(code), [(0, Text, line)]))
        else:
            # latex-style: code lives between \begin{code} and \end{code};
            # everything else is lexed with TexLexer and spliced back in.
            from pygments.lexers.markup import TexLexer
            lxlexer = TexLexer(**self.options)
            codelines = 0
            latex = ''
            for match in line_re.finditer(text):
                line = match.group()
                if codelines:
                    if line.lstrip().startswith('\\end{code}'):
                        codelines = 0
                        latex += line
                    else:
                        code += line
                elif line.lstrip().startswith('\\begin{code}'):
                    codelines = 1
                    latex += line
                    # Flush accumulated LaTeX at the current code offset.
                    insertions.append((len(code),
                                       list(lxlexer.get_tokens_unprocessed(latex))))
                    latex = ''
                else:
                    latex += line
            # Flush any trailing LaTeX after the last code block.
            insertions.append((len(code),
                               list(lxlexer.get_tokens_unprocessed(latex))))
        yield from do_insertions(insertions, self.baselexer.get_tokens_unprocessed(code))
  521. class LiterateHaskellLexer(LiterateLexer):
  522. """
  523. For Literate Haskell (Bird-style or LaTeX) source.
  524. Additional options accepted:
  525. `litstyle`
  526. If given, must be ``"bird"`` or ``"latex"``. If not given, the style
  527. is autodetected: if the first non-whitespace character in the source
  528. is a backslash or percent character, LaTeX is assumed, else Bird.
  529. """
  530. name = 'Literate Haskell'
  531. aliases = ['literate-haskell', 'lhaskell', 'lhs']
  532. filenames = ['*.lhs']
  533. mimetypes = ['text/x-literate-haskell']
  534. url = 'https://wiki.haskell.org/Literate_programming'
  535. version_added = '0.9'
  536. def __init__(self, **options):
  537. hslexer = HaskellLexer(**options)
  538. LiterateLexer.__init__(self, hslexer, **options)
  539. class LiterateIdrisLexer(LiterateLexer):
  540. """
  541. For Literate Idris (Bird-style or LaTeX) source.
  542. Additional options accepted:
  543. `litstyle`
  544. If given, must be ``"bird"`` or ``"latex"``. If not given, the style
  545. is autodetected: if the first non-whitespace character in the source
  546. is a backslash or percent character, LaTeX is assumed, else Bird.
  547. """
  548. name = 'Literate Idris'
  549. aliases = ['literate-idris', 'lidris', 'lidr']
  550. filenames = ['*.lidr']
  551. mimetypes = ['text/x-literate-idris']
  552. url = 'https://idris2.readthedocs.io/en/latest/reference/literate.html'
  553. version_added = '2.0'
  554. def __init__(self, **options):
  555. hslexer = IdrisLexer(**options)
  556. LiterateLexer.__init__(self, hslexer, **options)
  557. class LiterateAgdaLexer(LiterateLexer):
  558. """
  559. For Literate Agda source.
  560. Additional options accepted:
  561. `litstyle`
  562. If given, must be ``"bird"`` or ``"latex"``. If not given, the style
  563. is autodetected: if the first non-whitespace character in the source
  564. is a backslash or percent character, LaTeX is assumed, else Bird.
  565. """
  566. name = 'Literate Agda'
  567. aliases = ['literate-agda', 'lagda']
  568. filenames = ['*.lagda']
  569. mimetypes = ['text/x-literate-agda']
  570. url = 'https://agda.readthedocs.io/en/latest/tools/literate-programming.html'
  571. version_added = '2.0'
  572. def __init__(self, **options):
  573. agdalexer = AgdaLexer(**options)
  574. LiterateLexer.__init__(self, agdalexer, litstyle='latex', **options)
  575. class LiterateCryptolLexer(LiterateLexer):
  576. """
  577. For Literate Cryptol (Bird-style or LaTeX) source.
  578. Additional options accepted:
  579. `litstyle`
  580. If given, must be ``"bird"`` or ``"latex"``. If not given, the style
  581. is autodetected: if the first non-whitespace character in the source
  582. is a backslash or percent character, LaTeX is assumed, else Bird.
  583. """
  584. name = 'Literate Cryptol'
  585. aliases = ['literate-cryptol', 'lcryptol', 'lcry']
  586. filenames = ['*.lcry']
  587. mimetypes = ['text/x-literate-cryptol']
  588. url = 'https://www.cryptol.net'
  589. version_added = '2.0'
  590. def __init__(self, **options):
  591. crylexer = CryptolLexer(**options)
  592. LiterateLexer.__init__(self, crylexer, **options)
class KokaLexer(RegexLexer):
    """
    Lexer for the Koka language.
    """
    name = 'Koka'
    url = 'https://koka-lang.github.io/koka/doc/index.html'
    aliases = ['koka']
    filenames = ['*.kk', '*.kki']
    mimetypes = ['text/x-koka']
    version_added = '1.6'

    keywords = [
        'infix', 'infixr', 'infixl',
        'type', 'cotype', 'rectype', 'alias',
        'struct', 'con',
        'fun', 'function', 'val', 'var',
        'external',
        'if', 'then', 'else', 'elif', 'return', 'match',
        'private', 'public', 'private',
        'module', 'import', 'as',
        'include', 'inline',
        'rec',
        'try', 'yield', 'enum',
        'interface', 'instance',
    ]

    # keywords that are followed by a type
    typeStartKeywords = [
        'type', 'cotype', 'rectype', 'alias', 'struct', 'enum',
    ]

    # keywords valid in a type
    typekeywords = [
        'forall', 'exists', 'some', 'with',
    ]

    # builtin names and special names
    builtin = [
        'for', 'while', 'repeat',
        'foreach', 'foreach-indexed',
        'error', 'catch', 'finally',
        'cs', 'js', 'file', 'ref', 'assigned',
    ]

    # symbols that can be in an operator
    symbols = r'[$%&*+@!/\\^~=.:\-?|<>]+'

    # symbol boundary: an operator keyword should not be followed by any of these
    sboundary = '(?!' + symbols + ')'

    # name boundary: a keyword should not be followed by any of these
    boundary = r'(?![\w/])'

    # koka token abstractions
    tokenType = Name.Attribute
    tokenTypeDef = Name.Class
    tokenConstructor = Generic.Emph

    # main lexer
    tokens = {
        'root': [
            include('whitespace'),

            # go into type mode
            (r'::?' + sboundary, tokenType, 'type'),
            (r'(alias)(\s+)([a-z]\w*)?', bygroups(Keyword, Whitespace, tokenTypeDef),
             'alias-type'),
            (r'(struct)(\s+)([a-z]\w*)?', bygroups(Keyword, Whitespace, tokenTypeDef),
             'struct-type'),
            ((r'({})'.format('|'.join(typeStartKeywords))) +
             r'(\s+)([a-z]\w*)?', bygroups(Keyword, Whitespace, tokenTypeDef),
             'type'),

            # special sequences of tokens (we use ?: for non-capturing group as
            # required by 'bygroups')
            (r'(module)(\s+)(interface(?=\s))?(\s+)?((?:[a-z]\w*/)*[a-z]\w*)',
             bygroups(Keyword, Whitespace, Keyword, Whitespace, Name.Namespace)),
            (r'(import)(\s+)((?:[a-z]\w*/)*[a-z]\w*)'
             r'(?:(\s*)(=)(\s*)(qualified)?(\s*)'
             r'((?:[a-z]\w*/)*[a-z]\w*))?',
             bygroups(Keyword, Whitespace, Name.Namespace, Whitespace, Keyword, Whitespace,
                      Keyword, Whitespace, Name.Namespace)),

            (r'^(public|private)?(\s+)?(function|fun|val)'
             r'(\s+)([a-z]\w*|\((?:' + symbols + r'|/)\))',
             bygroups(Keyword, Whitespace, Keyword, Whitespace, Name.Function)),
            (r'^(?:(public|private)(?=\s+external))?((?<!^)\s+)?(external)(\s+)(inline(?=\s))?(\s+)?'
             r'([a-z]\w*|\((?:' + symbols + r'|/)\))',
             bygroups(Keyword, Whitespace, Keyword, Whitespace, Keyword, Whitespace, Name.Function)),

            # keywords
            (r'({})'.format('|'.join(typekeywords)) + boundary, Keyword.Type),
            (r'({})'.format('|'.join(keywords)) + boundary, Keyword),
            (r'({})'.format('|'.join(builtin)) + boundary, Keyword.Pseudo),
            (r'::?|:=|\->|[=.]' + sboundary, Keyword),

            # names (optionally qualified by 'mod1/mod2/' prefixes)
            (r'((?:[a-z]\w*/)*)([A-Z]\w*)',
             bygroups(Name.Namespace, tokenConstructor)),
            (r'((?:[a-z]\w*/)*)([a-z]\w*)', bygroups(Name.Namespace, Name)),
            (r'((?:[a-z]\w*/)*)(\((?:' + symbols + r'|/)\))',
             bygroups(Name.Namespace, Name)),
            (r'_\w*', Name.Variable),

            # literal string
            (r'@"', String.Double, 'litstring'),

            # operators
            (symbols + "|/(?![*/])", Operator),
            (r'`', Operator),
            (r'[{}()\[\];,]', Punctuation),

            # literals. No check for literal characters with len > 1
            (r'[0-9]+\.[0-9]+([eE][\-+]?[0-9]+)?', Number.Float),
            (r'0[xX][0-9a-fA-F]+', Number.Hex),
            (r'[0-9]+', Number.Integer),

            (r"'", String.Char, 'char'),
            (r'"', String.Double, 'string'),
        ],

        # type started by alias
        'alias-type': [
            (r'=', Keyword),
            include('type')
        ],

        # type started by struct
        'struct-type': [
            (r'(?=\((?!,*\)))', Punctuation, '#pop'),
            include('type')
        ],

        # type started by colon
        'type': [
            (r'[(\[<]', tokenType, 'type-nested'),
            include('type-content')
        ],

        # type nested in brackets: can contain parameters, comma etc.
        'type-nested': [
            (r'[)\]>]', tokenType, '#pop'),
            (r'[(\[<]', tokenType, 'type-nested'),
            (r',', tokenType),
            (r'([a-z]\w*)(\s*)(:)(?!:)',
             bygroups(Name, Whitespace, tokenType)),  # parameter name
            include('type-content')
        ],

        # shared contents of a type
        'type-content': [
            include('whitespace'),

            # keywords
            (r'({})'.format('|'.join(typekeywords)) + boundary, Keyword),
            (r'(?=(({})'.format('|'.join(keywords)) + boundary + '))',
             Keyword, '#pop'),  # need to match because names overlap...

            # kinds
            (r'[EPHVX]' + boundary, tokenType),

            # type names
            (r'[a-z][0-9]*(?![\w/])', tokenType),
            (r'_\w*', tokenType.Variable),  # Generic.Emph
            (r'((?:[a-z]\w*/)*)([A-Z]\w*)',
             bygroups(Name.Namespace, tokenType)),
            (r'((?:[a-z]\w*/)*)([a-z]\w+)',
             bygroups(Name.Namespace, tokenType)),

            # type keyword operators
            (r'::|->|[.:|]', tokenType),

            # catchall
            default('#pop')
        ],

        # comments and literals
        'whitespace': [
            (r'(\n\s*)(#.*)$', bygroups(Whitespace, Comment.Preproc)),
            (r'\s+', Whitespace),
            (r'/\*', Comment.Multiline, 'comment'),
            (r'//.*$', Comment.Single)
        ],
        'comment': [
            # Multiline comments (nested, hence '#push'/'#pop')
            (r'[^/*]+', Comment.Multiline),
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[*/]', Comment.Multiline),
        ],
        'litstring': [
            # Raw string: '""' is an escaped quote, all else is literal.
            (r'[^"]+', String.Double),
            (r'""', String.Escape),
            (r'"', String.Double, '#pop'),
        ],
        'string': [
            (r'[^\\"\n]+', String.Double),
            include('escape-sequence'),
            (r'["\n]', String.Double, '#pop'),
        ],
        'char': [
            (r'[^\\\'\n]+', String.Char),
            include('escape-sequence'),
            (r'[\'\n]', String.Char, '#pop'),
        ],
        'escape-sequence': [
            (r'\\[nrt\\"\']', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
            (r'\\u[0-9a-fA-F]{4}', String.Escape),
            # Yes, \U literals are 6 hex digits.
            (r'\\U[0-9a-fA-F]{6}', String.Escape)
        ]
    }