123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869 |
- # -*- coding: utf-8 -*-
- """
- pygments.lexers.haskell
- ~~~~~~~~~~~~~~~~~~~~~~~
- Lexers for Haskell and related languages.
- :copyright: Copyright 2006-2019 by the Pygments team, see AUTHORS.
- :license: BSD, see LICENSE for details.
- """
- import re
- from pygments.lexer import Lexer, RegexLexer, bygroups, do_insertions, \
- default, include, inherit
- from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
- Number, Punctuation, Generic
- from pygments import unistring as uni
- __all__ = ['HaskellLexer', 'HspecLexer', 'IdrisLexer', 'AgdaLexer', 'CryptolLexer',
- 'LiterateHaskellLexer', 'LiterateIdrisLexer', 'LiterateAgdaLexer',
- 'LiterateCryptolLexer', 'KokaLexer']
- line_re = re.compile('.*?\n')
- class HaskellLexer(RegexLexer):
- """
- A Haskell lexer based on the lexemes defined in the Haskell 98 Report.
- .. versionadded:: 0.8
- """
- name = 'Haskell'
- aliases = ['haskell', 'hs']
- filenames = ['*.hs']
- mimetypes = ['text/x-haskell']
- flags = re.MULTILINE | re.UNICODE
- reserved = ('case', 'class', 'data', 'default', 'deriving', 'do', 'else',
- 'family', 'if', 'in', 'infix[lr]?', 'instance',
- 'let', 'newtype', 'of', 'then', 'type', 'where', '_')
- ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
- 'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
- 'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
- 'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')
- tokens = {
- 'root': [
- # Whitespace:
- (r'\s+', Text),
- # (r'--\s*|.*$', Comment.Doc),
- (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
- (r'\{-', Comment.Multiline, 'comment'),
- # Lexemes:
- # Identifiers
- (r'\bimport\b', Keyword.Reserved, 'import'),
- (r'\bmodule\b', Keyword.Reserved, 'module'),
- (r'\berror\b', Name.Exception),
- (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
- (r"'[^\\]'", String.Char), # this has to come before the TH quote
- (r'^[_' + uni.Ll + r'][\w\']*', Name.Function),
- (r"'?[_" + uni.Ll + r"][\w']*", Name),
- (r"('')?[" + uni.Lu + r"][\w\']*", Keyword.Type),
- (r"(')[" + uni.Lu + r"][\w\']*", Keyword.Type),
- (r"(')\[[^\]]*\]", Keyword.Type), # tuples and lists get special treatment in GHC
- (r"(')\([^)]*\)", Keyword.Type), # ..
- # Operators
- (r'\\(?![:!#$%&*+.\\/<=>?@^|~-]+)', Name.Function), # lambda operator
- (r'(<-|::|->|=>|=)(?![:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word), # specials
- (r':[:!#$%&*+.\\/<=>?@^|~-]*', Keyword.Type), # Constructor operators
- (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator), # Other operators
- # Numbers
- (r'0[xX]_*[\da-fA-F](_*[\da-fA-F])*_*[pP][+-]?\d(_*\d)*', Number.Float),
- (r'0[xX]_*[\da-fA-F](_*[\da-fA-F])*\.[\da-fA-F](_*[\da-fA-F])*'
- r'(_*[pP][+-]?\d(_*\d)*)?', Number.Float),
- (r'\d(_*\d)*_*[eE][+-]?\d(_*\d)*', Number.Float),
- (r'\d(_*\d)*\.\d(_*\d)*(_*[eE][+-]?\d(_*\d)*)?', Number.Float),
- (r'0[bB]_*[01](_*[01])*', Number.Bin),
- (r'0[oO]_*[0-7](_*[0-7])*', Number.Oct),
- (r'0[xX]_*[\da-fA-F](_*[\da-fA-F])*', Number.Hex),
- (r'\d(_*\d)*', Number.Integer),
- # Character/String Literals
- (r"'", String.Char, 'character'),
- (r'"', String, 'string'),
- # Special
- (r'\[\]', Keyword.Type),
- (r'\(\)', Name.Builtin),
- (r'[][(),;`{}]', Punctuation),
- ],
- 'import': [
- # Import statements
- (r'\s+', Text),
- (r'"', String, 'string'),
- # after "funclist" state
- (r'\)', Punctuation, '#pop'),
- (r'qualified\b', Keyword),
- # import X as Y
- (r'([' + uni.Lu + r'][\w.]*)(\s+)(as)(\s+)([' + uni.Lu + r'][\w.]*)',
- bygroups(Name.Namespace, Text, Keyword, Text, Name), '#pop'),
- # import X hiding (functions)
- (r'([' + uni.Lu + r'][\w.]*)(\s+)(hiding)(\s+)(\()',
- bygroups(Name.Namespace, Text, Keyword, Text, Punctuation), 'funclist'),
- # import X (functions)
- (r'([' + uni.Lu + r'][\w.]*)(\s+)(\()',
- bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
- # import X
- (r'[\w.]+', Name.Namespace, '#pop'),
- ],
- 'module': [
- (r'\s+', Text),
- (r'([' + uni.Lu + r'][\w.]*)(\s+)(\()',
- bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
- (r'[' + uni.Lu + r'][\w.]*', Name.Namespace, '#pop'),
- ],
- 'funclist': [
- (r'\s+', Text),
- (r'[' + uni.Lu + r']\w*', Keyword.Type),
- (r'(_[\w\']+|[' + uni.Ll + r'][\w\']*)', Name.Function),
- (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
- (r'\{-', Comment.Multiline, 'comment'),
- (r',', Punctuation),
- (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
- # (HACK, but it makes sense to push two instances, believe me)
- (r'\(', Punctuation, ('funclist', 'funclist')),
- (r'\)', Punctuation, '#pop:2'),
- ],
- # NOTE: the next four states are shared in the AgdaLexer; make sure
- # any change is compatible with Agda as well or copy over and change
- 'comment': [
- # Multiline Comments
- (r'[^-{}]+', Comment.Multiline),
- (r'\{-', Comment.Multiline, '#push'),
- (r'-\}', Comment.Multiline, '#pop'),
- (r'[-{}]', Comment.Multiline),
- ],
- 'character': [
- # Allows multi-chars, incorrectly.
- (r"[^\\']'", String.Char, '#pop'),
- (r"\\", String.Escape, 'escape'),
- ("'", String.Char, '#pop'),
- ],
- 'string': [
- (r'[^\\"]+', String),
- (r"\\", String.Escape, 'escape'),
- ('"', String, '#pop'),
- ],
- 'escape': [
- (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
- (r'\^[][' + uni.Lu + r'@^_]', String.Escape, '#pop'),
- ('|'.join(ascii), String.Escape, '#pop'),
- (r'o[0-7]+', String.Escape, '#pop'),
- (r'x[\da-fA-F]+', String.Escape, '#pop'),
- (r'\d+', String.Escape, '#pop'),
- (r'\s+\\', String.Escape, '#pop'),
- ],
- }
- class HspecLexer(HaskellLexer):
- """
- A Haskell lexer with support for Hspec constructs.
- .. versionadded:: 2.4.0
- """
- name = 'Hspec'
- aliases = ['hspec']
- filenames = []
- mimetypes = []
- tokens = {
- 'root': [
- (r'(it\s*)("[^"]*")', bygroups(Text, String.Doc)),
- (r'(describe\s*)("[^"]*")', bygroups(Text, String.Doc)),
- (r'(context\s*)("[^"]*")', bygroups(Text, String.Doc)),
- inherit,
- ],
- }
- class IdrisLexer(RegexLexer):
- """
- A lexer for the dependently typed programming language Idris.
- Based on the Haskell and Agda Lexer.
- .. versionadded:: 2.0
- """
- name = 'Idris'
- aliases = ['idris', 'idr']
- filenames = ['*.idr']
- mimetypes = ['text/x-idris']
- reserved = ('case', 'class', 'data', 'default', 'using', 'do', 'else',
- 'if', 'in', 'infix[lr]?', 'instance', 'rewrite', 'auto',
- 'namespace', 'codata', 'mutual', 'private', 'public', 'abstract',
- 'total', 'partial',
- 'let', 'proof', 'of', 'then', 'static', 'where', '_', 'with',
- 'pattern', 'term', 'syntax', 'prefix',
- 'postulate', 'parameters', 'record', 'dsl', 'impossible', 'implicit',
- 'tactics', 'intros', 'intro', 'compute', 'refine', 'exact', 'trivial')
- ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
- 'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
- 'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
- 'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')
- directives = ('lib', 'link', 'flag', 'include', 'hide', 'freeze', 'access',
- 'default', 'logging', 'dynamic', 'name', 'error_handlers', 'language')
- tokens = {
- 'root': [
- # Comments
- (r'^(\s*)(%%%s)' % '|'.join(directives),
- bygroups(Text, Keyword.Reserved)),
- (r'(\s*)(--(?![!#$%&*+./<=>?@^|_~:\\]).*?)$', bygroups(Text, Comment.Single)),
- (r'(\s*)(\|{3}.*?)$', bygroups(Text, Comment.Single)),
- (r'(\s*)(\{-)', bygroups(Text, Comment.Multiline), 'comment'),
- # Declaration
- (r'^(\s*)([^\s(){}]+)(\s*)(:)(\s*)',
- bygroups(Text, Name.Function, Text, Operator.Word, Text)),
- # Identifiers
- (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
- (r'(import|module)(\s+)', bygroups(Keyword.Reserved, Text), 'module'),
- (r"('')?[A-Z][\w\']*", Keyword.Type),
- (r'[a-z][\w\']*', Text),
- # Special Symbols
- (r'(<-|::|->|=>|=)', Operator.Word), # specials
- (r'([(){}\[\]:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word), # specials
- # Numbers
- (r'\d+[eE][+-]?\d+', Number.Float),
- (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
- (r'0[xX][\da-fA-F]+', Number.Hex),
- (r'\d+', Number.Integer),
- # Strings
- (r"'", String.Char, 'character'),
- (r'"', String, 'string'),
- (r'[^\s(){}]+', Text),
- (r'\s+?', Text), # Whitespace
- ],
- 'module': [
- (r'\s+', Text),
- (r'([A-Z][\w.]*)(\s+)(\()',
- bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
- (r'[A-Z][\w.]*', Name.Namespace, '#pop'),
- ],
- 'funclist': [
- (r'\s+', Text),
- (r'[A-Z]\w*', Keyword.Type),
- (r'(_[\w\']+|[a-z][\w\']*)', Name.Function),
- (r'--.*$', Comment.Single),
- (r'\{-', Comment.Multiline, 'comment'),
- (r',', Punctuation),
- (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
- # (HACK, but it makes sense to push two instances, believe me)
- (r'\(', Punctuation, ('funclist', 'funclist')),
- (r'\)', Punctuation, '#pop:2'),
- ],
- # NOTE: the next four states are shared in the AgdaLexer; make sure
- # any change is compatible with Agda as well or copy over and change
- 'comment': [
- # Multiline Comments
- (r'[^-{}]+', Comment.Multiline),
- (r'\{-', Comment.Multiline, '#push'),
- (r'-\}', Comment.Multiline, '#pop'),
- (r'[-{}]', Comment.Multiline),
- ],
- 'character': [
- # Allows multi-chars, incorrectly.
- (r"[^\\']", String.Char),
- (r"\\", String.Escape, 'escape'),
- ("'", String.Char, '#pop'),
- ],
- 'string': [
- (r'[^\\"]+', String),
- (r"\\", String.Escape, 'escape'),
- ('"', String, '#pop'),
- ],
- 'escape': [
- (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
- (r'\^[][A-Z@^_]', String.Escape, '#pop'),
- ('|'.join(ascii), String.Escape, '#pop'),
- (r'o[0-7]+', String.Escape, '#pop'),
- (r'x[\da-fA-F]+', String.Escape, '#pop'),
- (r'\d+', String.Escape, '#pop'),
- (r'\s+\\', String.Escape, '#pop')
- ],
- }
- class AgdaLexer(RegexLexer):
- """
- For the `Agda <http://wiki.portal.chalmers.se/agda/pmwiki.php>`_
- dependently typed functional programming language and proof assistant.
- .. versionadded:: 2.0
- """
- name = 'Agda'
- aliases = ['agda']
- filenames = ['*.agda']
- mimetypes = ['text/x-agda']
- reserved = ['abstract', 'codata', 'coinductive', 'constructor', 'data',
- 'field', 'forall', 'hiding', 'in', 'inductive', 'infix',
- 'infixl', 'infixr', 'instance', 'let', 'mutual', 'open',
- 'pattern', 'postulate', 'primitive', 'private',
- 'quote', 'quoteGoal', 'quoteTerm',
- 'record', 'renaming', 'rewrite', 'syntax', 'tactic',
- 'unquote', 'unquoteDecl', 'using', 'where', 'with']
- tokens = {
- 'root': [
- # Declaration
- (r'^(\s*)([^\s(){}]+)(\s*)(:)(\s*)',
- bygroups(Text, Name.Function, Text, Operator.Word, Text)),
- # Comments
- (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
- (r'\{-', Comment.Multiline, 'comment'),
- # Holes
- (r'\{!', Comment.Directive, 'hole'),
- # Lexemes:
- # Identifiers
- (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
- (r'(import|module)(\s+)', bygroups(Keyword.Reserved, Text), 'module'),
- (u'\\b(Set|Prop)[\u2080-\u2089]*\\b', Keyword.Type),
- # Special Symbols
- (r'(\(|\)|\{|\})', Operator),
- (u'(\\.{1,3}|\\||\u03BB|\u2200|\u2192|:|=|->)', Operator.Word),
- # Numbers
- (r'\d+[eE][+-]?\d+', Number.Float),
- (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
- (r'0[xX][\da-fA-F]+', Number.Hex),
- (r'\d+', Number.Integer),
- # Strings
- (r"'", String.Char, 'character'),
- (r'"', String, 'string'),
- (r'[^\s(){}]+', Text),
- (r'\s+?', Text), # Whitespace
- ],
- 'hole': [
- # Holes
- (r'[^!{}]+', Comment.Directive),
- (r'\{!', Comment.Directive, '#push'),
- (r'!\}', Comment.Directive, '#pop'),
- (r'[!{}]', Comment.Directive),
- ],
- 'module': [
- (r'\{-', Comment.Multiline, 'comment'),
- (r'[a-zA-Z][\w.]*', Name, '#pop'),
- (r'[\W0-9_]+', Text)
- ],
- 'comment': HaskellLexer.tokens['comment'],
- 'character': HaskellLexer.tokens['character'],
- 'string': HaskellLexer.tokens['string'],
- 'escape': HaskellLexer.tokens['escape']
- }
- class CryptolLexer(RegexLexer):
- """
- FIXME: A Cryptol2 lexer based on the lexemes defined in the Haskell 98 Report.
- .. versionadded:: 2.0
- """
- name = 'Cryptol'
- aliases = ['cryptol', 'cry']
- filenames = ['*.cry']
- mimetypes = ['text/x-cryptol']
- reserved = ('Arith', 'Bit', 'Cmp', 'False', 'Inf', 'True', 'else',
- 'export', 'extern', 'fin', 'if', 'import', 'inf', 'lg2',
- 'max', 'min', 'module', 'newtype', 'pragma', 'property',
- 'then', 'type', 'where', 'width')
- ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
- 'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
- 'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
- 'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')
- tokens = {
- 'root': [
- # Whitespace:
- (r'\s+', Text),
- # (r'--\s*|.*$', Comment.Doc),
- (r'//.*$', Comment.Single),
- (r'/\*', Comment.Multiline, 'comment'),
- # Lexemes:
- # Identifiers
- (r'\bimport\b', Keyword.Reserved, 'import'),
- (r'\bmodule\b', Keyword.Reserved, 'module'),
- (r'\berror\b', Name.Exception),
- (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
- (r'^[_a-z][\w\']*', Name.Function),
- (r"'?[_a-z][\w']*", Name),
- (r"('')?[A-Z][\w\']*", Keyword.Type),
- # Operators
- (r'\\(?![:!#$%&*+.\\/<=>?@^|~-]+)', Name.Function), # lambda operator
- (r'(<-|::|->|=>|=)(?![:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word), # specials
- (r':[:!#$%&*+.\\/<=>?@^|~-]*', Keyword.Type), # Constructor operators
- (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator), # Other operators
- # Numbers
- (r'\d+[eE][+-]?\d+', Number.Float),
- (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
- (r'0[oO][0-7]+', Number.Oct),
- (r'0[xX][\da-fA-F]+', Number.Hex),
- (r'\d+', Number.Integer),
- # Character/String Literals
- (r"'", String.Char, 'character'),
- (r'"', String, 'string'),
- # Special
- (r'\[\]', Keyword.Type),
- (r'\(\)', Name.Builtin),
- (r'[][(),;`{}]', Punctuation),
- ],
- 'import': [
- # Import statements
- (r'\s+', Text),
- (r'"', String, 'string'),
- # after "funclist" state
- (r'\)', Punctuation, '#pop'),
- (r'qualified\b', Keyword),
- # import X as Y
- (r'([A-Z][\w.]*)(\s+)(as)(\s+)([A-Z][\w.]*)',
- bygroups(Name.Namespace, Text, Keyword, Text, Name), '#pop'),
- # import X hiding (functions)
- (r'([A-Z][\w.]*)(\s+)(hiding)(\s+)(\()',
- bygroups(Name.Namespace, Text, Keyword, Text, Punctuation), 'funclist'),
- # import X (functions)
- (r'([A-Z][\w.]*)(\s+)(\()',
- bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
- # import X
- (r'[\w.]+', Name.Namespace, '#pop'),
- ],
- 'module': [
- (r'\s+', Text),
- (r'([A-Z][\w.]*)(\s+)(\()',
- bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
- (r'[A-Z][\w.]*', Name.Namespace, '#pop'),
- ],
- 'funclist': [
- (r'\s+', Text),
- (r'[A-Z]\w*', Keyword.Type),
- (r'(_[\w\']+|[a-z][\w\']*)', Name.Function),
- # TODO: these don't match the comments in docs, remove.
- # (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
- # (r'{-', Comment.Multiline, 'comment'),
- (r',', Punctuation),
- (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
- # (HACK, but it makes sense to push two instances, believe me)
- (r'\(', Punctuation, ('funclist', 'funclist')),
- (r'\)', Punctuation, '#pop:2'),
- ],
- 'comment': [
- # Multiline Comments
- (r'[^/*]+', Comment.Multiline),
- (r'/\*', Comment.Multiline, '#push'),
- (r'\*/', Comment.Multiline, '#pop'),
- (r'[*/]', Comment.Multiline),
- ],
- 'character': [
- # Allows multi-chars, incorrectly.
- (r"[^\\']'", String.Char, '#pop'),
- (r"\\", String.Escape, 'escape'),
- ("'", String.Char, '#pop'),
- ],
- 'string': [
- (r'[^\\"]+', String),
- (r"\\", String.Escape, 'escape'),
- ('"', String, '#pop'),
- ],
- 'escape': [
- (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
- (r'\^[][A-Z@^_]', String.Escape, '#pop'),
- ('|'.join(ascii), String.Escape, '#pop'),
- (r'o[0-7]+', String.Escape, '#pop'),
- (r'x[\da-fA-F]+', String.Escape, '#pop'),
- (r'\d+', String.Escape, '#pop'),
- (r'\s+\\', String.Escape, '#pop'),
- ],
- }
- EXTRA_KEYWORDS = {'join', 'split', 'reverse', 'transpose', 'width',
- 'length', 'tail', '<<', '>>', '<<<', '>>>', 'const',
- 'reg', 'par', 'seq', 'ASSERT', 'undefined', 'error',
- 'trace'}
- def get_tokens_unprocessed(self, text):
- stack = ['root']
- for index, token, value in \
- RegexLexer.get_tokens_unprocessed(self, text, stack):
- if token is Name and value in self.EXTRA_KEYWORDS:
- yield index, Name.Builtin, value
- else:
- yield index, token, value
- class LiterateLexer(Lexer):
- """
- Base class for lexers of literate file formats based on LaTeX or Bird-style
- (prefixing each code line with ">").
- Additional options accepted:
- `litstyle`
- If given, must be ``"bird"`` or ``"latex"``. If not given, the style
- is autodetected: if the first non-whitespace character in the source
- is a backslash or percent character, LaTeX is assumed, else Bird.
- """
- bird_re = re.compile(r'(>[ \t]*)(.*\n)')
- def __init__(self, baselexer, **options):
- self.baselexer = baselexer
- Lexer.__init__(self, **options)
- def get_tokens_unprocessed(self, text):
- style = self.options.get('litstyle')
- if style is None:
- style = (text.lstrip()[0:1] in '%\\') and 'latex' or 'bird'
- code = ''
- insertions = []
- if style == 'bird':
- # bird-style
- for match in line_re.finditer(text):
- line = match.group()
- m = self.bird_re.match(line)
- if m:
- insertions.append((len(code),
- [(0, Comment.Special, m.group(1))]))
- code += m.group(2)
- else:
- insertions.append((len(code), [(0, Text, line)]))
- else:
- # latex-style
- from pygments.lexers.markup import TexLexer
- lxlexer = TexLexer(**self.options)
- codelines = 0
- latex = ''
- for match in line_re.finditer(text):
- line = match.group()
- if codelines:
- if line.lstrip().startswith('\\end{code}'):
- codelines = 0
- latex += line
- else:
- code += line
- elif line.lstrip().startswith('\\begin{code}'):
- codelines = 1
- latex += line
- insertions.append((len(code),
- list(lxlexer.get_tokens_unprocessed(latex))))
- latex = ''
- else:
- latex += line
- insertions.append((len(code),
- list(lxlexer.get_tokens_unprocessed(latex))))
- for item in do_insertions(insertions, self.baselexer.get_tokens_unprocessed(code)):
- yield item
- class LiterateHaskellLexer(LiterateLexer):
- """
- For Literate Haskell (Bird-style or LaTeX) source.
- Additional options accepted:
- `litstyle`
- If given, must be ``"bird"`` or ``"latex"``. If not given, the style
- is autodetected: if the first non-whitespace character in the source
- is a backslash or percent character, LaTeX is assumed, else Bird.
- .. versionadded:: 0.9
- """
- name = 'Literate Haskell'
- aliases = ['lhs', 'literate-haskell', 'lhaskell']
- filenames = ['*.lhs']
- mimetypes = ['text/x-literate-haskell']
- def __init__(self, **options):
- hslexer = HaskellLexer(**options)
- LiterateLexer.__init__(self, hslexer, **options)
- class LiterateIdrisLexer(LiterateLexer):
- """
- For Literate Idris (Bird-style or LaTeX) source.
- Additional options accepted:
- `litstyle`
- If given, must be ``"bird"`` or ``"latex"``. If not given, the style
- is autodetected: if the first non-whitespace character in the source
- is a backslash or percent character, LaTeX is assumed, else Bird.
- .. versionadded:: 2.0
- """
- name = 'Literate Idris'
- aliases = ['lidr', 'literate-idris', 'lidris']
- filenames = ['*.lidr']
- mimetypes = ['text/x-literate-idris']
- def __init__(self, **options):
- hslexer = IdrisLexer(**options)
- LiterateLexer.__init__(self, hslexer, **options)
- class LiterateAgdaLexer(LiterateLexer):
- """
- For Literate Agda source.
- Additional options accepted:
- `litstyle`
- If given, must be ``"bird"`` or ``"latex"``. If not given, the style
- is autodetected: if the first non-whitespace character in the source
- is a backslash or percent character, LaTeX is assumed, else Bird.
- .. versionadded:: 2.0
- """
- name = 'Literate Agda'
- aliases = ['lagda', 'literate-agda']
- filenames = ['*.lagda']
- mimetypes = ['text/x-literate-agda']
- def __init__(self, **options):
- agdalexer = AgdaLexer(**options)
- LiterateLexer.__init__(self, agdalexer, litstyle='latex', **options)
- class LiterateCryptolLexer(LiterateLexer):
- """
- For Literate Cryptol (Bird-style or LaTeX) source.
- Additional options accepted:
- `litstyle`
- If given, must be ``"bird"`` or ``"latex"``. If not given, the style
- is autodetected: if the first non-whitespace character in the source
- is a backslash or percent character, LaTeX is assumed, else Bird.
- .. versionadded:: 2.0
- """
- name = 'Literate Cryptol'
- aliases = ['lcry', 'literate-cryptol', 'lcryptol']
- filenames = ['*.lcry']
- mimetypes = ['text/x-literate-cryptol']
- def __init__(self, **options):
- crylexer = CryptolLexer(**options)
- LiterateLexer.__init__(self, crylexer, **options)
- class KokaLexer(RegexLexer):
- """
- Lexer for the `Koka <http://koka.codeplex.com>`_
- language.
- .. versionadded:: 1.6
- """
- name = 'Koka'
- aliases = ['koka']
- filenames = ['*.kk', '*.kki']
- mimetypes = ['text/x-koka']
- keywords = [
- 'infix', 'infixr', 'infixl',
- 'type', 'cotype', 'rectype', 'alias',
- 'struct', 'con',
- 'fun', 'function', 'val', 'var',
- 'external',
- 'if', 'then', 'else', 'elif', 'return', 'match',
- 'private', 'public', 'private',
- 'module', 'import', 'as',
- 'include', 'inline',
- 'rec',
- 'try', 'yield', 'enum',
- 'interface', 'instance',
- ]
- # keywords that are followed by a type
- typeStartKeywords = [
- 'type', 'cotype', 'rectype', 'alias', 'struct', 'enum',
- ]
- # keywords valid in a type
- typekeywords = [
- 'forall', 'exists', 'some', 'with',
- ]
- # builtin names and special names
- builtin = [
- 'for', 'while', 'repeat',
- 'foreach', 'foreach-indexed',
- 'error', 'catch', 'finally',
- 'cs', 'js', 'file', 'ref', 'assigned',
- ]
- # symbols that can be in an operator
- symbols = r'[$%&*+@!/\\^~=.:\-?|<>]+'
- # symbol boundary: an operator keyword should not be followed by any of these
- sboundary = '(?!' + symbols + ')'
- # name boundary: a keyword should not be followed by any of these
- boundary = r'(?![\w/])'
- # koka token abstractions
- tokenType = Name.Attribute
- tokenTypeDef = Name.Class
- tokenConstructor = Generic.Emph
- # main lexer
- tokens = {
- 'root': [
- include('whitespace'),
- # go into type mode
- (r'::?' + sboundary, tokenType, 'type'),
- (r'(alias)(\s+)([a-z]\w*)?', bygroups(Keyword, Text, tokenTypeDef),
- 'alias-type'),
- (r'(struct)(\s+)([a-z]\w*)?', bygroups(Keyword, Text, tokenTypeDef),
- 'struct-type'),
- ((r'(%s)' % '|'.join(typeStartKeywords)) +
- r'(\s+)([a-z]\w*)?', bygroups(Keyword, Text, tokenTypeDef),
- 'type'),
- # special sequences of tokens (we use ?: for non-capturing group as
- # required by 'bygroups')
- (r'(module)(\s+)(interface\s+)?((?:[a-z]\w*/)*[a-z]\w*)',
- bygroups(Keyword, Text, Keyword, Name.Namespace)),
- (r'(import)(\s+)((?:[a-z]\w*/)*[a-z]\w*)'
- r'(?:(\s*)(=)(\s*)((?:qualified\s*)?)'
- r'((?:[a-z]\w*/)*[a-z]\w*))?',
- bygroups(Keyword, Text, Name.Namespace, Text, Keyword, Text,
- Keyword, Name.Namespace)),
- (r'(^(?:(?:public|private)\s*)?(?:function|fun|val))'
- r'(\s+)([a-z]\w*|\((?:' + symbols + r'|/)\))',
- bygroups(Keyword, Text, Name.Function)),
- (r'(^(?:(?:public|private)\s*)?external)(\s+)(inline\s+)?'
- r'([a-z]\w*|\((?:' + symbols + r'|/)\))',
- bygroups(Keyword, Text, Keyword, Name.Function)),
- # keywords
- (r'(%s)' % '|'.join(typekeywords) + boundary, Keyword.Type),
- (r'(%s)' % '|'.join(keywords) + boundary, Keyword),
- (r'(%s)' % '|'.join(builtin) + boundary, Keyword.Pseudo),
- (r'::?|:=|\->|[=.]' + sboundary, Keyword),
- # names
- (r'((?:[a-z]\w*/)*)([A-Z]\w*)',
- bygroups(Name.Namespace, tokenConstructor)),
- (r'((?:[a-z]\w*/)*)([a-z]\w*)', bygroups(Name.Namespace, Name)),
- (r'((?:[a-z]\w*/)*)(\((?:' + symbols + r'|/)\))',
- bygroups(Name.Namespace, Name)),
- (r'_\w*', Name.Variable),
- # literal string
- (r'@"', String.Double, 'litstring'),
- # operators
- (symbols + "|/(?![*/])", Operator),
- (r'`', Operator),
- (r'[{}()\[\];,]', Punctuation),
- # literals. No check for literal characters with len > 1
- (r'[0-9]+\.[0-9]+([eE][\-+]?[0-9]+)?', Number.Float),
- (r'0[xX][0-9a-fA-F]+', Number.Hex),
- (r'[0-9]+', Number.Integer),
- (r"'", String.Char, 'char'),
- (r'"', String.Double, 'string'),
- ],
- # type started by alias
- 'alias-type': [
- (r'=', Keyword),
- include('type')
- ],
- # type started by struct
- 'struct-type': [
- (r'(?=\((?!,*\)))', Punctuation, '#pop'),
- include('type')
- ],
- # type started by colon
- 'type': [
- (r'[(\[<]', tokenType, 'type-nested'),
- include('type-content')
- ],
- # type nested in brackets: can contain parameters, comma etc.
- 'type-nested': [
- (r'[)\]>]', tokenType, '#pop'),
- (r'[(\[<]', tokenType, 'type-nested'),
- (r',', tokenType),
- (r'([a-z]\w*)(\s*)(:)(?!:)',
- bygroups(Name, Text, tokenType)), # parameter name
- include('type-content')
- ],
- # shared contents of a type
- 'type-content': [
- include('whitespace'),
- # keywords
- (r'(%s)' % '|'.join(typekeywords) + boundary, Keyword),
- (r'(?=((%s)' % '|'.join(keywords) + boundary + '))',
- Keyword, '#pop'), # need to match because names overlap...
- # kinds
- (r'[EPHVX]' + boundary, tokenType),
- # type names
- (r'[a-z][0-9]*(?![\w/])', tokenType),
- (r'_\w*', tokenType.Variable), # Generic.Emph
- (r'((?:[a-z]\w*/)*)([A-Z]\w*)',
- bygroups(Name.Namespace, tokenType)),
- (r'((?:[a-z]\w*/)*)([a-z]\w+)',
- bygroups(Name.Namespace, tokenType)),
- # type keyword operators
- (r'::|->|[.:|]', tokenType),
- # catchall
- default('#pop')
- ],
- # comments and literals
- 'whitespace': [
- (r'\n\s*#.*$', Comment.Preproc),
- (r'\s+', Text),
- (r'/\*', Comment.Multiline, 'comment'),
- (r'//.*$', Comment.Single)
- ],
- 'comment': [
- (r'[^/*]+', Comment.Multiline),
- (r'/\*', Comment.Multiline, '#push'),
- (r'\*/', Comment.Multiline, '#pop'),
- (r'[*/]', Comment.Multiline),
- ],
- 'litstring': [
- (r'[^"]+', String.Double),
- (r'""', String.Escape),
- (r'"', String.Double, '#pop'),
- ],
- 'string': [
- (r'[^\\"\n]+', String.Double),
- include('escape-sequence'),
- (r'["\n]', String.Double, '#pop'),
- ],
- 'char': [
- (r'[^\\\'\n]+', String.Char),
- include('escape-sequence'),
- (r'[\'\n]', String.Char, '#pop'),
- ],
- 'escape-sequence': [
- (r'\\[nrt\\"\']', String.Escape),
- (r'\\x[0-9a-fA-F]{2}', String.Escape),
- (r'\\u[0-9a-fA-F]{4}', String.Escape),
- # Yes, \U literals are 6 hex digits.
- (r'\\U[0-9a-fA-F]{6}', String.Escape)
- ]
- }
|