12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704 |
- """
- pygments.lexers.mojo
- ~~~~~~~~~~~~~~~~~~~~
- Lexers for Mojo and related languages.
- :copyright: Copyright 2006-2024 by the Pygments team, see AUTHORS.
- :license: BSD, see LICENSE for details.
- """
- import keyword
- from pygments import unistring as uni
- from pygments.lexer import (
- RegexLexer,
- bygroups,
- combined,
- default,
- include,
- this,
- using,
- words,
- )
- from pygments.token import (
- Comment,
- # Error,
- Keyword,
- Name,
- Number,
- Operator,
- Punctuation,
- String,
- Text,
- Whitespace,
- )
- from pygments.util import shebang_matches
# Public API of this module: only the lexer class is exported by
# ``from ... import *``.
__all__ = ["MojoLexer"]
class MojoLexer(RegexLexer):
    """
    For Mojo source code (version 24.2.1).

    The ``tokens`` table maps state names to ordered rule lists. Rule order
    inside a state is significant (``RegexLexer`` uses the first rule that
    matches), so new rules must be inserted with care.
    """

    # Lexer metadata: display name, homepage, lookup aliases, filename globs
    # and MIME types.
    name = "Mojo"
    url = "https://docs.modular.com/mojo/"
    aliases = ["mojo", "🔥"]
    filenames = [
        "*.mojo",
        "*.🔥",
    ]
    mimetypes = [
        "text/x-mojo",
        "application/x-mojo",
    ]
    version_added = "2.18"

    # Regex for an identifier: one XID_Start character followed by any number
    # of XID_Continue characters.
    uni_name = f"[{uni.xid_start}][{uni.xid_continue}]*"

    def innerstring_rules(ttype):
        """Build the rules for the body of a plain (non-f) string literal.

        Called while the class body executes (hence no ``self``) to fill the
        ``strings-single`` and ``strings-double`` states.

        :param ttype: token type emitted for ordinary string content.
        :return: list of rule tuples; ``%``-style and ``{}``-style format
            placeholders are emitted as ``String.Interpol``, everything else
            as ``ttype``.
        """
        return [
            # the old style '%s' % (...) string formatting (still valid in Py3)
            (
                r"%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?"
                "[hlL]?[E-GXc-giorsaux%]",
                String.Interpol,
            ),
            # the new style '{}'.format(...) string formatting
            (
                r"\{"
                r"((\w+)((\.\w+)|(\[[^\]]+\]))*)?"  # field name
                r"(\![sra])?"  # conversion
                r"(\:(.?[<>=\^])?[-+ ]?#?0?(\d+)?,?(\.\d+)?[E-GXb-gnosx%]?)?"
                r"\}",
                String.Interpol,
            ),
            # backslashes, quotes and formatting signs must be parsed one at a time
            (r'[^\\\'"%{\n]+', ttype),
            (r'[\'"\\]', ttype),
            # unhandled string formatting sign
            (r"%|(\{{1,2})", ttype),
            # newlines are an error (use "nl" state)
        ]

    def fstring_rules(ttype):
        """Build the rules for the body of an f-string literal.

        Called while the class body executes (hence no ``self``) to fill the
        ``fstrings-single`` and ``fstrings-double`` states.

        :param ttype: token type emitted for ordinary string content.
        :return: list of rule tuples; braces are emitted as
            ``String.Interpol`` and an opening brace enters the
            ``expr-inside-fstring`` state.
        """
        return [
            # Assuming that a '}' is the closing brace after format specifier.
            # Sadly, this means that we won't detect syntax error. But it's
            # more important to parse correct syntax correctly, than to
            # highlight invalid syntax.
            (r"\}", String.Interpol),
            (r"\{", String.Interpol, "expr-inside-fstring"),
            # backslashes, quotes and formatting signs must be parsed one at a time
            (r'[^\\\'"{}\n]+', ttype),
            (r'[\'"\\]', ttype),
            # newlines are an error (use "nl" state)
        ]

    tokens = {
        # Entry state: whitespace, docstrings, comments, statement keywords
        # and the declaration keywords that push a dedicated "name" state.
        "root": [
            (r"\s+", Whitespace),
            # Triple-quoted string at the start of a line -> docstring.
            (
                r'^(\s*)([rRuUbB]{,2})("""(?:.|\n)*?""")',
                bygroups(Whitespace, String.Affix, String.Doc),
            ),
            (
                r"^(\s*)([rRuUbB]{,2})('''(?:.|\n)*?''')",
                bygroups(Whitespace, String.Affix, String.Doc),
            ),
            (r"\A#!.+$", Comment.Hashbang),
            (r"#.*$", Comment.Single),
            # Backslash line continuation, then a stray backslash.
            (r"\\\n", Whitespace),
            (r"\\", Whitespace),
            include("keywords"),
            include("soft-keywords"),
            # In the original PR, all the below here used ((?:\s|\\\s)+) to
            # designate whitespace, but I can't find any example of this being
            # needed in the example file, so we're replacing it with `\s+`.
            (
                r"(alias)(\s+)",
                bygroups(Keyword, Whitespace),
                "varname",  # TODO varname the right fit?
            ),
            (r"(var)(\s+)", bygroups(Keyword, Whitespace), "varname"),
            (r"(def)(\s+)", bygroups(Keyword, Whitespace), "funcname"),
            (r"(fn)(\s+)", bygroups(Keyword, Whitespace), "funcname"),
            (
                r"(class)(\s+)",
                bygroups(Keyword, Whitespace),
                "classname",
            ),  # not implemented yet
            # `struct` and `trait` share the same name state.
            (r"(struct)(\s+)", bygroups(Keyword, Whitespace), "structname"),
            (r"(trait)(\s+)", bygroups(Keyword, Whitespace), "structname"),
            (r"(from)(\s+)", bygroups(Keyword.Namespace, Whitespace), "fromimport"),
            (r"(import)(\s+)", bygroups(Keyword.Namespace, Whitespace), "import"),
            include("expr"),
        ],
        # Expressions: string literal openers (each pushes the matching
        # string-body state), numbers, operators, punctuation and names.
        "expr": [
            # raw f-strings
            (
                '(?i)(rf|fr)(""")',
                bygroups(String.Affix, String.Double),
                combined("rfstringescape", "tdqf"),
            ),
            (
                "(?i)(rf|fr)(''')",
                bygroups(String.Affix, String.Single),
                combined("rfstringescape", "tsqf"),
            ),
            (
                '(?i)(rf|fr)(")',
                bygroups(String.Affix, String.Double),
                combined("rfstringescape", "dqf"),
            ),
            (
                "(?i)(rf|fr)(')",
                bygroups(String.Affix, String.Single),
                combined("rfstringescape", "sqf"),
            ),
            # non-raw f-strings
            (
                '([fF])(""")',
                bygroups(String.Affix, String.Double),
                combined("fstringescape", "tdqf"),
            ),
            (
                "([fF])(''')",
                bygroups(String.Affix, String.Single),
                combined("fstringescape", "tsqf"),
            ),
            (
                '([fF])(")',
                bygroups(String.Affix, String.Double),
                combined("fstringescape", "dqf"),
            ),
            (
                "([fF])(')",
                bygroups(String.Affix, String.Single),
                combined("fstringescape", "sqf"),
            ),
            # raw bytes and strings
            ('(?i)(rb|br|r)(""")', bygroups(String.Affix, String.Double), "tdqs"),
            ("(?i)(rb|br|r)(''')", bygroups(String.Affix, String.Single), "tsqs"),
            ('(?i)(rb|br|r)(")', bygroups(String.Affix, String.Double), "dqs"),
            ("(?i)(rb|br|r)(')", bygroups(String.Affix, String.Single), "sqs"),
            # non-raw strings
            (
                '([uU]?)(""")',
                bygroups(String.Affix, String.Double),
                combined("stringescape", "tdqs"),
            ),
            (
                "([uU]?)(''')",
                bygroups(String.Affix, String.Single),
                combined("stringescape", "tsqs"),
            ),
            (
                '([uU]?)(")',
                bygroups(String.Affix, String.Double),
                combined("stringescape", "dqs"),
            ),
            (
                "([uU]?)(')",
                bygroups(String.Affix, String.Single),
                combined("stringescape", "sqs"),
            ),
            # non-raw bytes
            (
                '([bB])(""")',
                bygroups(String.Affix, String.Double),
                combined("bytesescape", "tdqs"),
            ),
            (
                "([bB])(''')",
                bygroups(String.Affix, String.Single),
                combined("bytesescape", "tsqs"),
            ),
            (
                '([bB])(")',
                bygroups(String.Affix, String.Double),
                combined("bytesescape", "dqs"),
            ),
            (
                "([bB])(')",
                bygroups(String.Affix, String.Single),
                combined("bytesescape", "sqs"),
            ),
            # Horizontal whitespace only; newlines are left to other rules.
            (r"[^\S\n]+", Text),
            include("numbers"),
            (r"!=|==|<<|>>|:=|[-~+/*%=<>&^|.]", Operator),
            (r"([]{}:\(\),;[])+", Punctuation),
            (r"(in|is|and|or|not)\b", Operator.Word),
            include("expr-keywords"),
            include("builtins"),
            include("magicfuncs"),
            include("magicvars"),
            include("name"),
        ],
        # Replacement field of an f-string, entered on "{".
        "expr-inside-fstring": [
            # Nested brackets are tracked in a separate state so that their
            # closing bracket is not mistaken for the end of the field.
            (r"[{([]", Punctuation, "expr-inside-fstring-inner"),
            # without format specifier
            (
                r"(=\s*)?"  # debug (https://bugs.python.org/issue36817)
                r"(\![sraf])?"  # conversion
                r"\}",
                String.Interpol,
                "#pop",
            ),
            # with format specifier
            # we'll catch the remaining '}' in the outer scope
            (
                r"(=\s*)?"  # debug (https://bugs.python.org/issue36817)
                r"(\![sraf])?"  # conversion
                r":",
                String.Interpol,
                "#pop",
            ),
            (r"\s+", Whitespace),  # allow new lines
            include("expr"),
        ],
        # Bracketed sub-expression inside an f-string replacement field;
        # pushes itself on every opener and pops on every closer.
        "expr-inside-fstring-inner": [
            (r"[{([]", Punctuation, "expr-inside-fstring-inner"),
            (r"[])}]", Punctuation, "#pop"),
            (r"\s+", Whitespace),  # allow new lines
            include("expr"),
        ],
        # Keywords that may appear inside an expression.
        "expr-keywords": [
            # Based on https://docs.python.org/3/reference/expressions.html
            (
                words(
                    (
                        "async for",  # TODO https://docs.modular.com/mojo/roadmap#no-async-for-or-async-with
                        "async with",  # TODO https://docs.modular.com/mojo/roadmap#no-async-for-or-async-with
                        "await",
                        "else",
                        "for",
                        "if",
                        "lambda",
                        "yield",
                        "yield from",
                    ),
                    suffix=r"\b",
                ),
                Keyword,
            ),
            (words(("True", "False", "None"), suffix=r"\b"), Keyword.Constant),
        ],
        # Statement-level keywords, included from "root".
        "keywords": [
            (
                words(
                    (
                        "assert",
                        "async",
                        "await",
                        "borrowed",
                        "break",
                        "continue",
                        "del",
                        "elif",
                        "else",
                        "except",
                        "finally",
                        "for",
                        "global",
                        "if",
                        "lambda",
                        "pass",
                        "raise",
                        "nonlocal",
                        "return",
                        "try",
                        "while",
                        "yield",
                        "yield from",
                        "as",
                        "with",
                    ),
                    suffix=r"\b",
                ),
                Keyword,
            ),
            (words(("True", "False", "None"), suffix=r"\b"), Keyword.Constant),
        ],
        "soft-keywords": [
            # `match`, `case` and `_` soft keywords
            (
                r"(^[ \t]*)"  # at beginning of line + possible indentation
                r"(match|case)\b"  # a possible keyword
                r"(?![ \t]*(?:"  # not followed by...
                r"[:,;=^&|@~)\]}]|(?:" +  # characters and keywords that mean this isn't
                # pattern matching (but None/True/False is ok)
                r"|".join(k for k in keyword.kwlist if k[0].islower())
                + r")\b))",
                bygroups(Whitespace, Keyword),
                "soft-keywords-inner",
            ),
        ],
        "soft-keywords-inner": [
            # optional `_` keyword
            (r"(\s+)([^\n_]*)(_\b)", bygroups(Whitespace, using(this), Keyword)),
            default("#pop"),
        ],
        # Builtin functions and types. The look-behind keeps attribute
        # accesses such as `obj.list` from being highlighted as builtins.
        "builtins": [
            (
                words(
                    (
                        "__import__",
                        "abs",
                        "aiter",
                        "all",
                        "any",
                        "bin",
                        "bool",
                        "bytearray",
                        "breakpoint",
                        "bytes",
                        "callable",
                        "chr",
                        "classmethod",
                        "compile",
                        "complex",
                        "delattr",
                        "dict",
                        "dir",
                        "divmod",
                        "enumerate",
                        "eval",
                        "filter",
                        "float",
                        "format",
                        "frozenset",
                        "getattr",
                        "globals",
                        "hasattr",
                        "hash",
                        "hex",
                        "id",
                        "input",
                        "int",
                        "isinstance",
                        "issubclass",
                        "iter",
                        "len",
                        "list",
                        "locals",
                        "map",
                        "max",
                        "memoryview",
                        "min",
                        "next",
                        "object",
                        "oct",
                        "open",
                        "ord",
                        "pow",
                        "print",
                        "property",
                        "range",
                        "repr",
                        "reversed",
                        "round",
                        "set",
                        "setattr",
                        "slice",
                        "sorted",
                        "staticmethod",
                        "str",
                        "sum",
                        "super",
                        "tuple",
                        "type",
                        "vars",
                        "zip",
                        # Mojo builtin types: https://docs.modular.com/mojo/stdlib/builtin/
                        # "Error" is deliberately absent from this list: it
                        # has its own Name.Exception rule at the end of this
                        # state, and listing it here as well would make that
                        # rule unreachable because the first match wins.
                        "AnyType",
                        "Coroutine",
                        "DType",
                        "Int",
                        "List",
                        "ListLiteral",
                        "Scalar",
                        "Int8",
                        "UInt8",
                        "Int16",
                        "UInt16",
                        "Int32",
                        "UInt32",
                        "Int64",
                        "UInt64",
                        "BFloat16",
                        "Float16",
                        "Float32",
                        "Float64",
                        "SIMD",
                        "String",
                        "Tensor",
                        "Tuple",
                        "Movable",
                        "Copyable",
                        "CollectionElement",
                    ),
                    prefix=r"(?<!\.)",
                    suffix=r"\b",
                ),
                Name.Builtin,
            ),
            (r"(?<!\.)(self|Ellipsis|NotImplemented|cls)\b", Name.Builtin.Pseudo),
            # The exception type gets its own token type.
            (
                words(
                    ("Error",),
                    prefix=r"(?<!\.)",
                    suffix=r"\b",
                ),
                Name.Exception,
            ),
        ],
        # Dunder method names.
        "magicfuncs": [
            (
                words(
                    (
                        "__abs__",
                        "__add__",
                        "__aenter__",
                        "__aexit__",
                        "__aiter__",
                        "__and__",
                        "__anext__",
                        "__await__",
                        "__bool__",
                        "__bytes__",
                        "__call__",
                        "__complex__",
                        "__contains__",
                        "__del__",
                        "__delattr__",
                        "__delete__",
                        "__delitem__",
                        "__dir__",
                        "__divmod__",
                        "__enter__",
                        "__eq__",
                        "__exit__",
                        "__float__",
                        "__floordiv__",
                        "__format__",
                        "__ge__",
                        "__get__",
                        "__getattr__",
                        "__getattribute__",
                        "__getitem__",
                        "__gt__",
                        "__hash__",
                        "__iadd__",
                        "__iand__",
                        "__ifloordiv__",
                        "__ilshift__",
                        "__imatmul__",
                        "__imod__",
                        "__imul__",
                        "__index__",
                        "__init__",
                        "__instancecheck__",
                        "__int__",
                        "__invert__",
                        "__ior__",
                        "__ipow__",
                        "__irshift__",
                        "__isub__",
                        "__iter__",
                        "__itruediv__",
                        "__ixor__",
                        "__le__",
                        "__len__",
                        "__length_hint__",
                        "__lshift__",
                        "__lt__",
                        "__matmul__",
                        "__missing__",
                        "__mod__",
                        "__mul__",
                        "__ne__",
                        "__neg__",
                        "__new__",
                        "__next__",
                        "__or__",
                        "__pos__",
                        "__pow__",
                        "__prepare__",
                        "__radd__",
                        "__rand__",
                        "__rdivmod__",
                        "__repr__",
                        "__reversed__",
                        "__rfloordiv__",
                        "__rlshift__",
                        "__rmatmul__",
                        "__rmod__",
                        "__rmul__",
                        "__ror__",
                        "__round__",
                        "__rpow__",
                        "__rrshift__",
                        "__rshift__",
                        "__rsub__",
                        "__rtruediv__",
                        "__rxor__",
                        "__set__",
                        "__setattr__",
                        "__setitem__",
                        "__str__",
                        "__sub__",
                        "__subclasscheck__",
                        "__truediv__",
                        "__xor__",
                    ),
                    suffix=r"\b",
                ),
                Name.Function.Magic,
            ),
        ],
        # Dunder attribute names.
        "magicvars": [
            (
                words(
                    (
                        "__annotations__",
                        "__bases__",
                        "__class__",
                        "__closure__",
                        "__code__",
                        "__defaults__",
                        "__dict__",
                        "__doc__",
                        "__file__",
                        "__func__",
                        "__globals__",
                        "__kwdefaults__",
                        "__module__",
                        "__mro__",
                        "__name__",
                        "__objclass__",
                        "__qualname__",
                        "__self__",
                        "__slots__",
                        "__weakref__",
                    ),
                    suffix=r"\b",
                ),
                Name.Variable.Magic,
            ),
        ],
        # Numeric literals; the float rules come first so that the integer
        # rule does not consume the leading digits of a float.
        "numbers": [
            (
                r"(\d(?:_?\d)*\.(?:\d(?:_?\d)*)?|(?:\d(?:_?\d)*)?\.\d(?:_?\d)*)"
                r"([eE][+-]?\d(?:_?\d)*)?",
                Number.Float,
            ),
            (r"\d(?:_?\d)*[eE][+-]?\d(?:_?\d)*j?", Number.Float),
            (r"0[oO](?:_?[0-7])+", Number.Oct),
            (r"0[bB](?:_?[01])+", Number.Bin),
            (r"0[xX](?:_?[a-fA-F0-9])+", Number.Hex),
            (r"\d(?:_?\d)*", Number.Integer),
        ],
        "name": [
            (r"@" + uni_name, Name.Decorator),
            (r"@", Operator),  # new matrix multiplication operator
            (uni_name, Name),
        ],
        # One-shot states pushed after a declaration keyword: tag the next
        # identifier with the appropriate Name subtype and return.
        "varname": [
            (uni_name, Name.Variable, "#pop"),
        ],
        "funcname": [
            include("magicfuncs"),
            (uni_name, Name.Function, "#pop"),
            default("#pop"),
        ],
        "classname": [
            (uni_name, Name.Class, "#pop"),
        ],
        "structname": [
            (uni_name, Name.Struct, "#pop"),
        ],
        # After `import`: dotted module names, `as` aliases and commas.
        "import": [
            (r"(\s+)(as)(\s+)", bygroups(Whitespace, Keyword, Whitespace)),
            (r"\.", Name.Namespace),
            (uni_name, Name.Namespace),
            (r"(\s*)(,)(\s*)", bygroups(Whitespace, Operator, Whitespace)),
            default("#pop"),  # all else: go back
        ],
        # After `from`: the module path up to the `import` keyword.
        "fromimport": [
            (r"(\s+)(import)\b", bygroups(Whitespace, Keyword.Namespace), "#pop"),
            (r"\.", Name.Namespace),
            # if None occurs here, it's "raise x from None", since None can
            # never be a module name
            (r"None\b", Keyword.Constant, "#pop"),
            (uni_name, Name.Namespace),
            default("#pop"),
        ],
        # Escape-sequence states, combined with a string-body state by the
        # literal openers in "expr".
        "rfstringescape": [
            (r"\{\{", String.Escape),
            (r"\}\}", String.Escape),
        ],
        "fstringescape": [
            include("rfstringescape"),
            include("stringescape"),
        ],
        "bytesescape": [
            (r'\\([\\abfnrtv"\']|\n|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape)
        ],
        "stringescape": [
            (r"\\(N\{.*?\}|u[a-fA-F0-9]{4}|U[a-fA-F0-9]{8})", String.Escape),
            include("bytesescape"),
        ],
        # String-body rule lists generated by the helper functions above.
        "fstrings-single": fstring_rules(String.Single),
        "fstrings-double": fstring_rules(String.Double),
        "strings-single": innerstring_rules(String.Single),
        "strings-double": innerstring_rules(String.Double),
        # String-body states. Naming scheme: optional "t" (triple-quoted),
        # "dq"/"sq" (double/single quote), "f"/"s" (f-string/plain string).
        "dqf": [
            (r'"', String.Double, "#pop"),
            (r'\\\\|\\"|\\\n', String.Escape),  # included here for raw strings
            include("fstrings-double"),
        ],
        "sqf": [
            (r"'", String.Single, "#pop"),
            (r"\\\\|\\'|\\\n", String.Escape),  # included here for raw strings
            include("fstrings-single"),
        ],
        "dqs": [
            (r'"', String.Double, "#pop"),
            (r'\\\\|\\"|\\\n', String.Escape),  # included here for raw strings
            include("strings-double"),
        ],
        "sqs": [
            (r"'", String.Single, "#pop"),
            (r"\\\\|\\'|\\\n", String.Escape),  # included here for raw strings
            include("strings-single"),
        ],
        # Triple-quoted bodies additionally accept newlines.
        "tdqf": [
            (r'"""', String.Double, "#pop"),
            include("fstrings-double"),
            (r"\n", String.Double),
        ],
        "tsqf": [
            (r"'''", String.Single, "#pop"),
            include("fstrings-single"),
            (r"\n", String.Single),
        ],
        "tdqs": [
            (r'"""', String.Double, "#pop"),
            include("strings-double"),
            (r"\n", String.Double),
        ],
        "tsqs": [
            (r"'''", String.Single, "#pop"),
            include("strings-single"),
            (r"\n", String.Single),
        ],
    }

    def analyse_text(text):
        """Estimate how likely it is that ``text`` is Mojo source.

        :param text: the source text to inspect.
        :return: ``1.0`` when the text starts with a ``mojo`` shebang,
            ``0.5`` when ``"import "`` occurs in the first 1000 characters,
            ``0.0`` otherwise.
        """
        # TODO supported?
        # A shebang naming the `mojo` interpreter is conclusive.
        if shebang_matches(text, r"mojo"):
            return 1.0
        # An import statement is only a weak hint, since many languages have
        # one. Full confidence here would let this lexer claim any source
        # that merely contains "import ", so report a partial score and let
        # more specific lexers win.
        if "import " in text[:1000]:
            return 0.5
        return 0.0
|