mojo.py 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704
  1. """
  2. pygments.lexers.mojo
  3. ~~~~~~~~~~~~~~~~~~~~
  4. Lexers for Mojo and related languages.
  5. :copyright: Copyright 2006-2024 by the Pygments team, see AUTHORS.
  6. :license: BSD, see LICENSE for details.
  7. """
  8. import keyword
  9. from pygments import unistring as uni
  10. from pygments.lexer import (
  11. RegexLexer,
  12. bygroups,
  13. combined,
  14. default,
  15. include,
  16. this,
  17. using,
  18. words,
  19. )
  20. from pygments.token import (
  21. Comment,
  22. # Error,
  23. Keyword,
  24. Name,
  25. Number,
  26. Operator,
  27. Punctuation,
  28. String,
  29. Text,
  30. Whitespace,
  31. )
  32. from pygments.util import shebang_matches
  33. __all__ = ["MojoLexer"]
  34. class MojoLexer(RegexLexer):
  35. """
  36. For Mojo source code (version 24.2.1).
  37. """
  38. name = "Mojo"
  39. url = "https://docs.modular.com/mojo/"
  40. aliases = ["mojo", "🔥"]
  41. filenames = [
  42. "*.mojo",
  43. "*.🔥",
  44. ]
  45. mimetypes = [
  46. "text/x-mojo",
  47. "application/x-mojo",
  48. ]
  49. version_added = "2.18"
  50. uni_name = f"[{uni.xid_start}][{uni.xid_continue}]*"
  51. def innerstring_rules(ttype):
  52. return [
  53. # the old style '%s' % (...) string formatting (still valid in Py3)
  54. (
  55. r"%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?"
  56. "[hlL]?[E-GXc-giorsaux%]",
  57. String.Interpol,
  58. ),
  59. # the new style '{}'.format(...) string formatting
  60. (
  61. r"\{"
  62. r"((\w+)((\.\w+)|(\[[^\]]+\]))*)?" # field name
  63. r"(\![sra])?" # conversion
  64. r"(\:(.?[<>=\^])?[-+ ]?#?0?(\d+)?,?(\.\d+)?[E-GXb-gnosx%]?)?"
  65. r"\}",
  66. String.Interpol,
  67. ),
  68. # backslashes, quotes and formatting signs must be parsed one at a time
  69. (r'[^\\\'"%{\n]+', ttype),
  70. (r'[\'"\\]', ttype),
  71. # unhandled string formatting sign
  72. (r"%|(\{{1,2})", ttype),
  73. # newlines are an error (use "nl" state)
  74. ]
  75. def fstring_rules(ttype):
  76. return [
  77. # Assuming that a '}' is the closing brace after format specifier.
  78. # Sadly, this means that we won't detect syntax error. But it's
  79. # more important to parse correct syntax correctly, than to
  80. # highlight invalid syntax.
  81. (r"\}", String.Interpol),
  82. (r"\{", String.Interpol, "expr-inside-fstring"),
  83. # backslashes, quotes and formatting signs must be parsed one at a time
  84. (r'[^\\\'"{}\n]+', ttype),
  85. (r'[\'"\\]', ttype),
  86. # newlines are an error (use "nl" state)
  87. ]
  88. tokens = {
  89. "root": [
  90. (r"\s+", Whitespace),
  91. (
  92. r'^(\s*)([rRuUbB]{,2})("""(?:.|\n)*?""")',
  93. bygroups(Whitespace, String.Affix, String.Doc),
  94. ),
  95. (
  96. r"^(\s*)([rRuUbB]{,2})('''(?:.|\n)*?''')",
  97. bygroups(Whitespace, String.Affix, String.Doc),
  98. ),
  99. (r"\A#!.+$", Comment.Hashbang),
  100. (r"#.*$", Comment.Single),
  101. (r"\\\n", Whitespace),
  102. (r"\\", Whitespace),
  103. include("keywords"),
  104. include("soft-keywords"),
  105. # In the original PR, all the below here used ((?:\s|\\\s)+) to
  106. # designate whitespace, but I can't find any example of this being
  107. # needed in the example file, so we're replacing it with `\s+`.
  108. (
  109. r"(alias)(\s+)",
  110. bygroups(Keyword, Whitespace),
  111. "varname", # TODO varname the right fit?
  112. ),
  113. (r"(var)(\s+)", bygroups(Keyword, Whitespace), "varname"),
  114. (r"(def)(\s+)", bygroups(Keyword, Whitespace), "funcname"),
  115. (r"(fn)(\s+)", bygroups(Keyword, Whitespace), "funcname"),
  116. (
  117. r"(class)(\s+)",
  118. bygroups(Keyword, Whitespace),
  119. "classname",
  120. ), # not implemented yet
  121. (r"(struct)(\s+)", bygroups(Keyword, Whitespace), "structname"),
  122. (r"(trait)(\s+)", bygroups(Keyword, Whitespace), "structname"),
  123. (r"(from)(\s+)", bygroups(Keyword.Namespace, Whitespace), "fromimport"),
  124. (r"(import)(\s+)", bygroups(Keyword.Namespace, Whitespace), "import"),
  125. include("expr"),
  126. ],
  127. "expr": [
  128. # raw f-strings
  129. (
  130. '(?i)(rf|fr)(""")',
  131. bygroups(String.Affix, String.Double),
  132. combined("rfstringescape", "tdqf"),
  133. ),
  134. (
  135. "(?i)(rf|fr)(''')",
  136. bygroups(String.Affix, String.Single),
  137. combined("rfstringescape", "tsqf"),
  138. ),
  139. (
  140. '(?i)(rf|fr)(")',
  141. bygroups(String.Affix, String.Double),
  142. combined("rfstringescape", "dqf"),
  143. ),
  144. (
  145. "(?i)(rf|fr)(')",
  146. bygroups(String.Affix, String.Single),
  147. combined("rfstringescape", "sqf"),
  148. ),
  149. # non-raw f-strings
  150. (
  151. '([fF])(""")',
  152. bygroups(String.Affix, String.Double),
  153. combined("fstringescape", "tdqf"),
  154. ),
  155. (
  156. "([fF])(''')",
  157. bygroups(String.Affix, String.Single),
  158. combined("fstringescape", "tsqf"),
  159. ),
  160. (
  161. '([fF])(")',
  162. bygroups(String.Affix, String.Double),
  163. combined("fstringescape", "dqf"),
  164. ),
  165. (
  166. "([fF])(')",
  167. bygroups(String.Affix, String.Single),
  168. combined("fstringescape", "sqf"),
  169. ),
  170. # raw bytes and strings
  171. ('(?i)(rb|br|r)(""")', bygroups(String.Affix, String.Double), "tdqs"),
  172. ("(?i)(rb|br|r)(''')", bygroups(String.Affix, String.Single), "tsqs"),
  173. ('(?i)(rb|br|r)(")', bygroups(String.Affix, String.Double), "dqs"),
  174. ("(?i)(rb|br|r)(')", bygroups(String.Affix, String.Single), "sqs"),
  175. # non-raw strings
  176. (
  177. '([uU]?)(""")',
  178. bygroups(String.Affix, String.Double),
  179. combined("stringescape", "tdqs"),
  180. ),
  181. (
  182. "([uU]?)(''')",
  183. bygroups(String.Affix, String.Single),
  184. combined("stringescape", "tsqs"),
  185. ),
  186. (
  187. '([uU]?)(")',
  188. bygroups(String.Affix, String.Double),
  189. combined("stringescape", "dqs"),
  190. ),
  191. (
  192. "([uU]?)(')",
  193. bygroups(String.Affix, String.Single),
  194. combined("stringescape", "sqs"),
  195. ),
  196. # non-raw bytes
  197. (
  198. '([bB])(""")',
  199. bygroups(String.Affix, String.Double),
  200. combined("bytesescape", "tdqs"),
  201. ),
  202. (
  203. "([bB])(''')",
  204. bygroups(String.Affix, String.Single),
  205. combined("bytesescape", "tsqs"),
  206. ),
  207. (
  208. '([bB])(")',
  209. bygroups(String.Affix, String.Double),
  210. combined("bytesescape", "dqs"),
  211. ),
  212. (
  213. "([bB])(')",
  214. bygroups(String.Affix, String.Single),
  215. combined("bytesescape", "sqs"),
  216. ),
  217. (r"[^\S\n]+", Text),
  218. include("numbers"),
  219. (r"!=|==|<<|>>|:=|[-~+/*%=<>&^|.]", Operator),
  220. (r"([]{}:\(\),;[])+", Punctuation),
  221. (r"(in|is|and|or|not)\b", Operator.Word),
  222. include("expr-keywords"),
  223. include("builtins"),
  224. include("magicfuncs"),
  225. include("magicvars"),
  226. include("name"),
  227. ],
  228. "expr-inside-fstring": [
  229. (r"[{([]", Punctuation, "expr-inside-fstring-inner"),
  230. # without format specifier
  231. (
  232. r"(=\s*)?" # debug (https://bugs.python.org/issue36817)
  233. r"(\![sraf])?" # conversion
  234. r"\}",
  235. String.Interpol,
  236. "#pop",
  237. ),
  238. # with format specifier
  239. # we'll catch the remaining '}' in the outer scope
  240. (
  241. r"(=\s*)?" # debug (https://bugs.python.org/issue36817)
  242. r"(\![sraf])?" # conversion
  243. r":",
  244. String.Interpol,
  245. "#pop",
  246. ),
  247. (r"\s+", Whitespace), # allow new lines
  248. include("expr"),
  249. ],
  250. "expr-inside-fstring-inner": [
  251. (r"[{([]", Punctuation, "expr-inside-fstring-inner"),
  252. (r"[])}]", Punctuation, "#pop"),
  253. (r"\s+", Whitespace), # allow new lines
  254. include("expr"),
  255. ],
  256. "expr-keywords": [
  257. # Based on https://docs.python.org/3/reference/expressions.html
  258. (
  259. words(
  260. (
  261. "async for", # TODO https://docs.modular.com/mojo/roadmap#no-async-for-or-async-with
  262. "async with", # TODO https://docs.modular.com/mojo/roadmap#no-async-for-or-async-with
  263. "await",
  264. "else",
  265. "for",
  266. "if",
  267. "lambda",
  268. "yield",
  269. "yield from",
  270. ),
  271. suffix=r"\b",
  272. ),
  273. Keyword,
  274. ),
  275. (words(("True", "False", "None"), suffix=r"\b"), Keyword.Constant),
  276. ],
  277. "keywords": [
  278. (
  279. words(
  280. (
  281. "assert",
  282. "async",
  283. "await",
  284. "borrowed",
  285. "break",
  286. "continue",
  287. "del",
  288. "elif",
  289. "else",
  290. "except",
  291. "finally",
  292. "for",
  293. "global",
  294. "if",
  295. "lambda",
  296. "pass",
  297. "raise",
  298. "nonlocal",
  299. "return",
  300. "try",
  301. "while",
  302. "yield",
  303. "yield from",
  304. "as",
  305. "with",
  306. ),
  307. suffix=r"\b",
  308. ),
  309. Keyword,
  310. ),
  311. (words(("True", "False", "None"), suffix=r"\b"), Keyword.Constant),
  312. ],
  313. "soft-keywords": [
  314. # `match`, `case` and `_` soft keywords
  315. (
  316. r"(^[ \t]*)" # at beginning of line + possible indentation
  317. r"(match|case)\b" # a possible keyword
  318. r"(?![ \t]*(?:" # not followed by...
  319. r"[:,;=^&|@~)\]}]|(?:" + # characters and keywords that mean this isn't
  320. # pattern matching (but None/True/False is ok)
  321. r"|".join(k for k in keyword.kwlist if k[0].islower())
  322. + r")\b))",
  323. bygroups(Whitespace, Keyword),
  324. "soft-keywords-inner",
  325. ),
  326. ],
  327. "soft-keywords-inner": [
  328. # optional `_` keyword
  329. (r"(\s+)([^\n_]*)(_\b)", bygroups(Whitespace, using(this), Keyword)),
  330. default("#pop"),
  331. ],
  332. "builtins": [
  333. (
  334. words(
  335. (
  336. "__import__",
  337. "abs",
  338. "aiter",
  339. "all",
  340. "any",
  341. "bin",
  342. "bool",
  343. "bytearray",
  344. "breakpoint",
  345. "bytes",
  346. "callable",
  347. "chr",
  348. "classmethod",
  349. "compile",
  350. "complex",
  351. "delattr",
  352. "dict",
  353. "dir",
  354. "divmod",
  355. "enumerate",
  356. "eval",
  357. "filter",
  358. "float",
  359. "format",
  360. "frozenset",
  361. "getattr",
  362. "globals",
  363. "hasattr",
  364. "hash",
  365. "hex",
  366. "id",
  367. "input",
  368. "int",
  369. "isinstance",
  370. "issubclass",
  371. "iter",
  372. "len",
  373. "list",
  374. "locals",
  375. "map",
  376. "max",
  377. "memoryview",
  378. "min",
  379. "next",
  380. "object",
  381. "oct",
  382. "open",
  383. "ord",
  384. "pow",
  385. "print",
  386. "property",
  387. "range",
  388. "repr",
  389. "reversed",
  390. "round",
  391. "set",
  392. "setattr",
  393. "slice",
  394. "sorted",
  395. "staticmethod",
  396. "str",
  397. "sum",
  398. "super",
  399. "tuple",
  400. "type",
  401. "vars",
  402. "zip",
  403. # Mojo builtin types: https://docs.modular.com/mojo/stdlib/builtin/
  404. "AnyType",
  405. "Coroutine",
  406. "DType",
  407. "Error",
  408. "Int",
  409. "List",
  410. "ListLiteral",
  411. "Scalar",
  412. "Int8",
  413. "UInt8",
  414. "Int16",
  415. "UInt16",
  416. "Int32",
  417. "UInt32",
  418. "Int64",
  419. "UInt64",
  420. "BFloat16",
  421. "Float16",
  422. "Float32",
  423. "Float64",
  424. "SIMD",
  425. "String",
  426. "Tensor",
  427. "Tuple",
  428. "Movable",
  429. "Copyable",
  430. "CollectionElement",
  431. ),
  432. prefix=r"(?<!\.)",
  433. suffix=r"\b",
  434. ),
  435. Name.Builtin,
  436. ),
  437. (r"(?<!\.)(self|Ellipsis|NotImplemented|cls)\b", Name.Builtin.Pseudo),
  438. (
  439. words(
  440. ("Error",),
  441. prefix=r"(?<!\.)",
  442. suffix=r"\b",
  443. ),
  444. Name.Exception,
  445. ),
  446. ],
  447. "magicfuncs": [
  448. (
  449. words(
  450. (
  451. "__abs__",
  452. "__add__",
  453. "__aenter__",
  454. "__aexit__",
  455. "__aiter__",
  456. "__and__",
  457. "__anext__",
  458. "__await__",
  459. "__bool__",
  460. "__bytes__",
  461. "__call__",
  462. "__complex__",
  463. "__contains__",
  464. "__del__",
  465. "__delattr__",
  466. "__delete__",
  467. "__delitem__",
  468. "__dir__",
  469. "__divmod__",
  470. "__enter__",
  471. "__eq__",
  472. "__exit__",
  473. "__float__",
  474. "__floordiv__",
  475. "__format__",
  476. "__ge__",
  477. "__get__",
  478. "__getattr__",
  479. "__getattribute__",
  480. "__getitem__",
  481. "__gt__",
  482. "__hash__",
  483. "__iadd__",
  484. "__iand__",
  485. "__ifloordiv__",
  486. "__ilshift__",
  487. "__imatmul__",
  488. "__imod__",
  489. "__imul__",
  490. "__index__",
  491. "__init__",
  492. "__instancecheck__",
  493. "__int__",
  494. "__invert__",
  495. "__ior__",
  496. "__ipow__",
  497. "__irshift__",
  498. "__isub__",
  499. "__iter__",
  500. "__itruediv__",
  501. "__ixor__",
  502. "__le__",
  503. "__len__",
  504. "__length_hint__",
  505. "__lshift__",
  506. "__lt__",
  507. "__matmul__",
  508. "__missing__",
  509. "__mod__",
  510. "__mul__",
  511. "__ne__",
  512. "__neg__",
  513. "__new__",
  514. "__next__",
  515. "__or__",
  516. "__pos__",
  517. "__pow__",
  518. "__prepare__",
  519. "__radd__",
  520. "__rand__",
  521. "__rdivmod__",
  522. "__repr__",
  523. "__reversed__",
  524. "__rfloordiv__",
  525. "__rlshift__",
  526. "__rmatmul__",
  527. "__rmod__",
  528. "__rmul__",
  529. "__ror__",
  530. "__round__",
  531. "__rpow__",
  532. "__rrshift__",
  533. "__rshift__",
  534. "__rsub__",
  535. "__rtruediv__",
  536. "__rxor__",
  537. "__set__",
  538. "__setattr__",
  539. "__setitem__",
  540. "__str__",
  541. "__sub__",
  542. "__subclasscheck__",
  543. "__truediv__",
  544. "__xor__",
  545. ),
  546. suffix=r"\b",
  547. ),
  548. Name.Function.Magic,
  549. ),
  550. ],
  551. "magicvars": [
  552. (
  553. words(
  554. (
  555. "__annotations__",
  556. "__bases__",
  557. "__class__",
  558. "__closure__",
  559. "__code__",
  560. "__defaults__",
  561. "__dict__",
  562. "__doc__",
  563. "__file__",
  564. "__func__",
  565. "__globals__",
  566. "__kwdefaults__",
  567. "__module__",
  568. "__mro__",
  569. "__name__",
  570. "__objclass__",
  571. "__qualname__",
  572. "__self__",
  573. "__slots__",
  574. "__weakref__",
  575. ),
  576. suffix=r"\b",
  577. ),
  578. Name.Variable.Magic,
  579. ),
  580. ],
  581. "numbers": [
  582. (
  583. r"(\d(?:_?\d)*\.(?:\d(?:_?\d)*)?|(?:\d(?:_?\d)*)?\.\d(?:_?\d)*)"
  584. r"([eE][+-]?\d(?:_?\d)*)?",
  585. Number.Float,
  586. ),
  587. (r"\d(?:_?\d)*[eE][+-]?\d(?:_?\d)*j?", Number.Float),
  588. (r"0[oO](?:_?[0-7])+", Number.Oct),
  589. (r"0[bB](?:_?[01])+", Number.Bin),
  590. (r"0[xX](?:_?[a-fA-F0-9])+", Number.Hex),
  591. (r"\d(?:_?\d)*", Number.Integer),
  592. ],
  593. "name": [
  594. (r"@" + uni_name, Name.Decorator),
  595. (r"@", Operator), # new matrix multiplication operator
  596. (uni_name, Name),
  597. ],
  598. "varname": [
  599. (uni_name, Name.Variable, "#pop"),
  600. ],
  601. "funcname": [
  602. include("magicfuncs"),
  603. (uni_name, Name.Function, "#pop"),
  604. default("#pop"),
  605. ],
  606. "classname": [
  607. (uni_name, Name.Class, "#pop"),
  608. ],
  609. "structname": [
  610. (uni_name, Name.Struct, "#pop"),
  611. ],
  612. "import": [
  613. (r"(\s+)(as)(\s+)", bygroups(Whitespace, Keyword, Whitespace)),
  614. (r"\.", Name.Namespace),
  615. (uni_name, Name.Namespace),
  616. (r"(\s*)(,)(\s*)", bygroups(Whitespace, Operator, Whitespace)),
  617. default("#pop"), # all else: go back
  618. ],
  619. "fromimport": [
  620. (r"(\s+)(import)\b", bygroups(Whitespace, Keyword.Namespace), "#pop"),
  621. (r"\.", Name.Namespace),
  622. # if None occurs here, it's "raise x from None", since None can
  623. # never be a module name
  624. (r"None\b", Keyword.Constant, "#pop"),
  625. (uni_name, Name.Namespace),
  626. default("#pop"),
  627. ],
  628. "rfstringescape": [
  629. (r"\{\{", String.Escape),
  630. (r"\}\}", String.Escape),
  631. ],
  632. "fstringescape": [
  633. include("rfstringescape"),
  634. include("stringescape"),
  635. ],
  636. "bytesescape": [
  637. (r'\\([\\abfnrtv"\']|\n|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape)
  638. ],
  639. "stringescape": [
  640. (r"\\(N\{.*?\}|u[a-fA-F0-9]{4}|U[a-fA-F0-9]{8})", String.Escape),
  641. include("bytesescape"),
  642. ],
  643. "fstrings-single": fstring_rules(String.Single),
  644. "fstrings-double": fstring_rules(String.Double),
  645. "strings-single": innerstring_rules(String.Single),
  646. "strings-double": innerstring_rules(String.Double),
  647. "dqf": [
  648. (r'"', String.Double, "#pop"),
  649. (r'\\\\|\\"|\\\n', String.Escape), # included here for raw strings
  650. include("fstrings-double"),
  651. ],
  652. "sqf": [
  653. (r"'", String.Single, "#pop"),
  654. (r"\\\\|\\'|\\\n", String.Escape), # included here for raw strings
  655. include("fstrings-single"),
  656. ],
  657. "dqs": [
  658. (r'"', String.Double, "#pop"),
  659. (r'\\\\|\\"|\\\n', String.Escape), # included here for raw strings
  660. include("strings-double"),
  661. ],
  662. "sqs": [
  663. (r"'", String.Single, "#pop"),
  664. (r"\\\\|\\'|\\\n", String.Escape), # included here for raw strings
  665. include("strings-single"),
  666. ],
  667. "tdqf": [
  668. (r'"""', String.Double, "#pop"),
  669. include("fstrings-double"),
  670. (r"\n", String.Double),
  671. ],
  672. "tsqf": [
  673. (r"'''", String.Single, "#pop"),
  674. include("fstrings-single"),
  675. (r"\n", String.Single),
  676. ],
  677. "tdqs": [
  678. (r'"""', String.Double, "#pop"),
  679. include("strings-double"),
  680. (r"\n", String.Double),
  681. ],
  682. "tsqs": [
  683. (r"'''", String.Single, "#pop"),
  684. include("strings-single"),
  685. (r"\n", String.Single),
  686. ],
  687. }
  688. def analyse_text(text):
  689. return (
  690. shebang_matches(text, r"mojo?") or "import " in text[:1000]
  691. ) # TODO supported?