julia.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293
  1. """
  2. pygments.lexers.julia
  3. ~~~~~~~~~~~~~~~~~~~~~
  4. Lexers for the Julia language.
  5. :copyright: Copyright 2006-2024 by the Pygments team, see AUTHORS.
  6. :license: BSD, see LICENSE for details.
  7. """
  8. from pygments.lexer import Lexer, RegexLexer, bygroups, do_insertions, \
  9. words, include
  10. from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
  11. Number, Punctuation, Generic, Whitespace
  12. from pygments.util import shebang_matches
  13. from pygments.lexers._julia_builtins import OPERATORS_LIST, DOTTED_OPERATORS_LIST, \
  14. KEYWORD_LIST, BUILTIN_LIST, LITERAL_LIST
# Names exported by ``from pygments.lexers.julia import *``: the two lexer
# classes defined in this module.
__all__ = ['JuliaLexer', 'JuliaConsoleLexer']
  16. # see https://docs.julialang.org/en/v1/manual/variables/#Allowed-Variable-Names
  17. allowed_variable = \
  18. '(?:[a-zA-Z_\u00A1-\U0010ffff][a-zA-Z_0-9!\u00A1-\U0010ffff]*)'
  19. # see https://github.com/JuliaLang/julia/blob/master/src/flisp/julia_opsuffs.h
  20. operator_suffixes = r'[²³¹ʰʲʳʷʸˡˢˣᴬᴮᴰᴱᴳᴴᴵᴶᴷᴸᴹᴺᴼᴾᴿᵀᵁᵂᵃᵇᵈᵉᵍᵏᵐᵒᵖᵗᵘᵛᵝᵞᵟᵠᵡᵢᵣᵤᵥᵦᵧᵨᵩᵪᶜᶠᶥᶦᶫᶰᶸᶻᶿ′″‴‵‶‷⁗⁰ⁱ⁴⁵⁶⁷⁸⁹⁺⁻⁼⁽⁾ⁿ₀₁₂₃₄₅₆₇₈₉₊₋₌₍₎ₐₑₒₓₕₖₗₘₙₚₛₜⱼⱽ]*'
class JuliaLexer(RegexLexer):
    """
    For Julia source code.

    A `RegexLexer` whose ``tokens`` table starts in the ``root`` state and
    uses dedicated states for nested block comments, curly-brace type
    expressions, the various string/regex/command literals and ``$(...)``
    interpolation. Rule order inside each state is significant: earlier
    rules win.
    """

    # Lexer metadata (name, project URL, lookup aliases, filename globs,
    # MIME types and the version in which the lexer was added).
    name = 'Julia'
    url = 'https://julialang.org/'
    aliases = ['julia', 'jl']
    filenames = ['*.jl']
    mimetypes = ['text/x-julia', 'application/x-julia']
    version_added = '1.6'

    tokens = {
        'root': [
            (r'\n', Whitespace),
            (r'[^\S\n]+', Whitespace),
            # '#=' opens a (nestable) block comment; a plain '#' runs to end of line
            (r'#=', Comment.Multiline, "blockcomment"),
            (r'#.*$', Comment),
            (r'[\[\](),;]', Punctuation),

            # symbols
            #   intercept range expressions first
            (r'(' + allowed_variable + r')(\s*)(:)(' + allowed_variable + ')',
                bygroups(Name, Whitespace, Operator, Name)),
            #   then match :name which does not follow closing brackets, digits, or the
            #   ::, <:, and :> operators
            (r'(?<![\]):<>\d.])(:' + allowed_variable + ')', String.Symbol),

            # type assertions - excludes expressions like ::typeof(sin) and ::avec[1]
            (r'(?<=::)(\s*)(' + allowed_variable + r')\b(?![(\[])',
                bygroups(Whitespace, Keyword.Type)),
            # type comparisons
            # - MyType <: A or MyType >: A
            ('(' + allowed_variable + r')(\s*)([<>]:)(\s*)(' + allowed_variable + r')\b(?![(\[])',
                bygroups(Keyword.Type, Whitespace, Operator, Whitespace, Keyword.Type)),
            # - <: B or >: B
            (r'([<>]:)(\s*)(' + allowed_variable + r')\b(?![(\[])',
                bygroups(Operator, Whitespace, Keyword.Type)),
            # - A <: or A >:
            (r'\b(' + allowed_variable + r')(\s*)([<>]:)',
                bygroups(Keyword.Type, Whitespace, Operator)),

            # operators
            # Suffixes aren't actually allowed on all operators, but we'll ignore that
            # since those cases are invalid Julia code.
            (words([*OPERATORS_LIST, *DOTTED_OPERATORS_LIST],
                   suffix=operator_suffixes), Operator),
            # dotted (broadcast) forms: each dotted operator with a leading '.'
            (words(['.' + o for o in DOTTED_OPERATORS_LIST],
                   suffix=operator_suffixes), Operator),
            (words(['...', '..']), Operator),

            # NOTE
            # Patterns below work only for definition sites and are thus hardly
            # reliable.
            #
            # functions
            # (r'(function)(\s+)(' + allowed_variable + ')',
            #  bygroups(Keyword, Text, Name.Function)),

            # chars
            (r"'(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,3}|\\u[a-fA-F0-9]{1,4}|"
             r"\\U[a-fA-F0-9]{1,6}|[^\\\'\n])'", String.Char),

            # try to match trailing transpose
            # (a quote directly after '.', a word character, ')' or ']')
            (r'(?<=[.\w)\]])(\'' + operator_suffixes + ')+', Operator),

            # raw strings
            (r'(raw)(""")', bygroups(String.Affix, String), 'tqrawstring'),
            (r'(raw)(")', bygroups(String.Affix, String), 'rawstring'),
            # regular expressions
            (r'(r)(""")', bygroups(String.Affix, String.Regex), 'tqregex'),
            (r'(r)(")', bygroups(String.Affix, String.Regex), 'regex'),
            # other strings
            # (an optional identifier directly before the quote is a prefix affix)
            (r'(' + allowed_variable + ')?(""")',
                bygroups(String.Affix, String), 'tqstring'),
            (r'(' + allowed_variable + ')?(")',
                bygroups(String.Affix, String), 'string'),

            # backticks
            (r'(' + allowed_variable + ')?(```)',
                bygroups(String.Affix, String.Backtick), 'tqcommand'),
            (r'(' + allowed_variable + ')?(`)',
                bygroups(String.Affix, String.Backtick), 'command'),

            # type names
            # - names that begin a curly expression
            ('(' + allowed_variable + r')(\{)',
                bygroups(Keyword.Type, Punctuation), 'curly'),
            # - names as part of bare 'where'
            (r'(where)(\s+)(' + allowed_variable + ')',
                bygroups(Keyword, Whitespace, Keyword.Type)),
            # - curly expressions in general
            (r'(\{)', Punctuation, 'curly'),
            # - names as part of type declaration
            (r'(abstract|primitive)([ \t]+)(type\b)([\s()]+)(' +
                allowed_variable + r')',
                bygroups(Keyword, Whitespace, Keyword, Text, Keyword.Type)),
            (r'(mutable(?=[ \t]))?([ \t]+)?(struct\b)([\s()]+)(' +
                allowed_variable + r')',
                bygroups(Keyword, Whitespace, Keyword, Text, Keyword.Type)),

            # macros
            (r'@' + allowed_variable, Name.Decorator),
            # operator-named macros: '@' followed by an operator
            (words([*OPERATORS_LIST, '..', '.', *DOTTED_OPERATORS_LIST],
                   prefix='@', suffix=operator_suffixes), Name.Decorator),

            # keywords
            (words(KEYWORD_LIST, suffix=r'\b'), Keyword),
            # builtin types
            (words(BUILTIN_LIST, suffix=r'\b'), Keyword.Type),
            # builtin literals
            (words(LITERAL_LIST, suffix=r'\b'), Name.Builtin),

            # names
            (allowed_variable, Name),

            # numbers
            # (digit groups may be separated by single underscores)
            (r'(\d+((_\d+)+)?\.(?!\.)(\d+((_\d+)+)?)?|\.\d+((_\d+)+)?)([eEf][+-]?[0-9]+)?', Number.Float),
            (r'\d+((_\d+)+)?[eEf][+-]?[0-9]+', Number.Float),
            # hexadecimal float: requires a 'p' exponent
            (r'0x[a-fA-F0-9]+((_[a-fA-F0-9]+)+)?(\.([a-fA-F0-9]+((_[a-fA-F0-9]+)+)?)?)?p[+-]?\d+', Number.Float),
            (r'0b[01]+((_[01]+)+)?', Number.Bin),
            (r'0o[0-7]+((_[0-7]+)+)?', Number.Oct),
            (r'0x[a-fA-F0-9]+((_[a-fA-F0-9]+)+)?', Number.Hex),
            (r'\d+((_\d+)+)?', Number.Integer),

            # single dot operator matched last to permit e.g. ".1" as a float
            (words(['.']), Operator),
        ],

        # Block comments nest: an inner '#=' pushes this state again and each
        # '=#' pops one level.
        "blockcomment": [
            (r'[^=#]', Comment.Multiline),
            (r'#=', Comment.Multiline, '#push'),
            (r'=#', Comment.Multiline, '#pop'),
            (r'[=#]', Comment.Multiline),
        ],

        # Inside '{...}': braces nest, bare names are highlighted as types and
        # everything else falls back to the 'root' rules.
        'curly': [
            (r'\{', Punctuation, '#push'),
            (r'\}', Punctuation, '#pop'),
            (allowed_variable, Keyword.Type),
            include('root'),
        ],

        # Body of raw"""...""": no escape or interpolation rules are applied.
        'tqrawstring': [
            (r'"""', String, '#pop'),
            (r'([^"]|"[^"][^"])+', String),
        ],
        # Body of raw"...": only \" is highlighted as an escape.
        'rawstring': [
            (r'"', String, '#pop'),
            (r'\\"', String.Escape),
            (r'([^"\\]|\\[^"])+', String),
        ],

        # Interpolation is defined as "$" followed by the shortest full
        # expression, which is something we can't parse. Include the most
        # common cases here: $word, and $(paren'd expr).
        'interp': [
            (r'\$' + allowed_variable, String.Interpol),
            (r'(\$)(\()', bygroups(String.Interpol, Punctuation), 'in-intp'),
        ],
        # Inside '$(...)': parentheses nest via push/pop and the contents are
        # lexed with the 'root' rules.
        'in-intp': [
            (r'\(', Punctuation, '#push'),
            (r'\)', Punctuation, '#pop'),
            include('root'),
        ],

        'string': [
            # closing quote, optionally followed by an identifier or digit suffix
            (r'(")(' + allowed_variable + r'|\d+)?',
                bygroups(String, String.Affix), '#pop'),
            # FIXME: This escape pattern is not perfect.
            (r'\\([\\"\'$nrbtfav]|(x|u|U)[a-fA-F0-9]+|\d+)', String.Escape),
            include('interp'),
            # @printf and @sprintf formats
            (r'%[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?[hlL]?[E-GXc-giorsux%]',
                String.Interpol),
            (r'[^"$%\\]+', String),
            # fallback: any single character not consumed by the rules above
            (r'.', String),
        ],
        # Same as 'string' but terminated by '"""' and without the printf
        # format rule.
        'tqstring': [
            (r'(""")(' + allowed_variable + r'|\d+)?',
                bygroups(String, String.Affix), '#pop'),
            (r'\\([\\"\'$nrbtfav]|(x|u|U)[a-fA-F0-9]+|\d+)', String.Escape),
            include('interp'),
            (r'[^"$%\\]+', String),
            (r'.', String),
        ],

        'regex': [
            # closing quote plus optional flag letters (i, m, s, x, a)
            (r'(")([imsxa]*)?', bygroups(String.Regex, String.Affix), '#pop'),
            (r'\\"', String.Regex),
            (r'[^\\"]+', String.Regex),
        ],

        'tqregex': [
            (r'(""")([imsxa]*)?', bygroups(String.Regex, String.Affix), '#pop'),
            (r'[^"]+', String.Regex),
        ],

        'command': [
            # closing backtick, optionally followed by an identifier or digit suffix
            (r'(`)(' + allowed_variable + r'|\d+)?',
                bygroups(String.Backtick, String.Affix), '#pop'),
            (r'\\[`$]', String.Escape),
            include('interp'),
            (r'[^\\`$]+', String.Backtick),
            (r'.', String.Backtick),
        ],
        'tqcommand': [
            (r'(```)(' + allowed_variable + r'|\d+)?',
                bygroups(String.Backtick, String.Affix), '#pop'),
            (r'\\\$', String.Escape),
            include('interp'),
            (r'[^\\`$]+', String.Backtick),
            (r'.', String.Backtick),
        ],
    }

    def analyse_text(text):
        """
        Score `text` for lexer guessing.

        Returns the result of ``shebang_matches(text, r'julia')``, i.e. the
        decision is based solely on whether the text's shebang line matches
        ``julia``.
        """
        return shebang_matches(text, r'julia')
  213. class JuliaConsoleLexer(Lexer):
  214. """
  215. For Julia console sessions. Modeled after MatlabSessionLexer.
  216. """
  217. name = 'Julia console'
  218. aliases = ['jlcon', 'julia-repl']
  219. url = 'https://julialang.org/'
  220. version_added = '1.6'
  221. def get_tokens_unprocessed(self, text):
  222. jllexer = JuliaLexer(**self.options)
  223. start = 0
  224. curcode = ''
  225. insertions = []
  226. output = False
  227. error = False
  228. for line in text.splitlines(keepends=True):
  229. if line.startswith('julia>'):
  230. insertions.append((len(curcode), [(0, Generic.Prompt, line[:6])]))
  231. curcode += line[6:]
  232. output = False
  233. error = False
  234. elif line.startswith('help?>') or line.startswith('shell>'):
  235. yield start, Generic.Prompt, line[:6]
  236. yield start + 6, Text, line[6:]
  237. output = False
  238. error = False
  239. elif line.startswith(' ') and not output:
  240. insertions.append((len(curcode), [(0, Whitespace, line[:6])]))
  241. curcode += line[6:]
  242. else:
  243. if curcode:
  244. yield from do_insertions(
  245. insertions, jllexer.get_tokens_unprocessed(curcode))
  246. curcode = ''
  247. insertions = []
  248. if line.startswith('ERROR: ') or error:
  249. yield start, Generic.Error, line
  250. error = True
  251. else:
  252. yield start, Generic.Output, line
  253. output = True
  254. start += len(line)
  255. if curcode:
  256. yield from do_insertions(
  257. insertions, jllexer.get_tokens_unprocessed(curcode))