julia.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335
  1. # -*- coding: utf-8 -*-
  2. """
  3. pygments.lexers.julia
  4. ~~~~~~~~~~~~~~~~~~~~~
  5. Lexers for the Julia language.
  6. :copyright: Copyright 2006-2019 by the Pygments team, see AUTHORS.
  7. :license: BSD, see LICENSE for details.
  8. """
  9. import re
  10. from pygments.lexer import Lexer, RegexLexer, bygroups, do_insertions, \
  11. words, include
  12. from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
  13. Number, Punctuation, Generic
  14. from pygments.util import shebang_matches, unirange
  15. __all__ = ['JuliaLexer', 'JuliaConsoleLexer']
  16. allowed_variable = (
  17. u'(?:[a-zA-Z_\u00A1-\uffff]|%s)(?:[a-zA-Z_0-9\u00A1-\uffff]|%s)*!*' %
  18. ((unirange(0x10000, 0x10ffff),) * 2))
class JuliaLexer(RegexLexer):
    """
    For `Julia <http://julialang.org/>`_ source code.

    The lexer is a table of regex rules grouped into named states.  The
    ``root`` state handles ordinary code; the other states are entered when
    a block comment, string, regex literal, backtick command or ``$(``
    interpolation is opened and are left again via ``#pop``.

    Rule order inside each state matters: rules are listed from the most
    specific to the most general (e.g. keywords before plain names,
    triple-quoted openers before single-quoted ones).

    .. versionadded:: 1.6
    """

    name = 'Julia'
    aliases = ['julia', 'jl']
    filenames = ['*.jl']
    mimetypes = ['text/x-julia', 'application/x-julia']

    flags = re.MULTILINE | re.UNICODE

    tokens = {
        'root': [
            # newlines and other (non-newline) whitespace
            (r'\n', Text),
            (r'[^\S\n]+', Text),
            # '#=' opens a block comment; any other '#' is a line comment
            (r'#=', Comment.Multiline, "blockcomment"),
            (r'#.*$', Comment),
            (r'[\[\]{}(),;]', Punctuation),

            # keywords
            (r'in\b', Keyword.Pseudo),
            (r'isa\b', Keyword.Pseudo),
            (r'(true|false)\b', Keyword.Constant),
            (r'(local|global|const)\b', Keyword.Declaration),
            (words([
                'function', 'type', 'typealias', 'abstract', 'immutable',
                'baremodule', 'begin', 'bitstype', 'break', 'catch', 'ccall',
                'continue', 'do', 'else', 'elseif', 'end', 'export', 'finally',
                'for', 'if', 'import', 'importall', 'let', 'macro', 'module',
                'mutable', 'primitive', 'quote', 'return', 'struct', 'try',
                'using', 'while'],
                suffix=r'\b'), Keyword),

            # NOTE
            # Patterns below work only for definition sites and thus hardly reliable.
            #
            # functions
            # (r'(function)(\s+)(' + allowed_variable + ')',
            #  bygroups(Keyword, Text, Name.Function)),
            #
            # types
            # (r'(type|typealias|abstract|immutable)(\s+)(' + allowed_variable + ')',
            #  bygroups(Keyword, Text, Name.Class)),

            # type names
            (words([
                'ANY', 'ASCIIString', 'AbstractArray', 'AbstractChannel',
                'AbstractFloat', 'AbstractMatrix', 'AbstractRNG',
                'AbstractSparseArray', 'AbstractSparseMatrix',
                'AbstractSparseVector', 'AbstractString', 'AbstractVecOrMat',
                'AbstractVector', 'Any', 'ArgumentError', 'Array',
                'AssertionError', 'Associative', 'Base64DecodePipe',
                'Base64EncodePipe', 'Bidiagonal', 'BigFloat', 'BigInt',
                'BitArray', 'BitMatrix', 'BitVector', 'Bool', 'BoundsError',
                'Box', 'BufferStream', 'CapturedException', 'CartesianIndex',
                'CartesianRange', 'Cchar', 'Cdouble', 'Cfloat', 'Channel',
                'Char', 'Cint', 'Cintmax_t', 'Clong', 'Clonglong',
                'ClusterManager', 'Cmd', 'Coff_t', 'Colon', 'Complex',
                'Complex128', 'Complex32', 'Complex64', 'CompositeException',
                'Condition', 'Cptrdiff_t', 'Cshort', 'Csize_t', 'Cssize_t',
                'Cstring', 'Cuchar', 'Cuint', 'Cuintmax_t', 'Culong',
                'Culonglong', 'Cushort', 'Cwchar_t', 'Cwstring', 'DataType',
                'Date', 'DateTime', 'DenseArray', 'DenseMatrix',
                'DenseVecOrMat', 'DenseVector', 'Diagonal', 'Dict',
                'DimensionMismatch', 'Dims', 'DirectIndexString', 'Display',
                'DivideError', 'DomainError', 'EOFError', 'EachLine', 'Enum',
                'Enumerate', 'ErrorException', 'Exception', 'Expr',
                'Factorization', 'FileMonitor', 'FileOffset', 'Filter',
                'Float16', 'Float32', 'Float64', 'FloatRange', 'Function',
                'GenSym', 'GlobalRef', 'GotoNode', 'HTML', 'Hermitian', 'IO',
                'IOBuffer', 'IOStream', 'IPv4', 'IPv6', 'InexactError',
                'InitError', 'Int', 'Int128', 'Int16', 'Int32', 'Int64', 'Int8',
                'IntSet', 'Integer', 'InterruptException', 'IntrinsicFunction',
                'InvalidStateException', 'Irrational', 'KeyError', 'LabelNode',
                'LambdaStaticData', 'LinSpace', 'LineNumberNode', 'LoadError',
                'LocalProcess', 'LowerTriangular', 'MIME', 'Matrix',
                'MersenneTwister', 'Method', 'MethodError', 'MethodTable',
                'Module', 'NTuple', 'NewvarNode', 'NullException', 'Nullable',
                'Number', 'ObjectIdDict', 'OrdinalRange', 'OutOfMemoryError',
                'OverflowError', 'Pair', 'ParseError', 'PartialQuickSort',
                'Pipe', 'PollingFileWatcher', 'ProcessExitedException',
                'ProcessGroup', 'Ptr', 'QuoteNode', 'RandomDevice', 'Range',
                'Rational', 'RawFD', 'ReadOnlyMemoryError', 'Real',
                'ReentrantLock', 'Ref', 'Regex', 'RegexMatch',
                'RemoteException', 'RemoteRef', 'RepString', 'RevString',
                'RopeString', 'RoundingMode', 'SegmentationFault',
                'SerializationState', 'Set', 'SharedArray', 'SharedMatrix',
                'SharedVector', 'Signed', 'SimpleVector', 'SparseMatrixCSC',
                'StackOverflowError', 'StatStruct', 'StepRange', 'StridedArray',
                'StridedMatrix', 'StridedVecOrMat', 'StridedVector', 'SubArray',
                'SubString', 'SymTridiagonal', 'Symbol', 'SymbolNode',
                'Symmetric', 'SystemError', 'TCPSocket', 'Task', 'Text',
                'TextDisplay', 'Timer', 'TopNode', 'Tridiagonal', 'Tuple',
                'Type', 'TypeConstructor', 'TypeError', 'TypeName', 'TypeVar',
                'UDPSocket', 'UInt', 'UInt128', 'UInt16', 'UInt32', 'UInt64',
                'UInt8', 'UTF16String', 'UTF32String', 'UTF8String',
                'UndefRefError', 'UndefVarError', 'UnicodeError', 'UniformScaling',
                'Union', 'UnitRange', 'Unsigned', 'UpperTriangular', 'Val',
                'Vararg', 'VecOrMat', 'Vector', 'VersionNumber', 'Void', 'WString',
                'WeakKeyDict', 'WeakRef', 'WorkerConfig', 'Zip'], suffix=r'\b'),
                Keyword.Type),

            # builtins
            (words([
                u'ARGS', u'CPU_CORES', u'C_NULL', u'DevNull', u'ENDIAN_BOM',
                u'ENV', u'I', u'Inf', u'Inf16', u'Inf32', u'Inf64',
                u'InsertionSort', u'JULIA_HOME', u'LOAD_PATH', u'MergeSort',
                u'NaN', u'NaN16', u'NaN32', u'NaN64', u'OS_NAME',
                u'QuickSort', u'RoundDown', u'RoundFromZero', u'RoundNearest',
                u'RoundNearestTiesAway', u'RoundNearestTiesUp',
                u'RoundToZero', u'RoundUp', u'STDERR', u'STDIN', u'STDOUT',
                u'VERSION', u'WORD_SIZE', u'catalan', u'e', u'eu',
                u'eulergamma', u'golden', u'im', u'nothing', u'pi', u'γ',
                u'π', u'φ'],
                suffix=r'\b'), Name.Builtin),

            # operators
            # see: https://github.com/JuliaLang/julia/blob/master/src/julia-parser.scm
            # Unlike the word lists above, this list is passed to words()
            # without a suffix.  '+' and '-' appear twice (binary and unary
            # sections).
            (words([
                # prec-assignment
                u'=', u':=', u'+=', u'-=', u'*=', u'/=', u'//=', u'.//=', u'.*=', u'./=',
                u'\\=', u'.\\=', u'^=', u'.^=', u'÷=', u'.÷=', u'%=', u'.%=', u'|=', u'&=',
                u'$=', u'=>', u'<<=', u'>>=', u'>>>=', u'~', u'.+=', u'.-=',
                # prec-conditional
                u'?',
                # prec-arrow
                u'--', u'-->',
                # prec-lazy-or
                u'||',
                # prec-lazy-and
                u'&&',
                # prec-comparison
                u'>', u'<', u'>=', u'≥', u'<=', u'≤', u'==', u'===', u'≡', u'!=', u'≠',
                u'!==', u'≢', u'.>', u'.<', u'.>=', u'.≥', u'.<=', u'.≤', u'.==', u'.!=',
                u'.≠', u'.=', u'.!', u'<:', u'>:', u'∈', u'∉', u'∋', u'∌', u'⊆',
                u'⊈', u'⊂',
                u'⊄', u'⊊',
                # prec-pipe
                u'|>', u'<|',
                # prec-colon
                u':',
                # prec-plus
                u'+', u'-', u'.+', u'.-', u'|', u'∪', u'$',
                # prec-bitshift
                u'<<', u'>>', u'>>>', u'.<<', u'.>>', u'.>>>',
                # prec-times
                u'*', u'/', u'./', u'÷', u'.÷', u'%', u'⋅', u'.%', u'.*', u'\\', u'.\\', u'&', u'∩',
                # prec-rational
                u'//', u'.//',
                # prec-power
                u'^', u'.^',
                # prec-decl
                u'::',
                # prec-dot
                u'.',
                # unary op
                u'+', u'-', u'!', u'√', u'∛', u'∜'
            ]), Operator),

            # chars
            # A single-quoted literal holding one escape sequence or one
            # character other than backslash, quote or newline.
            (r"'(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,3}|\\u[a-fA-F0-9]{1,4}|"
             r"\\U[a-fA-F0-9]{1,6}|[^\\\'\n])'", String.Char),

            # try to match trailing transpose
            # One or more quotes directly after '.', a word character, ')'
            # or ']' are operators rather than the start of a char literal.
            (r'(?<=[.\w)\]])\'+', Operator),

            # strings
            # The triple-quoted opener is listed first so that '"""' is not
            # consumed as a single '"'.
            (r'"""', String, 'tqstring'),
            (r'"', String, 'string'),

            # regular expressions
            (r'r"""', String.Regex, 'tqregex'),
            (r'r"', String.Regex, 'regex'),

            # backticks
            (r'`', String.Backtick, 'command'),

            # names
            (allowed_variable, Name),
            (r'@' + allowed_variable, Name.Decorator),

            # numbers
            # Each numeric form has a variant with '_' digit-group
            # separators listed just before the plain variant.
            (r'(\d+(_\d+)+\.\d*|\d*\.\d+(_\d+)+)([eEf][+-]?[0-9]+)?', Number.Float),
            (r'(\d+\.\d*|\d*\.\d+)([eEf][+-]?[0-9]+)?', Number.Float),
            (r'\d+(_\d+)+[eEf][+-]?[0-9]+', Number.Float),
            (r'\d+[eEf][+-]?[0-9]+', Number.Float),
            (r'0b[01]+(_[01]+)+', Number.Bin),
            (r'0b[01]+', Number.Bin),
            (r'0o[0-7]+(_[0-7]+)+', Number.Oct),
            (r'0o[0-7]+', Number.Oct),
            (r'0x[a-fA-F0-9]+(_[a-fA-F0-9]+)+', Number.Hex),
            (r'0x[a-fA-F0-9]+', Number.Hex),
            (r'\d+(_\d+)+', Number.Integer),
            (r'\d+', Number.Integer)
        ],

        # Inside '#= ... =#'.  A nested '#=' pushes this state again and each
        # '=#' pops one level, so nested block comments are balanced.
        "blockcomment": [
            (r'[^=#]', Comment.Multiline),
            (r'#=', Comment.Multiline, '#push'),
            (r'=#', Comment.Multiline, '#pop'),
            (r'[=#]', Comment.Multiline),
        ],

        # Inside a "..." string literal.
        'string': [
            (r'"', String, '#pop'),
            # FIXME: This escape pattern is not perfect.
            (r'\\([\\"\'$nrbtfav]|(x|u|U)[a-fA-F0-9]+|\d+)', String.Escape),
            # Interpolation is defined as "$" followed by the shortest full
            # expression, which is something we can't parse.
            # Include the most common cases here: $word, and $(paren'd expr).
            (r'\$' + allowed_variable, String.Interpol),
            # (r'\$[a-zA-Z_]+', String.Interpol),
            (r'(\$)(\()', bygroups(String.Interpol, Punctuation), 'in-intp'),
            # @printf and @sprintf formats
            (r'%[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?[hlL]?[E-GXc-giorsux%]',
             String.Interpol),
            # anything else, one character at a time (including newlines)
            (r'.|\s', String),
        ],

        # Inside a triple-quoted string.  Same escape and interpolation rules
        # as 'string', but without the printf-format rule.
        'tqstring': [
            (r'"""', String, '#pop'),
            (r'\\([\\"\'$nrbtfav]|(x|u|U)[a-fA-F0-9]+|\d+)', String.Escape),
            (r'\$' + allowed_variable, String.Interpol),
            (r'(\$)(\()', bygroups(String.Interpol, Punctuation), 'in-intp'),
            (r'.|\s', String),
        ],

        # Inside r"..."; an escaped quote does not end the literal.
        'regex': [
            (r'"', String.Regex, '#pop'),
            (r'\\"', String.Regex),
            (r'.|\s', String.Regex),
        ],

        # Inside r"""..."""; only the closing triple quote is special.
        'tqregex': [
            (r'"""', String.Regex, '#pop'),
            (r'.|\s', String.Regex),
        ],

        # Inside a backtick command literal; supports the same $name and
        # $( ... ) interpolation forms as strings.
        'command': [
            (r'`', String.Backtick, '#pop'),
            (r'\$' + allowed_variable, String.Interpol),
            (r'(\$)(\()', bygroups(String.Interpol, Punctuation), 'in-intp'),
            (r'.|\s', String.Backtick)
        ],

        # Inside '$( ... )'.  Parentheses are counted via #push/#pop so the
        # state is left at the matching ')'; everything else is lexed with
        # the 'root' rules.
        'in-intp': [
            (r'\(', Punctuation, '#push'),
            (r'\)', Punctuation, '#pop'),
            include('root'),
        ]
    }

    # NOTE(review): defined without ``self`` -- presumably the Pygments lexer
    # machinery wraps ``analyse_text`` as a static function; confirm against
    # the base class.
    def analyse_text(text):
        """Return the result of matching *text*'s shebang line against 'julia'."""
        return shebang_matches(text, r'julia')
  252. class JuliaConsoleLexer(Lexer):
  253. """
  254. For Julia console sessions. Modeled after MatlabSessionLexer.
  255. .. versionadded:: 1.6
  256. """
  257. name = 'Julia console'
  258. aliases = ['jlcon']
  259. def get_tokens_unprocessed(self, text):
  260. jllexer = JuliaLexer(**self.options)
  261. start = 0
  262. curcode = ''
  263. insertions = []
  264. output = False
  265. error = False
  266. for line in text.splitlines(True):
  267. if line.startswith('julia>'):
  268. insertions.append((len(curcode), [(0, Generic.Prompt, line[:6])]))
  269. curcode += line[6:]
  270. output = False
  271. error = False
  272. elif line.startswith('help?>') or line.startswith('shell>'):
  273. yield start, Generic.Prompt, line[:6]
  274. yield start + 6, Text, line[6:]
  275. output = False
  276. error = False
  277. elif line.startswith(' ') and not output:
  278. insertions.append((len(curcode), [(0, Text, line[:6])]))
  279. curcode += line[6:]
  280. else:
  281. if curcode:
  282. for item in do_insertions(
  283. insertions, jllexer.get_tokens_unprocessed(curcode)):
  284. yield item
  285. curcode = ''
  286. insertions = []
  287. if line.startswith('ERROR: ') or error:
  288. yield start, Generic.Error, line
  289. error = True
  290. else:
  291. yield start, Generic.Output, line
  292. output = True
  293. start += len(line)
  294. if curcode:
  295. for item in do_insertions(
  296. insertions, jllexer.get_tokens_unprocessed(curcode)):
  297. yield item