# -*- coding: utf-8 -*-
"""
    pygments.lexers.ml
    ~~~~~~~~~~~~~~~~~~

    Lexers for ML family languages.

    :copyright: Copyright 2006-2019 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import re

from pygments.lexer import RegexLexer, include, bygroups, default, words
from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
    Number, Punctuation, Error

__all__ = ['SMLLexer', 'OcamlLexer', 'OpaLexer']


class SMLLexer(RegexLexer):
    """
    For the Standard ML language.

    .. versionadded:: 1.5
    """

    name = 'Standard ML'
    aliases = ['sml']
    filenames = ['*.sml', '*.sig', '*.fun']
    mimetypes = ['text/x-standardml', 'application/x-standardml']

    alphanumid_reserved = {
        # Core
        'abstype', 'and', 'andalso', 'as', 'case', 'datatype', 'do', 'else',
        'end', 'exception', 'fn', 'fun', 'handle', 'if', 'in', 'infix',
        'infixr', 'let', 'local', 'nonfix', 'of', 'op', 'open', 'orelse',
        'raise', 'rec', 'then', 'type', 'val', 'with', 'withtype', 'while',
        # Modules
        'eqtype', 'functor', 'include', 'sharing', 'sig', 'signature',
        'struct', 'structure', 'where',
    }

    symbolicid_reserved = {
        # Core
        ':', r'\|', '=', '=>', '->', '#',
        # Modules
        ':>',
    }

    nonid_reserved = {'(', ')', '[', ']', '{', '}', ',', ';', '...', '_'}

    alphanumid_re = r"[a-zA-Z][\w']*"
    symbolicid_re = r"[!%&$#+\-/:<=>?@\\~`^|*]+"

    # A character constant is a sequence of the form #s, where s is a string
    # constant denoting a string of size one character. This setup just parses
    # the entire string as either a String.Double or a String.Char (depending
    # on the argument), even if the String.Char is an erroneous
    # multiple-character string.
    def stringy(whatkind):
        return [
            (r'[^"\\]', whatkind),
            (r'\\[\\"abtnvfr]', String.Escape),
            # Control-character notation is used for codes < 32,
            # where \^@ == \000
            (r'\\\^[\x40-\x5e]', String.Escape),
            # Docs say 'decimal digits'
            (r'\\[0-9]{3}', String.Escape),
            (r'\\u[0-9a-fA-F]{4}', String.Escape),
            (r'\\\s+\\', String.Interpol),
            (r'"', whatkind, '#pop'),
        ]

    # Callbacks for distinguishing tokens and reserved words
    def long_id_callback(self, match):
        if match.group(1) in self.alphanumid_reserved:
            token = Error
        else:
            token = Name.Namespace
        yield match.start(1), token, match.group(1)
        yield match.start(2), Punctuation, match.group(2)

    def end_id_callback(self, match):
        if match.group(1) in self.alphanumid_reserved:
            token = Error
        elif match.group(1) in self.symbolicid_reserved:
            token = Error
        else:
            token = Name
        yield match.start(1), token, match.group(1)

    def id_callback(self, match):
        str = match.group(1)
        if str in self.alphanumid_reserved:
            token = Keyword.Reserved
        elif str in self.symbolicid_reserved:
            token = Punctuation
        else:
            token = Name
        yield match.start(1), token, str

    tokens = {
        # Whitespace and comments are (almost) everywhere
        'whitespace': [
            (r'\s+', Text),
            (r'\(\*', Comment.Multiline, 'comment'),
        ],

        'delimiters': [
            # This lexer treats these delimiters specially:
            # Delimiters define scopes, and the scope is how the meaning of
            # the `|' is resolved - is it a case/handle expression, or function
            # definition by cases? (This is not how the Definition works, but
            # it's how MLton behaves, see http://mlton.org/SMLNJDeviations)
            (r'\(|\[|\{', Punctuation, 'main'),
            (r'\)|\]|\}', Punctuation, '#pop'),
            (r'\b(let|if|local)\b(?!\')', Keyword.Reserved, ('main', 'main')),
            (r'\b(struct|sig|while)\b(?!\')', Keyword.Reserved, 'main'),
            (r'\b(do|else|end|in|then)\b(?!\')', Keyword.Reserved, '#pop'),
        ],

        'core': [
            # Punctuation that doesn't overlap symbolic identifiers
            (r'(%s)' % '|'.join(re.escape(z) for z in nonid_reserved),
             Punctuation),

            # Special constants: strings, floats, numbers in decimal and hex
            (r'#"', String.Char, 'char'),
            (r'"', String.Double, 'string'),
            (r'~?0x[0-9a-fA-F]+', Number.Hex),
            (r'0wx[0-9a-fA-F]+', Number.Hex),
            (r'0w\d+', Number.Integer),
            (r'~?\d+\.\d+[eE]~?\d+', Number.Float),
            (r'~?\d+\.\d+', Number.Float),
            (r'~?\d+[eE]~?\d+', Number.Float),
            (r'~?\d+', Number.Integer),

            # Labels
            (r'#\s*[1-9][0-9]*', Name.Label),
            (r'#\s*(%s)' % alphanumid_re, Name.Label),
            (r'#\s+(%s)' % symbolicid_re, Name.Label),

            # Some reserved words trigger a special, local lexer state change
            (r'\b(datatype|abstype)\b(?!\')', Keyword.Reserved, 'dname'),
            (r'(?=\b(exception)\b(?!\'))', Text, ('ename')),
            (r'\b(functor|include|open|signature|structure)\b(?!\')',
             Keyword.Reserved, 'sname'),
            (r'\b(type|eqtype)\b(?!\')', Keyword.Reserved, 'tname'),

            # Regular identifiers, long and otherwise
            (r'\'[\w\']*', Name.Decorator),
            (r'(%s)(\.)' % alphanumid_re, long_id_callback, "dotted"),
            (r'(%s)' % alphanumid_re, id_callback),
            (r'(%s)' % symbolicid_re, id_callback),
        ],

        'dotted': [
            (r'(%s)(\.)' % alphanumid_re, long_id_callback),
            (r'(%s)' % alphanumid_re, end_id_callback, "#pop"),
            (r'(%s)' % symbolicid_re, end_id_callback, "#pop"),
            (r'\s+', Error),
            (r'\S+', Error),
        ],

        # Main parser (prevents errors in files that have scoping errors)
        'root': [
            default('main')
        ],

        # In this scope, I expect '|' to not be followed by a function name,
        # and I expect 'and' to be followed by a binding site
        'main': [
            include('whitespace'),

            # Special behavior of val/and/fun
            (r'\b(val|and)\b(?!\')', Keyword.Reserved, 'vname'),
            (r'\b(fun)\b(?!\')', Keyword.Reserved,
             ('#pop', 'main-fun', 'fname')),

            include('delimiters'),
            include('core'),
            (r'\S+', Error),
        ],

        # In this scope, I expect '|' and 'and' to be followed by a function
        'main-fun': [
            include('whitespace'),

            (r'\s', Text),
            (r'\(\*', Comment.Multiline, 'comment'),

            # Special behavior of val/and/fun
            (r'\b(fun|and)\b(?!\')', Keyword.Reserved, 'fname'),
            (r'\b(val)\b(?!\')', Keyword.Reserved,
             ('#pop', 'main', 'vname')),

            # Special behavior of '|' and '|'-manipulating keywords
            (r'\|', Punctuation, 'fname'),
            (r'\b(case|handle)\b(?!\')', Keyword.Reserved,
             ('#pop', 'main')),

            include('delimiters'),
            include('core'),
            (r'\S+', Error),
        ],

        # Character and string parsers
        'char': stringy(String.Char),
        'string': stringy(String.Double),

        'breakout': [
            (r'(?=\b(%s)\b(?!\'))' % '|'.join(alphanumid_reserved), Text, '#pop'),
        ],

        # Dealing with what comes after module system keywords
        'sname': [
            include('whitespace'),
            include('breakout'),

            (r'(%s)' % alphanumid_re, Name.Namespace),
            default('#pop'),
        ],

        # Dealing with what comes after the 'fun' (or 'and' or '|') keyword
        'fname': [
            include('whitespace'),
            (r'\'[\w\']*', Name.Decorator),
            (r'\(', Punctuation, 'tyvarseq'),

            (r'(%s)' % alphanumid_re, Name.Function, '#pop'),
            (r'(%s)' % symbolicid_re, Name.Function, '#pop'),

            # Ignore interesting function declarations like "fun (x + y) = ..."
            default('#pop'),
        ],

        # Dealing with what comes after the 'val' (or 'and') keyword
        'vname': [
            include('whitespace'),
            (r'\'[\w\']*', Name.Decorator),
            (r'\(', Punctuation, 'tyvarseq'),

            (r'(%s)(\s*)(=(?!%s))' % (alphanumid_re, symbolicid_re),
             bygroups(Name.Variable, Text, Punctuation), '#pop'),
            (r'(%s)(\s*)(=(?!%s))' % (symbolicid_re, symbolicid_re),
             bygroups(Name.Variable, Text, Punctuation), '#pop'),
            (r'(%s)' % alphanumid_re, Name.Variable, '#pop'),
            (r'(%s)' % symbolicid_re, Name.Variable, '#pop'),

            # Ignore interesting patterns like 'val (x, y)'
            default('#pop'),
        ],

        # Dealing with what comes after the 'type' (or 'and') keyword
        'tname': [
            include('whitespace'),
            include('breakout'),

            (r'\'[\w\']*', Name.Decorator),
            (r'\(', Punctuation, 'tyvarseq'),
            (r'=(?!%s)' % symbolicid_re, Punctuation, ('#pop', 'typbind')),

            (r'(%s)' % alphanumid_re, Keyword.Type),
            (r'(%s)' % symbolicid_re, Keyword.Type),
            (r'\S+', Error, '#pop'),
        ],

        # A type binding includes most identifiers
        'typbind': [
            include('whitespace'),

            (r'\b(and)\b(?!\')', Keyword.Reserved, ('#pop', 'tname')),

            include('breakout'),
            include('core'),
            (r'\S+', Error, '#pop'),
        ],

        # Dealing with what comes after the 'datatype' (or 'and') keyword
        'dname': [
            include('whitespace'),
            include('breakout'),

            (r'\'[\w\']*', Name.Decorator),
            (r'\(', Punctuation, 'tyvarseq'),
            (r'(=)(\s*)(datatype)',
             bygroups(Punctuation, Text, Keyword.Reserved), '#pop'),
            (r'=(?!%s)' % symbolicid_re, Punctuation,
             ('#pop', 'datbind', 'datcon')),

            (r'(%s)' % alphanumid_re, Keyword.Type),
            (r'(%s)' % symbolicid_re, Keyword.Type),
            (r'\S+', Error, '#pop'),
        ],

        # common case - A | B | C of int
        'datbind': [
            include('whitespace'),

            (r'\b(and)\b(?!\')', Keyword.Reserved, ('#pop', 'dname')),
            (r'\b(withtype)\b(?!\')', Keyword.Reserved, ('#pop', 'tname')),
            (r'\b(of)\b(?!\')', Keyword.Reserved),

            (r'(\|)(\s*)(%s)' % alphanumid_re,
             bygroups(Punctuation, Text, Name.Class)),
            (r'(\|)(\s+)(%s)' % symbolicid_re,
             bygroups(Punctuation, Text, Name.Class)),

            include('breakout'),
            include('core'),
            (r'\S+', Error),
        ],

        # Dealing with what comes after an exception
        'ename': [
            include('whitespace'),

            (r'(exception|and)\b(\s+)(%s)' % alphanumid_re,
             bygroups(Keyword.Reserved, Text, Name.Class)),
            (r'(exception|and)\b(\s*)(%s)' % symbolicid_re,
             bygroups(Keyword.Reserved, Text, Name.Class)),
            (r'\b(of)\b(?!\')', Keyword.Reserved),

            include('breakout'),
            include('core'),
            (r'\S+', Error),
        ],

        'datcon': [
            include('whitespace'),
            (r'(%s)' % alphanumid_re, Name.Class, '#pop'),
            (r'(%s)' % symbolicid_re, Name.Class, '#pop'),
            (r'\S+', Error, '#pop'),
        ],

        # Series of type variables
        'tyvarseq': [
            (r'\s', Text),
            (r'\(\*', Comment.Multiline, 'comment'),

            (r'\'[\w\']*', Name.Decorator),
            (alphanumid_re, Name),
            (r',', Punctuation),
            (r'\)', Punctuation, '#pop'),
            (symbolicid_re, Name),
        ],

        'comment': [
            (r'[^(*)]', Comment.Multiline),
            (r'\(\*', Comment.Multiline, '#push'),
            (r'\*\)', Comment.Multiline, '#pop'),
            (r'[(*)]', Comment.Multiline),
        ],
    }
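

# Illustrative sketch (not part of the upstream Pygments source): the
# id/long_id callbacks above decide, per match, whether an identifier is a
# reserved word, punctuation, or a plain name, and the 'char'/'string' states
# built by stringy() tag #"..." and "..." constants differently. The helper
# name and the sample snippet below are made up; Lexer.get_tokens() is the
# standard way to inspect the resulting (token, value) stream.
def _demo_sml_tokens():
    code = 'val greeting = "hi" ^ str #"!"\n'
    for token, value in SMLLexer().get_tokens(code):
        print(token, repr(value))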


class OcamlLexer(RegexLexer):
    """
    For the OCaml language.

    .. versionadded:: 0.7
    """

    name = 'OCaml'
    aliases = ['ocaml']
    filenames = ['*.ml', '*.mli', '*.mll', '*.mly']
    mimetypes = ['text/x-ocaml']

    keywords = (
        'as', 'assert', 'begin', 'class', 'constraint', 'do', 'done',
        'downto', 'else', 'end', 'exception', 'external', 'false',
        'for', 'fun', 'function', 'functor', 'if', 'in', 'include',
        'inherit', 'initializer', 'lazy', 'let', 'match', 'method',
        'module', 'mutable', 'new', 'object', 'of', 'open', 'private',
        'raise', 'rec', 'sig', 'struct', 'then', 'to', 'true', 'try',
        'type', 'value', 'val', 'virtual', 'when', 'while', 'with',
    )
    keyopts = (
        '!=', '#', '&', '&&', r'\(', r'\)', r'\*', r'\+', ',', '-',
        r'-\.', '->', r'\.', r'\.\.', ':', '::', ':=', ':>', ';', ';;', '<',
        '<-', '=', '>', '>]', r'>\}', r'\?', r'\?\?', r'\[', r'\[<', r'\[>',
        r'\[\|', ']', '_', '`', r'\{', r'\{<', r'\|', r'\|]', r'\}', '~'
    )

    operators = r'[!$%&*+\./:<=>?@^|~-]'
    word_operators = ('and', 'asr', 'land', 'lor', 'lsl', 'lxor', 'mod', 'or')
    prefix_syms = r'[!?~]'
    infix_syms = r'[=<>@^|&+\*/$%-]'
    primitives = ('unit', 'int', 'float', 'bool', 'string', 'char', 'list', 'array')

    tokens = {
        'escape-sequence': [
            (r'\\[\\"\'ntbr]', String.Escape),
            (r'\\[0-9]{3}', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
        ],
        'root': [
            (r'\s+', Text),
            (r'false|true|\(\)|\[\]', Name.Builtin.Pseudo),
            (r'\b([A-Z][\w\']*)(?=\s*\.)', Name.Namespace, 'dotted'),
            (r'\b([A-Z][\w\']*)', Name.Class),
            (r'\(\*(?![)])', Comment, 'comment'),
            (r'\b(%s)\b' % '|'.join(keywords), Keyword),
            (r'(%s)' % '|'.join(keyopts[::-1]), Operator),
            (r'(%s|%s)?%s' % (infix_syms, prefix_syms, operators), Operator),
            (r'\b(%s)\b' % '|'.join(word_operators), Operator.Word),
            (r'\b(%s)\b' % '|'.join(primitives), Keyword.Type),
            (r"[^\W\d][\w']*", Name),
            (r'-?\d[\d_]*(.[\d_]*)?([eE][+\-]?\d[\d_]*)', Number.Float),
            (r'0[xX][\da-fA-F][\da-fA-F_]*', Number.Hex),
            (r'0[oO][0-7][0-7_]*', Number.Oct),
            (r'0[bB][01][01_]*', Number.Bin),
            (r'\d[\d_]*', Number.Integer),
            (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2}))'",
             String.Char),
            (r"'.'", String.Char),
            (r"'", Keyword),  # a stray quote is another syntax element
            (r'"', String.Double, 'string'),
            (r'[~?][a-z][\w\']*:', Name.Variable),
        ],
        'comment': [
            (r'[^(*)]+', Comment),
            (r'\(\*', Comment, '#push'),
            (r'\*\)', Comment, '#pop'),
            (r'[(*)]', Comment),
        ],
        'string': [
            (r'[^\\"]+', String.Double),
            include('escape-sequence'),
            (r'\\\n', String.Double),
            (r'"', String.Double, '#pop'),
        ],
        'dotted': [
            (r'\s+', Text),
            (r'\.', Punctuation),
            (r'[A-Z][\w\']*(?=\s*\.)', Name.Namespace),
            (r'[A-Z][\w\']*', Name.Class, '#pop'),
            (r'[a-z_][\w\']*', Name, '#pop'),
            default('#pop'),
        ],
    }
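

# Illustrative sketch (not part of the upstream Pygments source): the 'dotted'
# state above is what splits a qualified name such as List.map into a
# Name.Namespace, a Punctuation dot, and a trailing Name. The hypothetical
# helper below simply prints that stream for a one-line snippet:
def _demo_ocaml_dotted():
    for token, value in OcamlLexer().get_tokens('let ys = List.map succ xs\n'):
        print(token, repr(value))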


class OpaLexer(RegexLexer):
    """
    Lexer for the Opa language (http://opalang.org).

    .. versionadded:: 1.5
    """

    name = 'Opa'
    aliases = ['opa']
    filenames = ['*.opa']
    mimetypes = ['text/x-opa']

    # most of these aren't strictly keywords
    # but if you color only real keywords, you might just
    # as well not color anything
    keywords = (
        'and', 'as', 'begin', 'case', 'client', 'css', 'database', 'db', 'do',
        'else', 'end', 'external', 'forall', 'function', 'if', 'import',
        'match', 'module', 'or', 'package', 'parser', 'rec', 'server', 'then',
        'type', 'val', 'with', 'xml_parser',
    )

    # matches both stuff and `stuff`
    ident_re = r'(([a-zA-Z_]\w*)|(`[^`]*`))'

    op_re = r'[.=\-<>,@~%/+?*&^!]'
    punc_re = r'[()\[\],;|]'  # '{' and '}' are treated elsewhere
                              # because they are also used for inserts

    tokens = {
        # copied from the caml lexer, should be adapted
        'escape-sequence': [
            (r'\\[\\"\'ntr}]', String.Escape),
            (r'\\[0-9]{3}', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
        ],

        # factorizing these rules, because they are inserted many times
        'comments': [
            (r'/\*', Comment, 'nested-comment'),
            (r'//.*?$', Comment),
        ],
        'comments-and-spaces': [
            include('comments'),
            (r'\s+', Text),
        ],

        'root': [
            include('comments-and-spaces'),
            # keywords
            (words(keywords, prefix=r'\b', suffix=r'\b'), Keyword),
            # directives
            # we could parse the actual set of directives instead of anything
            # starting with @, but this is troublesome
            # because it needs to be adjusted all the time
            # and assuming we parse only sources that compile, it is useless
            (r'@' + ident_re + r'\b', Name.Builtin.Pseudo),

            # number literals
            (r'-?.[\d]+([eE][+\-]?\d+)', Number.Float),
            (r'-?\d+.\d*([eE][+\-]?\d+)', Number.Float),
            (r'-?\d+[eE][+\-]?\d+', Number.Float),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'0[oO][0-7]+', Number.Oct),
            (r'0[bB][01]+', Number.Bin),
            (r'\d+', Number.Integer),
            # color literals
            (r'#[\da-fA-F]{3,6}', Number.Integer),

            # string literals
            (r'"', String.Double, 'string'),
            # char literal, should be checked because this is the regexp from
            # the caml lexer
            (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2})|.)'",
             String.Char),

            # this is meant to deal with embedded exprs in strings
            # every time we find a '}' we pop a state so that if we were
            # inside a string, we are back in the string state
            # as a consequence, we must also push a state every time we find a
            # '{' or else we will have errors when parsing {} for instance
            (r'\{', Operator, '#push'),
            (r'\}', Operator, '#pop'),

            # html literals
            # this is much stricter than the actual parser,
            # since a<b would not be parsed as html
            # but then again, the actual parser is way too lax, and we can't
            # hope to be as tolerant
            (r'<(?=[a-zA-Z>])', String.Single, 'html-open-tag'),

            # db path
            # matching the '[_]' in '/a[_]' because it is a part
            # of the syntax of the db path definition
            # unfortunately, I don't know how to match the ']' in
            # /a[1], so this is somewhat inconsistent
            (r'[@?!]?(/\w+)+(\[_\])?', Name.Variable),
            # putting the same color on <- as on db path, since
            # it can be used only to mean Db.write
            (r'<-(?!'+op_re+r')', Name.Variable),

            # 'modules'
            # although modules are not distinguished by their names as in caml,
            # the standard library seems to follow the convention that only
            # modules are capitalized
            (r'\b([A-Z]\w*)(?=\.)', Name.Namespace),

            # operators
            # = has a special role because it is the only
            # way to syntactically distinguish binding constructions
            # unfortunately, this colors the equal in {x=2} too
            (r'=(?!'+op_re+r')', Keyword),
            (r'(%s)+' % op_re, Operator),
            (r'(%s)+' % punc_re, Operator),

            # coercions
            (r':', Operator, 'type'),
            # type variables
            # we need this rule because we don't parse type definitions
            # specially, so in "type t('a) = ...", "'a" is parsed by 'root'
            ("'"+ident_re, Keyword.Type),

            # id literal, #something, or #{expr}
            (r'#'+ident_re, String.Single),
            (r'#(?=\{)', String.Single),

            # identifiers
            # this avoids coloring the '2' in 'a2' as an integer
            (ident_re, Text),

            # default, not sure if that is needed or not
            # (r'.', Text),
        ],

        # it is quite painful to have to parse types to know where they end
        # this is the general rule for a type
        # a type is either:
        #   * -> ty
        #   * type-with-slash
        #   * type-with-slash -> ty
        #   * type-with-slash (, type-with-slash)+ -> ty
        #
        # the code is pretty funky in here, but this code would roughly
        # translate in caml to:
        # let rec type stream =
        #   match stream with
        #   | [< "->"; stream >] -> type stream
        #   | [< ""; stream >] ->
        #     type_with_slash stream
        #     type_lhs_1 stream;
        # and type_1 stream = ...
        'type': [
            include('comments-and-spaces'),
            (r'->', Keyword.Type),
            default(('#pop', 'type-lhs-1', 'type-with-slash')),
        ],

        # parses all the atomic or closed constructions in the syntax of type
        # expressions: record types, tuple types, type constructors, basic
        # types and type variables
        'type-1': [
            include('comments-and-spaces'),
            (r'\(', Keyword.Type, ('#pop', 'type-tuple')),
            (r'~?\{', Keyword.Type, ('#pop', 'type-record')),
            (ident_re+r'\(', Keyword.Type, ('#pop', 'type-tuple')),
            (ident_re, Keyword.Type, '#pop'),
            ("'"+ident_re, Keyword.Type),
            # this case is not in the syntax but sometimes
            # we think we are parsing types when in fact we are parsing
            # some css, so we just pop the states until we get back into
            # the root state
            default('#pop'),
        ],

        # type-with-slash is either:
        #   * type-1
        #   * type-1 (/ type-1)+
        'type-with-slash': [
            include('comments-and-spaces'),
            default(('#pop', 'slash-type-1', 'type-1')),
        ],
        'slash-type-1': [
            include('comments-and-spaces'),
            ('/', Keyword.Type, ('#pop', 'type-1')),
            # same remark as above
            default('#pop'),
        ],

        # we go in this state after having parsed a type-with-slash
        # while trying to parse a type
        # and at this point we must determine if we are parsing an arrow
        # type (in which case we must continue parsing) or not (in which
        # case we stop)
        'type-lhs-1': [
            include('comments-and-spaces'),
            (r'->', Keyword.Type, ('#pop', 'type')),
            (r'(?=,)', Keyword.Type, ('#pop', 'type-arrow')),
            default('#pop'),
        ],
        'type-arrow': [
            include('comments-and-spaces'),
            # the lookahead here allows parsing f(x : int, y : float -> truc)
            # correctly
            (r',(?=[^:]*?->)', Keyword.Type, 'type-with-slash'),
            (r'->', Keyword.Type, ('#pop', 'type')),
            # same remark as above
            default('#pop'),
        ],

        # no need to do precise parsing for tuples and records
        # because they are closed constructions, so we can simply
        # find the closing delimiter
        # note that this approach would not work if the source
        # contained identifiers like `{)` (although it could be patched
        # to support it)
        'type-tuple': [
            include('comments-and-spaces'),
            (r'[^()/*]+', Keyword.Type),
            (r'[/*]', Keyword.Type),
            (r'\(', Keyword.Type, '#push'),
            (r'\)', Keyword.Type, '#pop'),
        ],
        'type-record': [
            include('comments-and-spaces'),
            (r'[^{}/*]+', Keyword.Type),
            (r'[/*]', Keyword.Type),
            (r'\{', Keyword.Type, '#push'),
            (r'\}', Keyword.Type, '#pop'),
        ],

        # 'type-tuple': [
        #     include('comments-and-spaces'),
        #     (r'\)', Keyword.Type, '#pop'),
        #     default(('#pop', 'type-tuple-1', 'type-1')),
        # ],
        # 'type-tuple-1': [
        #     include('comments-and-spaces'),
        #     (r',?\s*\)', Keyword.Type, '#pop'),  # ,) is a valid end of tuple, in (1,)
        #     (r',', Keyword.Type, 'type-1'),
        # ],
        # 'type-record':[
        #     include('comments-and-spaces'),
        #     (r'\}', Keyword.Type, '#pop'),
        #     (r'~?(?:\w+|`[^`]*`)', Keyword.Type, 'type-record-field-expr'),
        # ],
        # 'type-record-field-expr': [
        #
        # ],

        'nested-comment': [
            (r'[^/*]+', Comment),
            (r'/\*', Comment, '#push'),
            (r'\*/', Comment, '#pop'),
            (r'[/*]', Comment),
        ],

        # the copy pasting between string and single-string
        # is kinda sad. Is there a way to avoid that??
        'string': [
            (r'[^\\"{]+', String.Double),
            (r'"', String.Double, '#pop'),
            (r'\{', Operator, 'root'),
            include('escape-sequence'),
        ],
        'single-string': [
            (r'[^\\\'{]+', String.Double),
            (r'\'', String.Double, '#pop'),
            (r'\{', Operator, 'root'),
            include('escape-sequence'),
        ],

        # all the html stuff
        # can't really reuse some existing html parser
        # because we must be able to parse embedded expressions

        # we are in this state after someone parsed the '<' that
        # started the html literal
        'html-open-tag': [
            (r'[\w\-:]+', String.Single, ('#pop', 'html-attr')),
            (r'>', String.Single, ('#pop', 'html-content')),
        ],

        # we are in this state after someone parsed the '</' that
        # started the end of the closing tag
        'html-end-tag': [
            # this is a star, because </> is allowed
            (r'[\w\-:]*>', String.Single, '#pop'),
        ],

        # we are in this state after having parsed '<ident(:ident)?'
        # we thus parse a possibly empty list of attributes
        'html-attr': [
            (r'\s+', Text),
            (r'[\w\-:]+=', String.Single, 'html-attr-value'),
            (r'/>', String.Single, '#pop'),
            (r'>', String.Single, ('#pop', 'html-content')),
        ],
        'html-attr-value': [
            (r"'", String.Single, ('#pop', 'single-string')),
            (r'"', String.Single, ('#pop', 'string')),
            (r'#'+ident_re, String.Single, '#pop'),
            (r'#(?=\{)', String.Single, ('#pop', 'root')),
            (r'[^"\'{`=<>]+', String.Single, '#pop'),
            (r'\{', Operator, ('#pop', 'root')),  # this is a tail call!
        ],

        # we should probably deal with '\' escapes here
        'html-content': [
            (r'<!--', Comment, 'html-comment'),
            (r'</', String.Single, ('#pop', 'html-end-tag')),
            (r'<', String.Single, 'html-open-tag'),
            (r'\{', Operator, 'root'),
            (r'[^<{]+', String.Single),
        ],
        'html-comment': [
            (r'-->', Comment, '#pop'),
            (r'[^\-]+|-', Comment),
        ],
    }
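

# Illustrative sketch (not part of the upstream Pygments source): the '{' / '}'
# push/pop rules in OpaLexer mean that an embedded expression inside a string,
# e.g. "hello {name}", re-enters 'root' on '{' and falls back into the string
# state on the closing brace. Running this file directly prints the token
# stream for a tiny made-up snippet so that switching is visible:
if __name__ == '__main__':
    for token, value in OpaLexer().get_tokens('greeting = "hello {name}"\n'):
        print(token, repr(value))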