ml.py 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958
  1. """
  2. pygments.lexers.ml
  3. ~~~~~~~~~~~~~~~~~~
  4. Lexers for ML family languages.
  5. :copyright: Copyright 2006-2024 by the Pygments team, see AUTHORS.
  6. :license: BSD, see LICENSE for details.
  7. """
  8. import re
  9. from pygments.lexer import RegexLexer, include, bygroups, default, words
  10. from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
  11. Number, Punctuation, Error
  12. __all__ = ['SMLLexer', 'OcamlLexer', 'OpaLexer', 'ReasonLexer', 'FStarLexer']
class SMLLexer(RegexLexer):
    """
    For the Standard ML language.

    Lexing revolves around a stack of scopes: delimiter keywords such as
    ``let``/``in``/``end`` push and pop states so that the meaning of ``|``
    (case/handle arm vs. function clause) can be resolved (see the comment
    in the ``delimiters`` state below).
    """

    name = 'Standard ML'
    aliases = ['sml']
    filenames = ['*.sml', '*.sig', '*.fun']
    mimetypes = ['text/x-standardml', 'application/x-standardml']
    url = 'https://en.wikipedia.org/wiki/Standard_ML'
    version_added = '1.5'

    # Reserved words that have the lexical shape of alphanumeric identifiers.
    alphanumid_reserved = {
        # Core
        'abstype', 'and', 'andalso', 'as', 'case', 'datatype', 'do', 'else',
        'end', 'exception', 'fn', 'fun', 'handle', 'if', 'in', 'infix',
        'infixr', 'let', 'local', 'nonfix', 'of', 'op', 'open', 'orelse',
        'raise', 'rec', 'then', 'type', 'val', 'with', 'withtype', 'while',
        # Modules
        'eqtype', 'functor', 'include', 'sharing', 'sig', 'signature',
        'struct', 'structure', 'where',
    }

    # Reserved words that have the lexical shape of symbolic identifiers.
    symbolicid_reserved = {
        # Core
        ':', r'\|', '=', '=>', '->', '#',
        # Modules
        ':>',
    }

    # Reserved tokens that can never be (part of) an identifier.
    nonid_reserved = {'(', ')', '[', ']', '{', '}', ',', ';', '...', '_'}

    # Regexes for the two identifier classes of the Definition.
    alphanumid_re = r"[a-zA-Z][\w']*"
    symbolicid_re = r"[!%&$#+\-/:<=>?@\\~`^|*]+"

    # A character constant is a sequence of the form #s, where s is a string
    # constant denoting a string of size one character. This setup just parses
    # the entire string as either a String.Double or a String.Char (depending
    # on the argument), even if the String.Char is an erroneous
    # multiple-character string.
    def stringy(whatkind):
        # Called at class-body evaluation time to build the rule list for a
        # string-like state; `whatkind` is the token type for ordinary chars.
        return [
            (r'[^"\\]', whatkind),
            (r'\\[\\"abtnvfr]', String.Escape),
            # Control-character notation is used for codes < 32,
            # where \^@ == \000
            (r'\\\^[\x40-\x5e]', String.Escape),
            # Docs say 'decimal digits'
            (r'\\[0-9]{3}', String.Escape),
            (r'\\u[0-9a-fA-F]{4}', String.Escape),
            # A gap: backslash, whitespace, backslash — ignored in the value.
            (r'\\\s+\\', String.Interpol),
            (r'"', whatkind, '#pop'),
        ]

    # Callbacks for distinguishing tokens and reserved words

    def long_id_callback(self, match):
        # Structure component of a long identifier (e.g. `Foo` in `Foo.bar`);
        # reserved words are not legal here, so they are flagged as errors.
        if match.group(1) in self.alphanumid_reserved:
            token = Error
        else:
            token = Name.Namespace
        yield match.start(1), token, match.group(1)
        yield match.start(2), Punctuation, match.group(2)

    def end_id_callback(self, match):
        # Final component of a long identifier; again, reserved words are
        # not legal in this position.
        if match.group(1) in self.alphanumid_reserved:
            token = Error
        elif match.group(1) in self.symbolicid_reserved:
            token = Error
        else:
            token = Name
        yield match.start(1), token, match.group(1)

    def id_callback(self, match):
        # Plain identifier: classify as reserved word, reserved symbol,
        # or ordinary name.
        str = match.group(1)  # NOTE(review): shadows the builtin `str`; kept as-is
        if str in self.alphanumid_reserved:
            token = Keyword.Reserved
        elif str in self.symbolicid_reserved:
            token = Punctuation
        else:
            token = Name
        yield match.start(1), token, str

    tokens = {
        # Whitespace and comments are (almost) everywhere
        'whitespace': [
            (r'\s+', Text),
            (r'\(\*', Comment.Multiline, 'comment'),
        ],

        'delimiters': [
            # This lexer treats these delimiters specially:
            # Delimiters define scopes, and the scope is how the meaning of
            # the `|' is resolved - is it a case/handle expression, or function
            # definition by cases? (This is not how the Definition works, but
            # it's how MLton behaves, see http://mlton.org/SMLNJDeviations)
            (r'\(|\[|\{', Punctuation, 'main'),
            (r'\)|\]|\}', Punctuation, '#pop'),
            (r'\b(let|if|local)\b(?!\')', Keyword.Reserved, ('main', 'main')),
            (r'\b(struct|sig|while)\b(?!\')', Keyword.Reserved, 'main'),
            (r'\b(do|else|end|in|then)\b(?!\')', Keyword.Reserved, '#pop'),
        ],

        'core': [
            # Punctuation that doesn't overlap symbolic identifiers
            (r'({})'.format('|'.join(re.escape(z) for z in nonid_reserved)),
             Punctuation),

            # Special constants: strings, floats, numbers in decimal and hex
            (r'#"', String.Char, 'char'),
            (r'"', String.Double, 'string'),
            (r'~?0x[0-9a-fA-F]+', Number.Hex),
            (r'0wx[0-9a-fA-F]+', Number.Hex),
            (r'0w\d+', Number.Integer),
            (r'~?\d+\.\d+[eE]~?\d+', Number.Float),
            (r'~?\d+\.\d+', Number.Float),
            (r'~?\d+[eE]~?\d+', Number.Float),
            (r'~?\d+', Number.Integer),

            # Labels
            (r'#\s*[1-9][0-9]*', Name.Label),
            (rf'#\s*({alphanumid_re})', Name.Label),
            (rf'#\s+({symbolicid_re})', Name.Label),
            # Some reserved words trigger a special, local lexer state change
            (r'\b(datatype|abstype)\b(?!\')', Keyword.Reserved, 'dname'),
            (r'\b(exception)\b(?!\')', Keyword.Reserved, 'ename'),
            (r'\b(functor|include|open|signature|structure)\b(?!\')',
             Keyword.Reserved, 'sname'),
            (r'\b(type|eqtype)\b(?!\')', Keyword.Reserved, 'tname'),

            # Regular identifiers, long and otherwise
            (r'\'[\w\']*', Name.Decorator),
            (rf'({alphanumid_re})(\.)', long_id_callback, "dotted"),
            (rf'({alphanumid_re})', id_callback),
            (rf'({symbolicid_re})', id_callback),
        ],

        # Trailing components of a long (dotted) identifier.
        'dotted': [
            (rf'({alphanumid_re})(\.)', long_id_callback),
            (rf'({alphanumid_re})', end_id_callback, "#pop"),
            (rf'({symbolicid_re})', end_id_callback, "#pop"),
            (r'\s+', Error),
            (r'\S+', Error),
        ],

        # Main parser (prevents errors in files that have scoping errors)
        'root': [
            default('main')
        ],

        # In this scope, I expect '|' to not be followed by a function name,
        # and I expect 'and' to be followed by a binding site
        'main': [
            include('whitespace'),

            # Special behavior of val/and/fun
            (r'\b(val|and)\b(?!\')', Keyword.Reserved, 'vname'),
            (r'\b(fun)\b(?!\')', Keyword.Reserved,
             ('#pop', 'main-fun', 'fname')),

            include('delimiters'),
            include('core'),
            (r'\S+', Error),
        ],

        # In this scope, I expect '|' and 'and' to be followed by a function
        'main-fun': [
            include('whitespace'),

            (r'\s', Text),
            (r'\(\*', Comment.Multiline, 'comment'),

            # Special behavior of val/and/fun
            (r'\b(fun|and)\b(?!\')', Keyword.Reserved, 'fname'),
            (r'\b(val)\b(?!\')', Keyword.Reserved,
             ('#pop', 'main', 'vname')),

            # Special behavior of '|' and '|'-manipulating keywords
            (r'\|', Punctuation, 'fname'),
            (r'\b(case|handle)\b(?!\')', Keyword.Reserved,
             ('#pop', 'main')),

            include('delimiters'),
            include('core'),
            (r'\S+', Error),
        ],

        # Character and string parsers
        'char': stringy(String.Char),
        'string': stringy(String.Double),

        # Bail out of a local state as soon as a reserved word appears.
        'breakout': [
            (r'(?=\b({})\b(?!\'))'.format('|'.join(alphanumid_reserved)), Text, '#pop'),
        ],

        # Dealing with what comes after module system keywords
        'sname': [
            include('whitespace'),
            include('breakout'),

            (rf'({alphanumid_re})', Name.Namespace),
            default('#pop'),
        ],

        # Dealing with what comes after the 'fun' (or 'and' or '|') keyword
        'fname': [
            include('whitespace'),
            (r'\'[\w\']*', Name.Decorator),
            (r'\(', Punctuation, 'tyvarseq'),

            (rf'({alphanumid_re})', Name.Function, '#pop'),
            (rf'({symbolicid_re})', Name.Function, '#pop'),

            # Ignore interesting function declarations like "fun (x + y) = ..."
            default('#pop'),
        ],

        # Dealing with what comes after the 'val' (or 'and') keyword
        'vname': [
            include('whitespace'),
            (r'\'[\w\']*', Name.Decorator),
            (r'\(', Punctuation, 'tyvarseq'),

            (rf'({alphanumid_re})(\s*)(=(?!{symbolicid_re}))',
             bygroups(Name.Variable, Text, Punctuation), '#pop'),
            (rf'({symbolicid_re})(\s*)(=(?!{symbolicid_re}))',
             bygroups(Name.Variable, Text, Punctuation), '#pop'),
            (rf'({alphanumid_re})', Name.Variable, '#pop'),
            (rf'({symbolicid_re})', Name.Variable, '#pop'),

            # Ignore interesting patterns like 'val (x, y)'
            default('#pop'),
        ],

        # Dealing with what comes after the 'type' (or 'and') keyword
        'tname': [
            include('whitespace'),
            include('breakout'),

            (r'\'[\w\']*', Name.Decorator),
            (r'\(', Punctuation, 'tyvarseq'),
            (rf'=(?!{symbolicid_re})', Punctuation, ('#pop', 'typbind')),

            (rf'({alphanumid_re})', Keyword.Type),
            (rf'({symbolicid_re})', Keyword.Type),
            (r'\S+', Error, '#pop'),
        ],

        # A type binding includes most identifiers
        'typbind': [
            include('whitespace'),

            (r'\b(and)\b(?!\')', Keyword.Reserved, ('#pop', 'tname')),

            include('breakout'),
            include('core'),
            (r'\S+', Error, '#pop'),
        ],

        # Dealing with what comes after the 'datatype' (or 'and') keyword
        'dname': [
            include('whitespace'),
            include('breakout'),

            (r'\'[\w\']*', Name.Decorator),
            (r'\(', Punctuation, 'tyvarseq'),
            (r'(=)(\s*)(datatype)',
             bygroups(Punctuation, Text, Keyword.Reserved), '#pop'),
            (rf'=(?!{symbolicid_re})', Punctuation,
             ('#pop', 'datbind', 'datcon')),

            (rf'({alphanumid_re})', Keyword.Type),
            (rf'({symbolicid_re})', Keyword.Type),
            (r'\S+', Error, '#pop'),
        ],

        # common case - A | B | C of int
        'datbind': [
            include('whitespace'),

            (r'\b(and)\b(?!\')', Keyword.Reserved, ('#pop', 'dname')),
            (r'\b(withtype)\b(?!\')', Keyword.Reserved, ('#pop', 'tname')),
            (r'\b(of)\b(?!\')', Keyword.Reserved),

            (rf'(\|)(\s*)({alphanumid_re})',
             bygroups(Punctuation, Text, Name.Class)),
            (rf'(\|)(\s+)({symbolicid_re})',
             bygroups(Punctuation, Text, Name.Class)),

            include('breakout'),
            include('core'),
            (r'\S+', Error),
        ],

        # Dealing with what comes after an exception
        'ename': [
            include('whitespace'),

            (rf'(and\b)(\s+)({alphanumid_re})',
             bygroups(Keyword.Reserved, Text, Name.Class)),
            (rf'(and\b)(\s*)({symbolicid_re})',
             bygroups(Keyword.Reserved, Text, Name.Class)),
            (r'\b(of)\b(?!\')', Keyword.Reserved),
            (rf'({alphanumid_re})|({symbolicid_re})', Name.Class),

            default('#pop'),
        ],

        # The single constructor expected right after 'datatype T ='.
        'datcon': [
            include('whitespace'),
            (rf'({alphanumid_re})', Name.Class, '#pop'),
            (rf'({symbolicid_re})', Name.Class, '#pop'),
            (r'\S+', Error, '#pop'),
        ],

        # Series of type variables
        'tyvarseq': [
            (r'\s', Text),
            (r'\(\*', Comment.Multiline, 'comment'),

            (r'\'[\w\']*', Name.Decorator),
            (alphanumid_re, Name),
            (r',', Punctuation),
            (r'\)', Punctuation, '#pop'),
            (symbolicid_re, Name),
        ],

        # Nestable (* ... *) comments.
        'comment': [
            (r'[^(*)]', Comment.Multiline),
            (r'\(\*', Comment.Multiline, '#push'),
            (r'\*\)', Comment.Multiline, '#pop'),
            (r'[(*)]', Comment.Multiline),
        ],
    }
  291. class OcamlLexer(RegexLexer):
  292. """
  293. For the OCaml language.
  294. """
  295. name = 'OCaml'
  296. url = 'https://ocaml.org/'
  297. aliases = ['ocaml']
  298. filenames = ['*.ml', '*.mli', '*.mll', '*.mly']
  299. mimetypes = ['text/x-ocaml']
  300. version_added = '0.7'
  301. keywords = (
  302. 'and', 'as', 'assert', 'begin', 'class', 'constraint', 'do', 'done',
  303. 'downto', 'else', 'end', 'exception', 'external', 'false',
  304. 'for', 'fun', 'function', 'functor', 'if', 'in', 'include',
  305. 'inherit', 'initializer', 'lazy', 'let', 'match', 'method',
  306. 'module', 'mutable', 'new', 'object', 'of', 'open', 'private',
  307. 'raise', 'rec', 'sig', 'struct', 'then', 'to', 'true', 'try',
  308. 'type', 'val', 'virtual', 'when', 'while', 'with',
  309. )
  310. keyopts = (
  311. '!=', '#', '&', '&&', r'\(', r'\)', r'\*', r'\+', ',', '-',
  312. r'-\.', '->', r'\.', r'\.\.', ':', '::', ':=', ':>', ';', ';;', '<',
  313. '<-', '=', '>', '>]', r'>\}', r'\?', r'\?\?', r'\[', r'\[<', r'\[>',
  314. r'\[\|', ']', '_', '`', r'\{', r'\{<', r'\|', r'\|]', r'\}', '~'
  315. )
  316. operators = r'[!$%&*+\./:<=>?@^|~-]'
  317. word_operators = ('asr', 'land', 'lor', 'lsl', 'lxor', 'mod', 'or')
  318. prefix_syms = r'[!?~]'
  319. infix_syms = r'[=<>@^|&+\*/$%-]'
  320. primitives = ('unit', 'int', 'float', 'bool', 'string', 'char', 'list', 'array')
  321. tokens = {
  322. 'escape-sequence': [
  323. (r'\\[\\"\'ntbr]', String.Escape),
  324. (r'\\[0-9]{3}', String.Escape),
  325. (r'\\x[0-9a-fA-F]{2}', String.Escape),
  326. ],
  327. 'root': [
  328. (r'\s+', Text),
  329. (r'false|true|\(\)|\[\]', Name.Builtin.Pseudo),
  330. (r'\b([A-Z][\w\']*)(?=\s*\.)', Name.Namespace, 'dotted'),
  331. (r'\b([A-Z][\w\']*)', Name.Class),
  332. (r'\(\*(?![)])', Comment, 'comment'),
  333. (r'\b({})\b'.format('|'.join(keywords)), Keyword),
  334. (r'({})'.format('|'.join(keyopts[::-1])), Operator),
  335. (rf'({infix_syms}|{prefix_syms})?{operators}', Operator),
  336. (r'\b({})\b'.format('|'.join(word_operators)), Operator.Word),
  337. (r'\b({})\b'.format('|'.join(primitives)), Keyword.Type),
  338. (r"[^\W\d][\w']*", Name),
  339. (r'-?\d[\d_]*(.[\d_]*)?([eE][+\-]?\d[\d_]*)', Number.Float),
  340. (r'0[xX][\da-fA-F][\da-fA-F_]*', Number.Hex),
  341. (r'0[oO][0-7][0-7_]*', Number.Oct),
  342. (r'0[bB][01][01_]*', Number.Bin),
  343. (r'\d[\d_]*', Number.Integer),
  344. (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2}))'",
  345. String.Char),
  346. (r"'.'", String.Char),
  347. (r"'", Keyword), # a stray quote is another syntax element
  348. (r'"', String.Double, 'string'),
  349. (r'[~?][a-z][\w\']*:', Name.Variable),
  350. ],
  351. 'comment': [
  352. (r'[^(*)]+', Comment),
  353. (r'\(\*', Comment, '#push'),
  354. (r'\*\)', Comment, '#pop'),
  355. (r'[(*)]', Comment),
  356. ],
  357. 'string': [
  358. (r'[^\\"]+', String.Double),
  359. include('escape-sequence'),
  360. (r'\\\n', String.Double),
  361. (r'"', String.Double, '#pop'),
  362. ],
  363. 'dotted': [
  364. (r'\s+', Text),
  365. (r'\.', Punctuation),
  366. (r'[A-Z][\w\']*(?=\s*\.)', Name.Namespace),
  367. (r'[A-Z][\w\']*', Name.Class, '#pop'),
  368. (r'[a-z_][\w\']*', Name, '#pop'),
  369. default('#pop'),
  370. ],
  371. }
class OpaLexer(RegexLexer):
    """
    Lexer for the Opa language.
    """

    name = 'Opa'
    aliases = ['opa']
    filenames = ['*.opa']
    mimetypes = ['text/x-opa']
    url = 'http://opalang.org'
    version_added = '1.5'

    # most of these aren't strictly keywords
    # but if you color only real keywords, you might just
    # as well not color anything
    keywords = (
        'and', 'as', 'begin', 'case', 'client', 'css', 'database', 'db', 'do',
        'else', 'end', 'external', 'forall', 'function', 'if', 'import',
        'match', 'module', 'or', 'package', 'parser', 'rec', 'server', 'then',
        'type', 'val', 'with', 'xml_parser',
    )

    # matches both stuff and `stuff`
    ident_re = r'(([a-zA-Z_]\w*)|(`[^`]*`))'

    op_re = r'[.=\-<>,@~%/+?*&^!]'
    punc_re = r'[()\[\],;|]'  # '{' and '}' are treated elsewhere
    # because they are also used for inserts

    tokens = {
        # copied from the caml lexer, should be adapted
        'escape-sequence': [
            (r'\\[\\"\'ntr}]', String.Escape),
            (r'\\[0-9]{3}', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
        ],

        # factorizing these rules, because they are inserted many times
        'comments': [
            (r'/\*', Comment, 'nested-comment'),
            (r'//.*?$', Comment),
        ],
        'comments-and-spaces': [
            include('comments'),
            (r'\s+', Text),
        ],

        'root': [
            include('comments-and-spaces'),
            # keywords
            (words(keywords, prefix=r'\b', suffix=r'\b'), Keyword),
            # directives
            # we could parse the actual set of directives instead of anything
            # starting with @, but this is troublesome
            # because it needs to be adjusted all the time
            # and assuming we parse only sources that compile, it is useless
            (r'@' + ident_re + r'\b', Name.Builtin.Pseudo),

            # number literals
            # NOTE(review): the '.' in the next two patterns is unescaped, so
            # it matches any character — presumably r'\.' was intended; kept
            # as-is to preserve behavior, confirm against upstream.
            (r'-?.[\d]+([eE][+\-]?\d+)', Number.Float),
            (r'-?\d+.\d*([eE][+\-]?\d+)', Number.Float),
            (r'-?\d+[eE][+\-]?\d+', Number.Float),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'0[oO][0-7]+', Number.Oct),
            (r'0[bB][01]+', Number.Bin),
            (r'\d+', Number.Integer),
            # color literals
            (r'#[\da-fA-F]{3,6}', Number.Integer),

            # string literals
            (r'"', String.Double, 'string'),
            # char literal, should be checked because this is the regexp from
            # the caml lexer
            (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2})|.)'",
             String.Char),

            # this is meant to deal with embedded exprs in strings
            # every time we find a '}' we pop a state so that if we were
            # inside a string, we are back in the string state
            # as a consequence, we must also push a state every time we find a
            # '{' or else we will have errors when parsing {} for instance
            (r'\{', Operator, '#push'),
            (r'\}', Operator, '#pop'),

            # html literals
            # this is much more strict than the actual parser,
            # since a<b would not be parsed as html
            # but then again, the parser is way too lax, and we can't hope
            # to have something as tolerant
            (r'<(?=[a-zA-Z>])', String.Single, 'html-open-tag'),

            # db path
            # matching the '[_]' in '/a[_]' because it is a part
            # of the syntax of the db path definition
            # unfortunately, i don't know how to match the ']' in
            # /a[1], so this is somewhat inconsistent
            (r'[@?!]?(/\w+)+(\[_\])?', Name.Variable),
            # putting the same color on <- as on db path, since
            # it can be used only to mean Db.write
            (r'<-(?!'+op_re+r')', Name.Variable),

            # 'modules'
            # although modules are not distinguished by their names as in caml
            # the standard library seems to follow the convention that modules
            # are only capitalized
            (r'\b([A-Z]\w*)(?=\.)', Name.Namespace),

            # operators
            # = has a special role because this is the only
            # way to syntactically distinguish binding constructions
            # unfortunately, this colors the equal in {x=2} too
            (r'=(?!'+op_re+r')', Keyword),
            (rf'({op_re})+', Operator),
            (rf'({punc_re})+', Operator),

            # coercions
            (r':', Operator, 'type'),
            # type variables
            # we need this rule because we don't parse specially type
            # definitions so in "type t('a) = ...", "'a" is parsed by 'root'
            ("'"+ident_re, Keyword.Type),

            # id literal, #something, or #{expr}
            (r'#'+ident_re, String.Single),
            (r'#(?=\{)', String.Single),

            # identifiers
            # this avoids to color '2' in 'a2' as an integer
            (ident_re, Text),

            # default, not sure if that is needed or not
            # (r'.', Text),
        ],

        # it is quite painful to have to parse types to know where they end
        # this is the general rule for a type
        # a type is either:
        # * -> ty
        # * type-with-slash
        # * type-with-slash -> ty
        # * type-with-slash (, type-with-slash)+ -> ty
        #
        # the code is pretty funky in here, but this code would roughly
        # translate in caml to:
        # let rec type stream =
        # match stream with
        # | [< "->";  stream >] -> type stream
        # | [< "";  stream >] ->
        #   type_with_slash stream
        #   type_lhs_1 stream;
        # and type_1 stream = ...
        'type': [
            include('comments-and-spaces'),
            (r'->', Keyword.Type),
            default(('#pop', 'type-lhs-1', 'type-with-slash')),
        ],

        # parses all the atomic or closed constructions in the syntax of type
        # expressions: record types, tuple types, type constructors, basic type
        # and type variables
        'type-1': [
            include('comments-and-spaces'),
            (r'\(', Keyword.Type, ('#pop', 'type-tuple')),
            (r'~?\{', Keyword.Type, ('#pop', 'type-record')),
            (ident_re+r'\(', Keyword.Type, ('#pop', 'type-tuple')),
            (ident_re, Keyword.Type, '#pop'),
            ("'"+ident_re, Keyword.Type),
            # this case is not in the syntax but sometimes
            # we think we are parsing types when in fact we are parsing
            # some css, so we just pop the states until we get back into
            # the root state
            default('#pop'),
        ],

        # type-with-slash is either:
        # * type-1
        # * type-1 (/ type-1)+
        'type-with-slash': [
            include('comments-and-spaces'),
            default(('#pop', 'slash-type-1', 'type-1')),
        ],
        'slash-type-1': [
            include('comments-and-spaces'),
            ('/', Keyword.Type, ('#pop', 'type-1')),
            # same remark as above
            default('#pop'),
        ],

        # we go in this state after having parsed a type-with-slash
        # while trying to parse a type
        # and at this point we must determine if we are parsing an arrow
        # type (in which case we must continue parsing) or not (in which
        # case we stop)
        'type-lhs-1': [
            include('comments-and-spaces'),
            (r'->', Keyword.Type, ('#pop', 'type')),
            (r'(?=,)', Keyword.Type, ('#pop', 'type-arrow')),
            default('#pop'),
        ],
        'type-arrow': [
            include('comments-and-spaces'),
            # the look ahead here allows to parse f(x : int, y : float -> truc)
            # correctly
            (r',(?=[^:]*?->)', Keyword.Type, 'type-with-slash'),
            (r'->', Keyword.Type, ('#pop', 'type')),
            # same remark as above
            default('#pop'),
        ],

        # no need to do precise parsing for tuples and records
        # because they are closed constructions, so we can simply
        # find the closing delimiter
        # note that this would not work if the source
        # contained identifiers like `{)` (although it could be patched
        # to support it)
        'type-tuple': [
            include('comments-and-spaces'),
            (r'[^()/*]+', Keyword.Type),
            (r'[/*]', Keyword.Type),
            (r'\(', Keyword.Type, '#push'),
            (r'\)', Keyword.Type, '#pop'),
        ],
        'type-record': [
            include('comments-and-spaces'),
            (r'[^{}/*]+', Keyword.Type),
            (r'[/*]', Keyword.Type),
            (r'\{', Keyword.Type, '#push'),
            (r'\}', Keyword.Type, '#pop'),
        ],

        # 'type-tuple': [
        #     include('comments-and-spaces'),
        #     (r'\)', Keyword.Type, '#pop'),
        #     default(('#pop', 'type-tuple-1', 'type-1')),
        # ],
        # 'type-tuple-1': [
        #     include('comments-and-spaces'),
        #     (r',?\s*\)', Keyword.Type, '#pop'),  # ,) is a valid end of tuple, in (1,)
        #     (r',', Keyword.Type, 'type-1'),
        # ],
        # 'type-record':[
        #     include('comments-and-spaces'),
        #     (r'\}', Keyword.Type, '#pop'),
        #     (r'~?(?:\w+|`[^`]*`)', Keyword.Type, 'type-record-field-expr'),
        # ],
        # 'type-record-field-expr': [
        #
        # ],

        'nested-comment': [
            (r'[^/*]+', Comment),
            (r'/\*', Comment, '#push'),
            (r'\*/', Comment, '#pop'),
            (r'[/*]', Comment),
        ],

        # the copy pasting between string and single-string
        # is kinda sad. Is there a way to avoid that??
        'string': [
            (r'[^\\"{]+', String.Double),
            (r'"', String.Double, '#pop'),
            (r'\{', Operator, 'root'),
            include('escape-sequence'),
        ],
        'single-string': [
            (r'[^\\\'{]+', String.Double),
            (r'\'', String.Double, '#pop'),
            (r'\{', Operator, 'root'),
            include('escape-sequence'),
        ],

        # all the html stuff
        # can't really reuse some existing html parser
        # because we must be able to parse embedded expressions

        # we are in this state after someone parsed the '<' that
        # started the html literal
        'html-open-tag': [
            (r'[\w\-:]+', String.Single, ('#pop', 'html-attr')),
            (r'>', String.Single, ('#pop', 'html-content')),
        ],

        # we are in this state after someone parsed the '</' that
        # started the end of the closing tag
        'html-end-tag': [
            # this is a star, because </> is allowed
            (r'[\w\-:]*>', String.Single, '#pop'),
        ],

        # we are in this state after having parsed '<ident(:ident)?'
        # we thus parse a possibly empty list of attributes
        'html-attr': [
            (r'\s+', Text),
            (r'[\w\-:]+=', String.Single, 'html-attr-value'),
            (r'/>', String.Single, '#pop'),
            (r'>', String.Single, ('#pop', 'html-content')),
        ],
        'html-attr-value': [
            (r"'", String.Single, ('#pop', 'single-string')),
            (r'"', String.Single, ('#pop', 'string')),
            (r'#'+ident_re, String.Single, '#pop'),
            (r'#(?=\{)', String.Single, ('#pop', 'root')),
            (r'[^"\'{`=<>]+', String.Single, '#pop'),
            (r'\{', Operator, ('#pop', 'root')),  # this is a tail call!
        ],

        # we should probably deal with '\' escapes here
        'html-content': [
            (r'<!--', Comment, 'html-comment'),
            (r'</', String.Single, ('#pop', 'html-end-tag')),
            (r'<', String.Single, 'html-open-tag'),
            (r'\{', Operator, 'root'),
            (r'[^<{]+', String.Single),
        ],
        'html-comment': [
            (r'-->', Comment, '#pop'),
            (r'[^\-]+|-', Comment),
        ],
    }
  660. class ReasonLexer(RegexLexer):
  661. """
  662. For the ReasonML language.
  663. """
  664. name = 'ReasonML'
  665. url = 'https://reasonml.github.io/'
  666. aliases = ['reasonml', 'reason']
  667. filenames = ['*.re', '*.rei']
  668. mimetypes = ['text/x-reasonml']
  669. version_added = '2.6'
  670. keywords = (
  671. 'as', 'assert', 'begin', 'class', 'constraint', 'do', 'done', 'downto',
  672. 'else', 'end', 'exception', 'external', 'false', 'for', 'fun', 'esfun',
  673. 'function', 'functor', 'if', 'in', 'include', 'inherit', 'initializer', 'lazy',
  674. 'let', 'switch', 'module', 'pub', 'mutable', 'new', 'nonrec', 'object', 'of',
  675. 'open', 'pri', 'rec', 'sig', 'struct', 'then', 'to', 'true', 'try',
  676. 'type', 'val', 'virtual', 'when', 'while', 'with',
  677. )
  678. keyopts = (
  679. '!=', '#', '&', '&&', r'\(', r'\)', r'\*', r'\+', ',', '-',
  680. r'-\.', '=>', r'\.', r'\.\.', r'\.\.\.', ':', '::', ':=', ':>', ';', ';;', '<',
  681. '<-', '=', '>', '>]', r'>\}', r'\?', r'\?\?', r'\[', r'\[<', r'\[>',
  682. r'\[\|', ']', '_', '`', r'\{', r'\{<', r'\|', r'\|\|', r'\|]', r'\}', '~'
  683. )
  684. operators = r'[!$%&*+\./:<=>?@^|~-]'
  685. word_operators = ('and', 'asr', 'land', 'lor', 'lsl', 'lsr', 'lxor', 'mod', 'or')
  686. prefix_syms = r'[!?~]'
  687. infix_syms = r'[=<>@^|&+\*/$%-]'
  688. primitives = ('unit', 'int', 'float', 'bool', 'string', 'char', 'list', 'array')
  689. tokens = {
  690. 'escape-sequence': [
  691. (r'\\[\\"\'ntbr]', String.Escape),
  692. (r'\\[0-9]{3}', String.Escape),
  693. (r'\\x[0-9a-fA-F]{2}', String.Escape),
  694. ],
  695. 'root': [
  696. (r'\s+', Text),
  697. (r'false|true|\(\)|\[\]', Name.Builtin.Pseudo),
  698. (r'\b([A-Z][\w\']*)(?=\s*\.)', Name.Namespace, 'dotted'),
  699. (r'\b([A-Z][\w\']*)', Name.Class),
  700. (r'//.*?\n', Comment.Single),
  701. (r'\/\*(?!/)', Comment.Multiline, 'comment'),
  702. (r'\b({})\b'.format('|'.join(keywords)), Keyword),
  703. (r'({})'.format('|'.join(keyopts[::-1])), Operator.Word),
  704. (rf'({infix_syms}|{prefix_syms})?{operators}', Operator),
  705. (r'\b({})\b'.format('|'.join(word_operators)), Operator.Word),
  706. (r'\b({})\b'.format('|'.join(primitives)), Keyword.Type),
  707. (r"[^\W\d][\w']*", Name),
  708. (r'-?\d[\d_]*(.[\d_]*)?([eE][+\-]?\d[\d_]*)', Number.Float),
  709. (r'0[xX][\da-fA-F][\da-fA-F_]*', Number.Hex),
  710. (r'0[oO][0-7][0-7_]*', Number.Oct),
  711. (r'0[bB][01][01_]*', Number.Bin),
  712. (r'\d[\d_]*', Number.Integer),
  713. (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2}))'",
  714. String.Char),
  715. (r"'.'", String.Char),
  716. (r"'", Keyword),
  717. (r'"', String.Double, 'string'),
  718. (r'[~?][a-z][\w\']*:', Name.Variable),
  719. ],
  720. 'comment': [
  721. (r'[^/*]+', Comment.Multiline),
  722. (r'\/\*', Comment.Multiline, '#push'),
  723. (r'\*\/', Comment.Multiline, '#pop'),
  724. (r'\*', Comment.Multiline),
  725. ],
  726. 'string': [
  727. (r'[^\\"]+', String.Double),
  728. include('escape-sequence'),
  729. (r'\\\n', String.Double),
  730. (r'"', String.Double, '#pop'),
  731. ],
  732. 'dotted': [
  733. (r'\s+', Text),
  734. (r'\.', Punctuation),
  735. (r'[A-Z][\w\']*(?=\s*\.)', Name.Namespace),
  736. (r'[A-Z][\w\']*', Name.Class, '#pop'),
  737. (r'[a-z_][\w\']*', Name, '#pop'),
  738. default('#pop'),
  739. ],
  740. }
  741. class FStarLexer(RegexLexer):
  742. """
  743. For the F* language.
  744. """
  745. name = 'FStar'
  746. url = 'https://www.fstar-lang.org/'
  747. aliases = ['fstar']
  748. filenames = ['*.fst', '*.fsti']
  749. mimetypes = ['text/x-fstar']
  750. version_added = '2.7'
  751. keywords = (
  752. 'abstract', 'attributes', 'noeq', 'unopteq', 'and'
  753. 'begin', 'by', 'default', 'effect', 'else', 'end', 'ensures',
  754. 'exception', 'exists', 'false', 'forall', 'fun', 'function', 'if',
  755. 'in', 'include', 'inline', 'inline_for_extraction', 'irreducible',
  756. 'logic', 'match', 'module', 'mutable', 'new', 'new_effect', 'noextract',
  757. 'of', 'open', 'opaque', 'private', 'range_of', 'reifiable',
  758. 'reify', 'reflectable', 'requires', 'set_range_of', 'sub_effect',
  759. 'synth', 'then', 'total', 'true', 'try', 'type', 'unfold', 'unfoldable',
  760. 'val', 'when', 'with', 'not'
  761. )
  762. decl_keywords = ('let', 'rec')
  763. assume_keywords = ('assume', 'admit', 'assert', 'calc')
  764. keyopts = (
  765. r'~', r'-', r'/\\', r'\\/', r'<:', r'<@', r'\(\|', r'\|\)', r'#', r'u#',
  766. r'&', r'\(', r'\)', r'\(\)', r',', r'~>', r'->', r'<-', r'<--', r'<==>',
  767. r'==>', r'\.', r'\?', r'\?\.', r'\.\[', r'\.\(', r'\.\(\|', r'\.\[\|',
  768. r'\{:pattern', r':', r'::', r':=', r';', r';;', r'=', r'%\[', r'!\{',
  769. r'\[', r'\[@', r'\[\|', r'\|>', r'\]', r'\|\]', r'\{', r'\|', r'\}', r'\$'
  770. )
  771. operators = r'[!$%&*+\./:<=>?@^|~-]'
  772. prefix_syms = r'[!?~]'
  773. infix_syms = r'[=<>@^|&+\*/$%-]'
  774. primitives = ('unit', 'int', 'float', 'bool', 'string', 'char', 'list', 'array')
  775. tokens = {
  776. 'escape-sequence': [
  777. (r'\\[\\"\'ntbr]', String.Escape),
  778. (r'\\[0-9]{3}', String.Escape),
  779. (r'\\x[0-9a-fA-F]{2}', String.Escape),
  780. ],
  781. 'root': [
  782. (r'\s+', Text),
  783. (r'false|true|False|True|\(\)|\[\]', Name.Builtin.Pseudo),
  784. (r'\b([A-Z][\w\']*)(?=\s*\.)', Name.Namespace, 'dotted'),
  785. (r'\b([A-Z][\w\']*)', Name.Class),
  786. (r'\(\*(?![)])', Comment, 'comment'),
  787. (r'\/\/.+$', Comment),
  788. (r'\b({})\b'.format('|'.join(keywords)), Keyword),
  789. (r'\b({})\b'.format('|'.join(assume_keywords)), Name.Exception),
  790. (r'\b({})\b'.format('|'.join(decl_keywords)), Keyword.Declaration),
  791. (r'({})'.format('|'.join(keyopts[::-1])), Operator),
  792. (rf'({infix_syms}|{prefix_syms})?{operators}', Operator),
  793. (r'\b({})\b'.format('|'.join(primitives)), Keyword.Type),
  794. (r"[^\W\d][\w']*", Name),
  795. (r'-?\d[\d_]*(.[\d_]*)?([eE][+\-]?\d[\d_]*)', Number.Float),
  796. (r'0[xX][\da-fA-F][\da-fA-F_]*', Number.Hex),
  797. (r'0[oO][0-7][0-7_]*', Number.Oct),
  798. (r'0[bB][01][01_]*', Number.Bin),
  799. (r'\d[\d_]*', Number.Integer),
  800. (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2}))'",
  801. String.Char),
  802. (r"'.'", String.Char),
  803. (r"'", Keyword), # a stray quote is another syntax element
  804. (r"\`([\w\'.]+)\`", Operator.Word), # for infix applications
  805. (r"\`", Keyword), # for quoting
  806. (r'"', String.Double, 'string'),
  807. (r'[~?][a-z][\w\']*:', Name.Variable),
  808. ],
  809. 'comment': [
  810. (r'[^(*)]+', Comment),
  811. (r'\(\*', Comment, '#push'),
  812. (r'\*\)', Comment, '#pop'),
  813. (r'[(*)]', Comment),
  814. ],
  815. 'string': [
  816. (r'[^\\"]+', String.Double),
  817. include('escape-sequence'),
  818. (r'\\\n', String.Double),
  819. (r'"', String.Double, '#pop'),
  820. ],
  821. 'dotted': [
  822. (r'\s+', Text),
  823. (r'\.', Punctuation),
  824. (r'[A-Z][\w\']*(?=\s*\.)', Name.Namespace),
  825. (r'[A-Z][\w\']*', Name.Class, '#pop'),
  826. (r'[a-z_][\w\']*', Name, '#pop'),
  827. default('#pop'),
  828. ],
  829. }