  1. """
  2. pygments.lexers.ml
  3. ~~~~~~~~~~~~~~~~~~
  4. Lexers for ML family languages.
  5. :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS.
  6. :license: BSD, see LICENSE for details.
  7. """
  8. import re
  9. from pygments.lexer import RegexLexer, include, bygroups, default, words
  10. from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
  11. Number, Punctuation, Error
  12. __all__ = ['SMLLexer', 'OcamlLexer', 'OpaLexer', 'ReasonLexer', 'FStarLexer']
  13. class SMLLexer(RegexLexer):
  14. """
  15. For the Standard ML language.
  16. .. versionadded:: 1.5
  17. """
  18. name = 'Standard ML'
  19. aliases = ['sml']
  20. filenames = ['*.sml', '*.sig', '*.fun']
  21. mimetypes = ['text/x-standardml', 'application/x-standardml']
  22. alphanumid_reserved = {
  23. # Core
  24. 'abstype', 'and', 'andalso', 'as', 'case', 'datatype', 'do', 'else',
  25. 'end', 'exception', 'fn', 'fun', 'handle', 'if', 'in', 'infix',
  26. 'infixr', 'let', 'local', 'nonfix', 'of', 'op', 'open', 'orelse',
  27. 'raise', 'rec', 'then', 'type', 'val', 'with', 'withtype', 'while',
  28. # Modules
  29. 'eqtype', 'functor', 'include', 'sharing', 'sig', 'signature',
  30. 'struct', 'structure', 'where',
  31. }
  32. symbolicid_reserved = {
  33. # Core
  34. ':', r'\|', '=', '=>', '->', '#',
  35. # Modules
  36. ':>',
  37. }
  38. nonid_reserved = {'(', ')', '[', ']', '{', '}', ',', ';', '...', '_'}
  39. alphanumid_re = r"[a-zA-Z][\w']*"
  40. symbolicid_re = r"[!%&$#+\-/:<=>?@\\~`^|*]+"
  41. # A character constant is a sequence of the form #s, where s is a string
  42. # constant denoting a string of size one character. This setup just parses
  43. # the entire string as either a String.Double or a String.Char (depending
  44. # on the argument), even if the String.Char is an erroneous
  45. # multiple-character string.
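    # For example, #"a" is a well-formed character constant, while #"ab" is
    # still highlighted as String.Char here even though it is not legal SML;
    # reporting that error is left to the compiler.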
    def stringy(whatkind):
        return [
            (r'[^"\\]', whatkind),
            (r'\\[\\"abtnvfr]', String.Escape),
            # Control-character notation is used for codes < 32,
            # where \^@ == \000
            (r'\\\^[\x40-\x5e]', String.Escape),
            # Docs say 'decimal digits'
            (r'\\[0-9]{3}', String.Escape),
            (r'\\u[0-9a-fA-F]{4}', String.Escape),
            (r'\\\s+\\', String.Interpol),
            (r'"', whatkind, '#pop'),
        ]

    # Callbacks for distinguishing tokens and reserved words

    def long_id_callback(self, match):
        if match.group(1) in self.alphanumid_reserved:
            token = Error
        else:
            token = Name.Namespace
        yield match.start(1), token, match.group(1)
        yield match.start(2), Punctuation, match.group(2)

    def end_id_callback(self, match):
        if match.group(1) in self.alphanumid_reserved:
            token = Error
        elif match.group(1) in self.symbolicid_reserved:
            token = Error
        else:
            token = Name
        yield match.start(1), token, match.group(1)

    def id_callback(self, match):
        str = match.group(1)
        if str in self.alphanumid_reserved:
            token = Keyword.Reserved
        elif str in self.symbolicid_reserved:
            token = Punctuation
        else:
            token = Name
        yield match.start(1), token, str

    tokens = {
        # Whitespace and comments are (almost) everywhere
        'whitespace': [
            (r'\s+', Text),
            (r'\(\*', Comment.Multiline, 'comment'),
        ],

        'delimiters': [
            # This lexer treats these delimiters specially:
            # Delimiters define scopes, and the scope is how the meaning of
            # the `|' is resolved - is it a case/handle expression, or function
            # definition by cases? (This is not how the Definition works, but
            # it's how MLton behaves, see http://mlton.org/SMLNJDeviations)
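            # For example, in "case x of 0 => a | _ => b" the `|' separates
            # match rules, while in "fun f 0 = a | f _ = b" it introduces a
            # new clause starting with the function name (hence the separate
            # 'main' and 'main-fun' states below).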
            (r'\(|\[|\{', Punctuation, 'main'),
            (r'\)|\]|\}', Punctuation, '#pop'),
            (r'\b(let|if|local)\b(?!\')', Keyword.Reserved, ('main', 'main')),
            (r'\b(struct|sig|while)\b(?!\')', Keyword.Reserved, 'main'),
            (r'\b(do|else|end|in|then)\b(?!\')', Keyword.Reserved, '#pop'),
        ],

        'core': [
            # Punctuation that doesn't overlap symbolic identifiers
            (r'(%s)' % '|'.join(re.escape(z) for z in nonid_reserved),
             Punctuation),

            # Special constants: strings, floats, numbers in decimal and hex
            (r'#"', String.Char, 'char'),
            (r'"', String.Double, 'string'),
            (r'~?0x[0-9a-fA-F]+', Number.Hex),
            (r'0wx[0-9a-fA-F]+', Number.Hex),
            (r'0w\d+', Number.Integer),
            (r'~?\d+\.\d+[eE]~?\d+', Number.Float),
            (r'~?\d+\.\d+', Number.Float),
            (r'~?\d+[eE]~?\d+', Number.Float),
            (r'~?\d+', Number.Integer),

            # Labels
            (r'#\s*[1-9][0-9]*', Name.Label),
            (r'#\s*(%s)' % alphanumid_re, Name.Label),
            (r'#\s+(%s)' % symbolicid_re, Name.Label),

            # Some reserved words trigger a special, local lexer state change
            (r'\b(datatype|abstype)\b(?!\')', Keyword.Reserved, 'dname'),
            (r'\b(exception)\b(?!\')', Keyword.Reserved, 'ename'),
            (r'\b(functor|include|open|signature|structure)\b(?!\')',
             Keyword.Reserved, 'sname'),
            (r'\b(type|eqtype)\b(?!\')', Keyword.Reserved, 'tname'),

            # Regular identifiers, long and otherwise
            (r'\'[\w\']*', Name.Decorator),
            (r'(%s)(\.)' % alphanumid_re, long_id_callback, "dotted"),
            (r'(%s)' % alphanumid_re, id_callback),
            (r'(%s)' % symbolicid_re, id_callback),
        ],
        'dotted': [
            (r'(%s)(\.)' % alphanumid_re, long_id_callback),
            (r'(%s)' % alphanumid_re, end_id_callback, "#pop"),
            (r'(%s)' % symbolicid_re, end_id_callback, "#pop"),
            (r'\s+', Error),
            (r'\S+', Error),
        ],

        # Main parser (prevents errors in files that have scoping errors)
        'root': [
            default('main')
        ],

        # In this scope, I expect '|' to not be followed by a function name,
        # and I expect 'and' to be followed by a binding site
        'main': [
            include('whitespace'),

            # Special behavior of val/and/fun
            (r'\b(val|and)\b(?!\')', Keyword.Reserved, 'vname'),
            (r'\b(fun)\b(?!\')', Keyword.Reserved,
             ('#pop', 'main-fun', 'fname')),

            include('delimiters'),
            include('core'),
            (r'\S+', Error),
        ],

        # In this scope, I expect '|' and 'and' to be followed by a function
        'main-fun': [
            include('whitespace'),
            (r'\s', Text),
            (r'\(\*', Comment.Multiline, 'comment'),

            # Special behavior of val/and/fun
            (r'\b(fun|and)\b(?!\')', Keyword.Reserved, 'fname'),
            (r'\b(val)\b(?!\')', Keyword.Reserved,
             ('#pop', 'main', 'vname')),

            # Special behavior of '|' and '|'-manipulating keywords
            (r'\|', Punctuation, 'fname'),
            (r'\b(case|handle)\b(?!\')', Keyword.Reserved,
             ('#pop', 'main')),

            include('delimiters'),
            include('core'),
            (r'\S+', Error),
        ],

        # Character and string parsers
        'char': stringy(String.Char),
        'string': stringy(String.Double),

        'breakout': [
            (r'(?=\b(%s)\b(?!\'))' % '|'.join(alphanumid_reserved), Text, '#pop'),
        ],

        # Dealing with what comes after module system keywords
        'sname': [
            include('whitespace'),
            include('breakout'),
            (r'(%s)' % alphanumid_re, Name.Namespace),
            default('#pop'),
        ],

        # Dealing with what comes after the 'fun' (or 'and' or '|') keyword
        'fname': [
            include('whitespace'),
            (r'\'[\w\']*', Name.Decorator),
            (r'\(', Punctuation, 'tyvarseq'),
            (r'(%s)' % alphanumid_re, Name.Function, '#pop'),
            (r'(%s)' % symbolicid_re, Name.Function, '#pop'),
            # Ignore interesting function declarations like "fun (x + y) = ..."
            default('#pop'),
        ],

        # Dealing with what comes after the 'val' (or 'and') keyword
        'vname': [
            include('whitespace'),
            (r'\'[\w\']*', Name.Decorator),
            (r'\(', Punctuation, 'tyvarseq'),
            (r'(%s)(\s*)(=(?!%s))' % (alphanumid_re, symbolicid_re),
             bygroups(Name.Variable, Text, Punctuation), '#pop'),
            (r'(%s)(\s*)(=(?!%s))' % (symbolicid_re, symbolicid_re),
             bygroups(Name.Variable, Text, Punctuation), '#pop'),
            (r'(%s)' % alphanumid_re, Name.Variable, '#pop'),
            (r'(%s)' % symbolicid_re, Name.Variable, '#pop'),
            # Ignore interesting patterns like 'val (x, y)'
            default('#pop'),
        ],

        # Dealing with what comes after the 'type' (or 'and') keyword
        'tname': [
            include('whitespace'),
            include('breakout'),
            (r'\'[\w\']*', Name.Decorator),
            (r'\(', Punctuation, 'tyvarseq'),
            (r'=(?!%s)' % symbolicid_re, Punctuation, ('#pop', 'typbind')),
            (r'(%s)' % alphanumid_re, Keyword.Type),
            (r'(%s)' % symbolicid_re, Keyword.Type),
            (r'\S+', Error, '#pop'),
        ],

        # A type binding includes most identifiers
        'typbind': [
            include('whitespace'),
            (r'\b(and)\b(?!\')', Keyword.Reserved, ('#pop', 'tname')),
            include('breakout'),
            include('core'),
            (r'\S+', Error, '#pop'),
        ],

        # Dealing with what comes after the 'datatype' (or 'and') keyword
        'dname': [
            include('whitespace'),
            include('breakout'),
            (r'\'[\w\']*', Name.Decorator),
            (r'\(', Punctuation, 'tyvarseq'),
            (r'(=)(\s*)(datatype)',
             bygroups(Punctuation, Text, Keyword.Reserved), '#pop'),
            (r'=(?!%s)' % symbolicid_re, Punctuation,
             ('#pop', 'datbind', 'datcon')),
            (r'(%s)' % alphanumid_re, Keyword.Type),
            (r'(%s)' % symbolicid_re, Keyword.Type),
            (r'\S+', Error, '#pop'),
        ],

        # common case - A | B | C of int
        'datbind': [
            include('whitespace'),
            (r'\b(and)\b(?!\')', Keyword.Reserved, ('#pop', 'dname')),
            (r'\b(withtype)\b(?!\')', Keyword.Reserved, ('#pop', 'tname')),
            (r'\b(of)\b(?!\')', Keyword.Reserved),
            (r'(\|)(\s*)(%s)' % alphanumid_re,
             bygroups(Punctuation, Text, Name.Class)),
            (r'(\|)(\s+)(%s)' % symbolicid_re,
             bygroups(Punctuation, Text, Name.Class)),
            include('breakout'),
            include('core'),
            (r'\S+', Error),
        ],

        # Dealing with what comes after an exception
        'ename': [
            include('whitespace'),
            (r'(and\b)(\s+)(%s)' % alphanumid_re,
             bygroups(Keyword.Reserved, Text, Name.Class)),
            (r'(and\b)(\s*)(%s)' % symbolicid_re,
             bygroups(Keyword.Reserved, Text, Name.Class)),
            (r'\b(of)\b(?!\')', Keyword.Reserved),
            (r'(%s)|(%s)' % (alphanumid_re, symbolicid_re), Name.Class),
            default('#pop'),
        ],

        'datcon': [
            include('whitespace'),
            (r'(%s)' % alphanumid_re, Name.Class, '#pop'),
            (r'(%s)' % symbolicid_re, Name.Class, '#pop'),
            (r'\S+', Error, '#pop'),
        ],

        # Series of type variables
        'tyvarseq': [
            (r'\s', Text),
            (r'\(\*', Comment.Multiline, 'comment'),
            (r'\'[\w\']*', Name.Decorator),
            (alphanumid_re, Name),
            (r',', Punctuation),
            (r'\)', Punctuation, '#pop'),
            (symbolicid_re, Name),
        ],

        'comment': [
            (r'[^(*)]', Comment.Multiline),
            (r'\(\*', Comment.Multiline, '#push'),
            (r'\*\)', Comment.Multiline, '#pop'),
            (r'[(*)]', Comment.Multiline),
        ],
    }


class OcamlLexer(RegexLexer):
    """
    For the OCaml language.

    .. versionadded:: 0.7
    """

    name = 'OCaml'
    url = 'https://ocaml.org/'
    aliases = ['ocaml']
    filenames = ['*.ml', '*.mli', '*.mll', '*.mly']
    mimetypes = ['text/x-ocaml']

    keywords = (
        'and', 'as', 'assert', 'begin', 'class', 'constraint', 'do', 'done',
        'downto', 'else', 'end', 'exception', 'external', 'false',
        'for', 'fun', 'function', 'functor', 'if', 'in', 'include',
        'inherit', 'initializer', 'lazy', 'let', 'match', 'method',
        'module', 'mutable', 'new', 'object', 'of', 'open', 'private',
        'raise', 'rec', 'sig', 'struct', 'then', 'to', 'true', 'try',
        'type', 'val', 'virtual', 'when', 'while', 'with',
    )
    keyopts = (
        '!=', '#', '&', '&&', r'\(', r'\)', r'\*', r'\+', ',', '-',
        r'-\.', '->', r'\.', r'\.\.', ':', '::', ':=', ':>', ';', ';;', '<',
        '<-', '=', '>', '>]', r'>\}', r'\?', r'\?\?', r'\[', r'\[<', r'\[>',
        r'\[\|', ']', '_', '`', r'\{', r'\{<', r'\|', r'\|]', r'\}', '~'
    )

    operators = r'[!$%&*+\./:<=>?@^|~-]'
    word_operators = ('asr', 'land', 'lor', 'lsl', 'lxor', 'mod', 'or')
    prefix_syms = r'[!?~]'
    infix_syms = r'[=<>@^|&+\*/$%-]'
    primitives = ('unit', 'int', 'float', 'bool', 'string', 'char', 'list', 'array')

    tokens = {
        'escape-sequence': [
            (r'\\[\\"\'ntbr]', String.Escape),
            (r'\\[0-9]{3}', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
        ],
        'root': [
            (r'\s+', Text),
            (r'false|true|\(\)|\[\]', Name.Builtin.Pseudo),
            (r'\b([A-Z][\w\']*)(?=\s*\.)', Name.Namespace, 'dotted'),
            (r'\b([A-Z][\w\']*)', Name.Class),
            (r'\(\*(?![)])', Comment, 'comment'),
            (r'\b(%s)\b' % '|'.join(keywords), Keyword),
            (r'(%s)' % '|'.join(keyopts[::-1]), Operator),
            (r'(%s|%s)?%s' % (infix_syms, prefix_syms, operators), Operator),
            (r'\b(%s)\b' % '|'.join(word_operators), Operator.Word),
            (r'\b(%s)\b' % '|'.join(primitives), Keyword.Type),
            (r"[^\W\d][\w']*", Name),
            (r'-?\d[\d_]*(.[\d_]*)?([eE][+\-]?\d[\d_]*)', Number.Float),
            (r'0[xX][\da-fA-F][\da-fA-F_]*', Number.Hex),
            (r'0[oO][0-7][0-7_]*', Number.Oct),
            (r'0[bB][01][01_]*', Number.Bin),
            (r'\d[\d_]*', Number.Integer),
            (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2}))'",
             String.Char),
            (r"'.'", String.Char),
            (r"'", Keyword),  # a stray quote is another syntax element
            (r'"', String.Double, 'string'),
            (r'[~?][a-z][\w\']*:', Name.Variable),
        ],
        'comment': [
            (r'[^(*)]+', Comment),
            (r'\(\*', Comment, '#push'),
            (r'\*\)', Comment, '#pop'),
            (r'[(*)]', Comment),
        ],
        'string': [
            (r'[^\\"]+', String.Double),
            include('escape-sequence'),
            (r'\\\n', String.Double),
            (r'"', String.Double, '#pop'),
        ],
        'dotted': [
            (r'\s+', Text),
            (r'\.', Punctuation),
            (r'[A-Z][\w\']*(?=\s*\.)', Name.Namespace),
            (r'[A-Z][\w\']*', Name.Class, '#pop'),
            (r'[a-z_][\w\']*', Name, '#pop'),
            default('#pop'),
        ],
    }


class OpaLexer(RegexLexer):
    """
    Lexer for the Opa language.

    .. versionadded:: 1.5
    """

    name = 'Opa'
    aliases = ['opa']
    filenames = ['*.opa']
    mimetypes = ['text/x-opa']

    # most of these aren't strictly keywords
    # but if you color only real keywords, you might just
    # as well not color anything
    keywords = (
        'and', 'as', 'begin', 'case', 'client', 'css', 'database', 'db', 'do',
        'else', 'end', 'external', 'forall', 'function', 'if', 'import',
        'match', 'module', 'or', 'package', 'parser', 'rec', 'server', 'then',
        'type', 'val', 'with', 'xml_parser',
    )

    # matches both stuff and `stuff`
    ident_re = r'(([a-zA-Z_]\w*)|(`[^`]*`))'

    op_re = r'[.=\-<>,@~%/+?*&^!]'
    punc_re = r'[()\[\],;|]'  # '{' and '}' are treated elsewhere
                              # because they are also used for inserts

    tokens = {
        # copied from the caml lexer, should be adapted
        'escape-sequence': [
            (r'\\[\\"\'ntr}]', String.Escape),
            (r'\\[0-9]{3}', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
        ],

        # factorizing these rules, because they are inserted many times
        'comments': [
            (r'/\*', Comment, 'nested-comment'),
            (r'//.*?$', Comment),
        ],
        'comments-and-spaces': [
            include('comments'),
            (r'\s+', Text),
        ],

        'root': [
            include('comments-and-spaces'),
            # keywords
            (words(keywords, prefix=r'\b', suffix=r'\b'), Keyword),
            # directives
            # we could parse the actual set of directives instead of anything
            # starting with @, but this is troublesome
            # because it needs to be adjusted all the time
            # and assuming we parse only sources that compile, it is useless
            (r'@' + ident_re + r'\b', Name.Builtin.Pseudo),

            # number literals
            (r'-?.[\d]+([eE][+\-]?\d+)', Number.Float),
            (r'-?\d+.\d*([eE][+\-]?\d+)', Number.Float),
            (r'-?\d+[eE][+\-]?\d+', Number.Float),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'0[oO][0-7]+', Number.Oct),
            (r'0[bB][01]+', Number.Bin),
            (r'\d+', Number.Integer),
            # color literals
            (r'#[\da-fA-F]{3,6}', Number.Integer),

            # string literals
            (r'"', String.Double, 'string'),
            # char literal, should be checked because this is the regexp from
            # the caml lexer
            (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2})|.)'",
             String.Char),

            # this is meant to deal with embedded exprs in strings
            # every time we find a '}' we pop a state so that if we were
            # inside a string, we are back in the string state
            # as a consequence, we must also push a state every time we find a
            # '{' or else we will have errors when parsing {} for instance
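            # e.g. in the string "hello {compute_name()}!", the part between
            # the braces is lexed as an ordinary expression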
            (r'\{', Operator, '#push'),
            (r'\}', Operator, '#pop'),

            # html literals
            # this is much more strict than the actual parser,
            # since a<b would not be parsed as html
            # but then again, the parser is way too lax, and we can't hope
            # to have something as tolerant
            (r'<(?=[a-zA-Z>])', String.Single, 'html-open-tag'),

            # db path
            # matching the '[_]' in '/a[_]' because it is a part
            # of the syntax of the db path definition
            # unfortunately, i don't know how to match the ']' in
            # /a[1], so this is somewhat inconsistent
            (r'[@?!]?(/\w+)+(\[_\])?', Name.Variable),
            # putting the same color on <- as on db path, since
            # it can be used only to mean Db.write
            (r'<-(?!'+op_re+r')', Name.Variable),

            # 'modules'
            # although modules are not distinguished by their names as in caml
            # the standard library seems to follow the convention that modules
            # are the only capitalized identifiers
            (r'\b([A-Z]\w*)(?=\.)', Name.Namespace),

            # operators
            # = has a special role because this is the only
            # way to syntactically distinguish binding constructions
            # unfortunately, this colors the equal in {x=2} too
            (r'=(?!'+op_re+r')', Keyword),
            (r'(%s)+' % op_re, Operator),
            (r'(%s)+' % punc_re, Operator),

            # coercions
            (r':', Operator, 'type'),
            # type variables
            # we need this rule because we don't specially parse type
            # definitions, so in "type t('a) = ...", "'a" is parsed by 'root'
            ("'"+ident_re, Keyword.Type),

            # id literal, #something, or #{expr}
            (r'#'+ident_re, String.Single),
            (r'#(?=\{)', String.Single),

            # identifiers
            # this avoids coloring the '2' in 'a2' as an integer
            (ident_re, Text),

            # default, not sure if that is needed or not
            # (r'.', Text),
        ],

        # it is quite painful to have to parse types to know where they end
        # this is the general rule for a type
        # a type is either:
        #   * -> ty
        #   * type-with-slash
        #   * type-with-slash -> ty
        #   * type-with-slash (, type-with-slash)+ -> ty
        #
        # the code is pretty funky in here, but this code would roughly
        # translate in caml to:
        # let rec type stream =
        #   match stream with
        #   | [< "->"; stream >] -> type stream
        #   | [< ""; stream >] ->
        #     type_with_slash stream
        #     type_lhs_1 stream;
        # and type_1 stream = ...
        'type': [
            include('comments-and-spaces'),
            (r'->', Keyword.Type),
            default(('#pop', 'type-lhs-1', 'type-with-slash')),
        ],

        # parses all the atomic or closed constructions in the syntax of type
        # expressions: record types, tuple types, type constructors, basic type
        # and type variables
        'type-1': [
            include('comments-and-spaces'),
            (r'\(', Keyword.Type, ('#pop', 'type-tuple')),
            (r'~?\{', Keyword.Type, ('#pop', 'type-record')),
            (ident_re+r'\(', Keyword.Type, ('#pop', 'type-tuple')),
            (ident_re, Keyword.Type, '#pop'),
            ("'"+ident_re, Keyword.Type),
            # this case is not in the syntax but sometimes
            # we think we are parsing types when in fact we are parsing
            # some css, so we just pop the states until we get back into
            # the root state
            default('#pop'),
        ],

        # type-with-slash is either:
        #   * type-1
        #   * type-1 (/ type-1)+
        'type-with-slash': [
            include('comments-and-spaces'),
            default(('#pop', 'slash-type-1', 'type-1')),
        ],
        'slash-type-1': [
            include('comments-and-spaces'),
            ('/', Keyword.Type, ('#pop', 'type-1')),
            # same remark as above
            default('#pop'),
        ],

        # we go in this state after having parsed a type-with-slash
        # while trying to parse a type
        # and at this point we must determine if we are parsing an arrow
        # type (in which case we must continue parsing) or not (in which
        # case we stop)
        'type-lhs-1': [
            include('comments-and-spaces'),
            (r'->', Keyword.Type, ('#pop', 'type')),
            (r'(?=,)', Keyword.Type, ('#pop', 'type-arrow')),
            default('#pop'),
        ],
        'type-arrow': [
            include('comments-and-spaces'),
            # the look ahead here allows parsing f(x : int, y : float -> truc)
            # correctly
            (r',(?=[^:]*?->)', Keyword.Type, 'type-with-slash'),
            (r'->', Keyword.Type, ('#pop', 'type')),
            # same remark as above
            default('#pop'),
        ],

        # no need to do precise parsing for tuples and records
        # because they are closed constructions, so we can simply
        # find the closing delimiter
        # note that this approach would not work if the source
        # contained identifiers like `{)` (although it could be patched
        # to support it)
        'type-tuple': [
            include('comments-and-spaces'),
            (r'[^()/*]+', Keyword.Type),
            (r'[/*]', Keyword.Type),
            (r'\(', Keyword.Type, '#push'),
            (r'\)', Keyword.Type, '#pop'),
        ],
        'type-record': [
            include('comments-and-spaces'),
            (r'[^{}/*]+', Keyword.Type),
            (r'[/*]', Keyword.Type),
            (r'\{', Keyword.Type, '#push'),
            (r'\}', Keyword.Type, '#pop'),
        ],

        # 'type-tuple': [
        #     include('comments-and-spaces'),
        #     (r'\)', Keyword.Type, '#pop'),
        #     default(('#pop', 'type-tuple-1', 'type-1')),
        # ],
        # 'type-tuple-1': [
        #     include('comments-and-spaces'),
        #     (r',?\s*\)', Keyword.Type, '#pop'),  # ,) is a valid end of tuple, in (1,)
        #     (r',', Keyword.Type, 'type-1'),
        # ],
        # 'type-record':[
        #     include('comments-and-spaces'),
        #     (r'\}', Keyword.Type, '#pop'),
        #     (r'~?(?:\w+|`[^`]*`)', Keyword.Type, 'type-record-field-expr'),
        # ],
        # 'type-record-field-expr': [
        #
        # ],

        'nested-comment': [
            (r'[^/*]+', Comment),
            (r'/\*', Comment, '#push'),
            (r'\*/', Comment, '#pop'),
            (r'[/*]', Comment),
        ],

        # the copy pasting between string and single-string
        # is kinda sad. Is there a way to avoid that??
        'string': [
            (r'[^\\"{]+', String.Double),
            (r'"', String.Double, '#pop'),
            (r'\{', Operator, 'root'),
            include('escape-sequence'),
        ],
        'single-string': [
            (r'[^\\\'{]+', String.Double),
            (r'\'', String.Double, '#pop'),
            (r'\{', Operator, 'root'),
            include('escape-sequence'),
        ],

        # all the html stuff
        # can't really reuse some existing html parser
        # because we must be able to parse embedded expressions

        # we are in this state after someone parsed the '<' that
        # started the html literal
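        # e.g. something like <div class={c}>hello {name}</div>, which mixes
        # literal markup with embedded expressions in braces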
        'html-open-tag': [
            (r'[\w\-:]+', String.Single, ('#pop', 'html-attr')),
            (r'>', String.Single, ('#pop', 'html-content')),
        ],

        # we are in this state after someone parsed the '</' that
        # started the end of the closing tag
        'html-end-tag': [
            # this is a star, because </> is allowed
            (r'[\w\-:]*>', String.Single, '#pop'),
        ],

        # we are in this state after having parsed '<ident(:ident)?'
        # we thus parse a possibly empty list of attributes
        'html-attr': [
            (r'\s+', Text),
            (r'[\w\-:]+=', String.Single, 'html-attr-value'),
            (r'/>', String.Single, '#pop'),
            (r'>', String.Single, ('#pop', 'html-content')),
        ],
        'html-attr-value': [
            (r"'", String.Single, ('#pop', 'single-string')),
            (r'"', String.Single, ('#pop', 'string')),
            (r'#'+ident_re, String.Single, '#pop'),
            (r'#(?=\{)', String.Single, ('#pop', 'root')),
            (r'[^"\'{`=<>]+', String.Single, '#pop'),
            (r'\{', Operator, ('#pop', 'root')),  # this is a tail call!
        ],

        # we should probably deal with '\' escapes here
        'html-content': [
            (r'<!--', Comment, 'html-comment'),
            (r'</', String.Single, ('#pop', 'html-end-tag')),
            (r'<', String.Single, 'html-open-tag'),
            (r'\{', Operator, 'root'),
            (r'[^<{]+', String.Single),
        ],
        'html-comment': [
            (r'-->', Comment, '#pop'),
            (r'[^\-]+|-', Comment),
        ],
    }


class ReasonLexer(RegexLexer):
    """
    For the ReasonML language.

    .. versionadded:: 2.6
    """

    name = 'ReasonML'
    url = 'https://reasonml.github.io/'
    aliases = ['reasonml', 'reason']
    filenames = ['*.re', '*.rei']
    mimetypes = ['text/x-reasonml']

    keywords = (
        'as', 'assert', 'begin', 'class', 'constraint', 'do', 'done', 'downto',
        'else', 'end', 'exception', 'external', 'false', 'for', 'fun', 'esfun',
        'function', 'functor', 'if', 'in', 'include', 'inherit', 'initializer', 'lazy',
        'let', 'switch', 'module', 'pub', 'mutable', 'new', 'nonrec', 'object', 'of',
        'open', 'pri', 'rec', 'sig', 'struct', 'then', 'to', 'true', 'try',
        'type', 'val', 'virtual', 'when', 'while', 'with',
    )
    keyopts = (
        '!=', '#', '&', '&&', r'\(', r'\)', r'\*', r'\+', ',', '-',
        r'-\.', '=>', r'\.', r'\.\.', r'\.\.\.', ':', '::', ':=', ':>', ';', ';;', '<',
        '<-', '=', '>', '>]', r'>\}', r'\?', r'\?\?', r'\[', r'\[<', r'\[>',
        r'\[\|', ']', '_', '`', r'\{', r'\{<', r'\|', r'\|\|', r'\|]', r'\}', '~'
    )

    operators = r'[!$%&*+\./:<=>?@^|~-]'
    word_operators = ('and', 'asr', 'land', 'lor', 'lsl', 'lsr', 'lxor', 'mod', 'or')
    prefix_syms = r'[!?~]'
    infix_syms = r'[=<>@^|&+\*/$%-]'
    primitives = ('unit', 'int', 'float', 'bool', 'string', 'char', 'list', 'array')

    tokens = {
        'escape-sequence': [
            (r'\\[\\"\'ntbr]', String.Escape),
            (r'\\[0-9]{3}', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
        ],
        'root': [
            (r'\s+', Text),
            (r'false|true|\(\)|\[\]', Name.Builtin.Pseudo),
            (r'\b([A-Z][\w\']*)(?=\s*\.)', Name.Namespace, 'dotted'),
            (r'\b([A-Z][\w\']*)', Name.Class),
            (r'//.*?\n', Comment.Single),
            (r'\/\*(?!/)', Comment.Multiline, 'comment'),
            (r'\b(%s)\b' % '|'.join(keywords), Keyword),
            (r'(%s)' % '|'.join(keyopts[::-1]), Operator.Word),
            (r'(%s|%s)?%s' % (infix_syms, prefix_syms, operators), Operator),
            (r'\b(%s)\b' % '|'.join(word_operators), Operator.Word),
            (r'\b(%s)\b' % '|'.join(primitives), Keyword.Type),
            (r"[^\W\d][\w']*", Name),
            (r'-?\d[\d_]*(.[\d_]*)?([eE][+\-]?\d[\d_]*)', Number.Float),
            (r'0[xX][\da-fA-F][\da-fA-F_]*', Number.Hex),
            (r'0[oO][0-7][0-7_]*', Number.Oct),
            (r'0[bB][01][01_]*', Number.Bin),
            (r'\d[\d_]*', Number.Integer),
            (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2}))'",
             String.Char),
            (r"'.'", String.Char),
            (r"'", Keyword),
            (r'"', String.Double, 'string'),
            (r'[~?][a-z][\w\']*:', Name.Variable),
        ],
        'comment': [
            (r'[^/*]+', Comment.Multiline),
            (r'\/\*', Comment.Multiline, '#push'),
            (r'\*\/', Comment.Multiline, '#pop'),
            (r'\*', Comment.Multiline),
        ],
        'string': [
            (r'[^\\"]+', String.Double),
            include('escape-sequence'),
            (r'\\\n', String.Double),
            (r'"', String.Double, '#pop'),
        ],
        'dotted': [
            (r'\s+', Text),
            (r'\.', Punctuation),
            (r'[A-Z][\w\']*(?=\s*\.)', Name.Namespace),
            (r'[A-Z][\w\']*', Name.Class, '#pop'),
            (r'[a-z_][\w\']*', Name, '#pop'),
            default('#pop'),
        ],
    }


class FStarLexer(RegexLexer):
    """
    For the F* language.

    .. versionadded:: 2.7
    """

    name = 'FStar'
    url = 'https://www.fstar-lang.org/'
    aliases = ['fstar']
    filenames = ['*.fst', '*.fsti']
    mimetypes = ['text/x-fstar']

    keywords = (
        'abstract', 'attributes', 'noeq', 'unopteq', 'and',
        'begin', 'by', 'default', 'effect', 'else', 'end', 'ensures',
        'exception', 'exists', 'false', 'forall', 'fun', 'function', 'if',
        'in', 'include', 'inline', 'inline_for_extraction', 'irreducible',
        'logic', 'match', 'module', 'mutable', 'new', 'new_effect', 'noextract',
        'of', 'open', 'opaque', 'private', 'range_of', 'reifiable',
        'reify', 'reflectable', 'requires', 'set_range_of', 'sub_effect',
        'synth', 'then', 'total', 'true', 'try', 'type', 'unfold', 'unfoldable',
        'val', 'when', 'with', 'not'
    )
    decl_keywords = ('let', 'rec')
    assume_keywords = ('assume', 'admit', 'assert', 'calc')
    keyopts = (
        r'~', r'-', r'/\\', r'\\/', r'<:', r'<@', r'\(\|', r'\|\)', r'#', r'u#',
        r'&', r'\(', r'\)', r'\(\)', r',', r'~>', r'->', r'<-', r'<--', r'<==>',
        r'==>', r'\.', r'\?', r'\?\.', r'\.\[', r'\.\(', r'\.\(\|', r'\.\[\|',
        r'\{:pattern', r':', r'::', r':=', r';', r';;', r'=', r'%\[', r'!\{',
        r'\[', r'\[@', r'\[\|', r'\|>', r'\]', r'\|\]', r'\{', r'\|', r'\}', r'\$'
    )

    operators = r'[!$%&*+\./:<=>?@^|~-]'
    prefix_syms = r'[!?~]'
    infix_syms = r'[=<>@^|&+\*/$%-]'
    primitives = ('unit', 'int', 'float', 'bool', 'string', 'char', 'list', 'array')

    tokens = {
        'escape-sequence': [
            (r'\\[\\"\'ntbr]', String.Escape),
            (r'\\[0-9]{3}', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
        ],
        'root': [
            (r'\s+', Text),
            (r'false|true|False|True|\(\)|\[\]', Name.Builtin.Pseudo),
            (r'\b([A-Z][\w\']*)(?=\s*\.)', Name.Namespace, 'dotted'),
            (r'\b([A-Z][\w\']*)', Name.Class),
            (r'\(\*(?![)])', Comment, 'comment'),
            (r'\/\/.+$', Comment),
            (r'\b(%s)\b' % '|'.join(keywords), Keyword),
            (r'\b(%s)\b' % '|'.join(assume_keywords), Name.Exception),
            (r'\b(%s)\b' % '|'.join(decl_keywords), Keyword.Declaration),
            (r'(%s)' % '|'.join(keyopts[::-1]), Operator),
            (r'(%s|%s)?%s' % (infix_syms, prefix_syms, operators), Operator),
            (r'\b(%s)\b' % '|'.join(primitives), Keyword.Type),
            (r"[^\W\d][\w']*", Name),
            (r'-?\d[\d_]*(.[\d_]*)?([eE][+\-]?\d[\d_]*)', Number.Float),
            (r'0[xX][\da-fA-F][\da-fA-F_]*', Number.Hex),
            (r'0[oO][0-7][0-7_]*', Number.Oct),
            (r'0[bB][01][01_]*', Number.Bin),
            (r'\d[\d_]*', Number.Integer),
            (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2}))'",
             String.Char),
            (r"'.'", String.Char),
            (r"'", Keyword),  # a stray quote is another syntax element
            (r"\`([\w\'.]+)\`", Operator.Word),  # for infix applications
            (r"\`", Keyword),  # for quoting
            (r'"', String.Double, 'string'),
            (r'[~?][a-z][\w\']*:', Name.Variable),
        ],
        'comment': [
            (r'[^(*)]+', Comment),
            (r'\(\*', Comment, '#push'),
            (r'\*\)', Comment, '#pop'),
            (r'[(*)]', Comment),
        ],
        'string': [
            (r'[^\\"]+', String.Double),
            include('escape-sequence'),
            (r'\\\n', String.Double),
            (r'"', String.Double, '#pop'),
        ],
        'dotted': [
            (r'\s+', Text),
            (r'\.', Punctuation),
            (r'[A-Z][\w\']*(?=\s*\.)', Name.Namespace),
            (r'[A-Z][\w\']*', Name.Class, '#pop'),
            (r'[a-z_][\w\']*', Name, '#pop'),
            default('#pop'),
        ],
    }
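

# Illustrative usage sketch (not part of the upstream Pygments module): it
# runs a few of the lexers defined above over tiny hand-written samples and
# prints the raw token stream, which is a quick way to sanity-check the
# lexer states. The sample snippets are assumptions chosen only for
# demonstration and are not taken from any particular codebase.
if __name__ == '__main__':
    _samples = [
        (SMLLexer, "fun fact 0 = 1 | fact n = n * fact (n - 1)"),
        (OcamlLexer, "let rec fact n = if n = 0 then 1 else n * fact (n - 1)"),
        (ReasonLexer, "let rec fact = (n) => n <= 0 ? 1 : n * fact(n - 1);"),
        (FStarLexer, "let rec fact n = if n = 0 then 1 else n `op_Multiply` fact (n - 1)"),
    ]
    for _lexer_cls, _code in _samples:
        print('===', _lexer_cls.name, '===')
        # get_tokens_unprocessed() yields (index, token type, matched text)
        for _index, _token, _value in _lexer_cls().get_tokens_unprocessed(_code):
            print(_index, _token, repr(_value))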