"""
    pygments.lexers.crystal
    ~~~~~~~~~~~~~~~~~~~~~~~

    Lexer for Crystal.

    :copyright: Copyright 2006-2024 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""
  8. import re
  9. from pygments.lexer import ExtendedRegexLexer, include, bygroups, default, \
  10. words, line_re
  11. from pygments.token import Comment, Operator, Keyword, Name, String, Number, \
  12. Punctuation, Error, Whitespace
  13. __all__ = ['CrystalLexer']
  14. CRYSTAL_OPERATORS = [
  15. '!=', '!~', '!', '%', '&&', '&', '**', '*', '+', '-', '/', '<=>', '<<', '<=', '<',
  16. '===', '==', '=~', '=', '>=', '>>', '>', '[]=', '[]?', '[]', '^', '||', '|', '~'
  17. ]
  18. class CrystalLexer(ExtendedRegexLexer):
  19. """
  20. For Crystal source code.
  21. """
  22. name = 'Crystal'
  23. url = 'https://crystal-lang.org'
  24. aliases = ['cr', 'crystal']
  25. filenames = ['*.cr']
  26. mimetypes = ['text/x-crystal']
  27. version_added = '2.2'
  28. flags = re.DOTALL | re.MULTILINE
  29. def heredoc_callback(self, match, ctx):
  30. # okay, this is the hardest part of parsing Crystal...
  31. # match: 1 = <<-?, 2 = quote? 3 = name 4 = quote? 5 = rest of line
  32. start = match.start(1)
  33. yield start, Operator, match.group(1) # <<-?
  34. yield match.start(2), String.Heredoc, match.group(2) # quote ", ', `
  35. yield match.start(3), String.Delimiter, match.group(3) # heredoc name
  36. yield match.start(4), String.Heredoc, match.group(4) # quote again
  37. heredocstack = ctx.__dict__.setdefault('heredocstack', [])
  38. outermost = not bool(heredocstack)
  39. heredocstack.append((match.group(1) == '<<-', match.group(3)))
  40. ctx.pos = match.start(5)
  41. ctx.end = match.end(5)
  42. # this may find other heredocs, so limit the recursion depth
  43. if len(heredocstack) < 100:
  44. yield from self.get_tokens_unprocessed(context=ctx)
  45. else:
  46. yield ctx.pos, String.Heredoc, match.group(5)
  47. ctx.pos = match.end()
  48. if outermost:
  49. # this is the outer heredoc again, now we can process them all
  50. for tolerant, hdname in heredocstack:
  51. lines = []
  52. for match in line_re.finditer(ctx.text, ctx.pos):
  53. if tolerant:
  54. check = match.group().strip()
  55. else:
  56. check = match.group().rstrip()
  57. if check == hdname:
  58. for amatch in lines:
  59. yield amatch.start(), String.Heredoc, amatch.group()
  60. yield match.start(), String.Delimiter, match.group()
  61. ctx.pos = match.end()
  62. break
  63. else:
  64. lines.append(match)
  65. else:
  66. # end of heredoc not found -- error!
  67. for amatch in lines:
  68. yield amatch.start(), Error, amatch.group()
  69. ctx.end = len(ctx.text)
  70. del heredocstack[:]
  71. def gen_crystalstrings_rules():
  72. states = {}
  73. states['strings'] = [
  74. (r'\:\w+[!?]?', String.Symbol),
  75. (words(CRYSTAL_OPERATORS, prefix=r'\:'), String.Symbol),
  76. (r":'(\\\\|\\[^\\]|[^'\\])*'", String.Symbol),
  77. # This allows arbitrary text after '\ for simplicity
  78. (r"'(\\\\|\\'|[^']|\\[^'\\]+)'", String.Char),
  79. (r':"', String.Symbol, 'simple-sym'),
  80. # Crystal doesn't have "symbol:"s but this simplifies function args
  81. (r'([a-zA-Z_]\w*)(:)(?!:)', bygroups(String.Symbol, Punctuation)),
  82. (r'"', String.Double, 'simple-string'),
  83. (r'(?<!\.)`', String.Backtick, 'simple-backtick'),
  84. ]
  85. # double-quoted string and symbol
  86. for name, ttype, end in ('string', String.Double, '"'), \
  87. ('sym', String.Symbol, '"'), \
  88. ('backtick', String.Backtick, '`'):
  89. states['simple-'+name] = [
  90. include('string-escaped' if name == 'sym' else 'string-intp-escaped'),
  91. (rf'[^\\{end}#]+', ttype),
  92. (r'[\\#]', ttype),
  93. (end, ttype, '#pop'),
  94. ]
  95. # https://crystal-lang.org/docs/syntax_and_semantics/literals/string.html#percent-string-literals
  96. for lbrace, rbrace, bracecc, name in \
  97. ('\\{', '\\}', '{}', 'cb'), \
  98. ('\\[', '\\]', '\\[\\]', 'sb'), \
  99. ('\\(', '\\)', '()', 'pa'), \
  100. ('<', '>', '<>', 'ab'), \
  101. ('\\|', '\\|', '\\|', 'pi'):
  102. states[name+'-intp-string'] = [
  103. (r'\\' + lbrace, String.Other),
  104. ] + (lbrace != rbrace) * [
  105. (lbrace, String.Other, '#push'),
  106. ] + [
  107. (rbrace, String.Other, '#pop'),
  108. include('string-intp-escaped'),
  109. (r'[\\#' + bracecc + ']', String.Other),
  110. (r'[^\\#' + bracecc + ']+', String.Other),
  111. ]
  112. states['strings'].append((r'%Q?' + lbrace, String.Other,
  113. name+'-intp-string'))
  114. states[name+'-string'] = [
  115. (r'\\[\\' + bracecc + ']', String.Other),
  116. ] + (lbrace != rbrace) * [
  117. (lbrace, String.Other, '#push'),
  118. ] + [
  119. (rbrace, String.Other, '#pop'),
  120. (r'[\\#' + bracecc + ']', String.Other),
  121. (r'[^\\#' + bracecc + ']+', String.Other),
  122. ]
  123. # https://crystal-lang.org/docs/syntax_and_semantics/literals/array.html#percent-array-literals
  124. states['strings'].append((r'%[qwi]' + lbrace, String.Other,
  125. name+'-string'))
  126. states[name+'-regex'] = [
  127. (r'\\[\\' + bracecc + ']', String.Regex),
  128. ] + (lbrace != rbrace) * [
  129. (lbrace, String.Regex, '#push'),
  130. ] + [
  131. (rbrace + '[imsx]*', String.Regex, '#pop'),
  132. include('string-intp'),
  133. (r'[\\#' + bracecc + ']', String.Regex),
  134. (r'[^\\#' + bracecc + ']+', String.Regex),
  135. ]
  136. states['strings'].append((r'%r' + lbrace, String.Regex,
  137. name+'-regex'))
  138. return states
  139. tokens = {
  140. 'root': [
  141. (r'#.*?$', Comment.Single),
  142. # keywords
  143. (words('''
  144. abstract asm begin break case do else elsif end ensure extend if in
  145. include next of private protected require rescue return select self super
  146. then unless until when while with yield
  147. '''.split(), suffix=r'\b'), Keyword),
  148. (words('''
  149. previous_def forall out uninitialized __DIR__ __FILE__ __LINE__
  150. __END_LINE__
  151. '''.split(), prefix=r'(?<!\.)', suffix=r'\b'), Keyword.Pseudo),
  152. # https://crystal-lang.org/docs/syntax_and_semantics/is_a.html
  153. (r'\.(is_a\?|nil\?|responds_to\?|as\?|as\b)', Keyword.Pseudo),
  154. (words(['true', 'false', 'nil'], suffix=r'\b'), Keyword.Constant),
  155. # start of function, class and module names
  156. (r'(module|lib)(\s+)([a-zA-Z_]\w*(?:::[a-zA-Z_]\w*)*)',
  157. bygroups(Keyword, Whitespace, Name.Namespace)),
  158. (r'(def|fun|macro)(\s+)((?:[a-zA-Z_]\w*::)*)',
  159. bygroups(Keyword, Whitespace, Name.Namespace), 'funcname'),
  160. (r'def(?=[*%&^`~+-/\[<>=])', Keyword, 'funcname'),
  161. (r'(annotation|class|struct|union|type|alias|enum)(\s+)((?:[a-zA-Z_]\w*::)*)',
  162. bygroups(Keyword, Whitespace, Name.Namespace), 'classname'),
  163. # https://crystal-lang.org/api/toplevel.html
  164. (words('''
  165. instance_sizeof offsetof pointerof sizeof typeof
  166. '''.split(), prefix=r'(?<!\.)', suffix=r'\b'), Keyword.Pseudo),
  167. # macros
  168. (r'(?<!\.)(debugger\b|p!|pp!|record\b|spawn\b)', Name.Builtin.Pseudo),
  169. # builtins
  170. (words('''
  171. abort at_exit caller exit gets loop main p pp print printf puts
  172. raise rand read_line sleep spawn sprintf system
  173. '''.split(), prefix=r'(?<!\.)', suffix=r'\b'), Name.Builtin),
  174. # https://crystal-lang.org/api/Object.html#macro-summary
  175. (r'(?<!\.)(((class_)?((getter|property)\b[!?]?|setter\b))|'
  176. r'(def_(clone|equals|equals_and_hash|hash)|delegate|forward_missing_to)\b)',
  177. Name.Builtin.Pseudo),
  178. # normal heredocs
  179. (r'(?<!\w)(<<-?)(["`\']?)([a-zA-Z_]\w*)(\2)(.*?\n)',
  180. heredoc_callback),
  181. # empty string heredocs
  182. (r'(<<-?)("|\')()(\2)(.*?\n)', heredoc_callback),
  183. (r'__END__', Comment.Preproc, 'end-part'),
  184. # multiline regex (after keywords or assignments)
  185. (r'(?:^|(?<=[=<>~!:])|'
  186. r'(?<=(?:\s|;)when\s)|'
  187. r'(?<=(?:\s|;)or\s)|'
  188. r'(?<=(?:\s|;)and\s)|'
  189. r'(?<=\.index\s)|'
  190. r'(?<=\.scan\s)|'
  191. r'(?<=\.sub\s)|'
  192. r'(?<=\.sub!\s)|'
  193. r'(?<=\.gsub\s)|'
  194. r'(?<=\.gsub!\s)|'
  195. r'(?<=\.match\s)|'
  196. r'(?<=(?:\s|;)if\s)|'
  197. r'(?<=(?:\s|;)elsif\s)|'
  198. r'(?<=^when\s)|'
  199. r'(?<=^index\s)|'
  200. r'(?<=^scan\s)|'
  201. r'(?<=^sub\s)|'
  202. r'(?<=^gsub\s)|'
  203. r'(?<=^sub!\s)|'
  204. r'(?<=^gsub!\s)|'
  205. r'(?<=^match\s)|'
  206. r'(?<=^if\s)|'
  207. r'(?<=^elsif\s)'
  208. r')(\s*)(/)', bygroups(Whitespace, String.Regex), 'multiline-regex'),
  209. # multiline regex (in method calls or subscripts)
  210. (r'(?<=\(|,|\[)/', String.Regex, 'multiline-regex'),
  211. # multiline regex (this time the funny no whitespace rule)
  212. (r'(\s+)(/)(?![\s=])', bygroups(Whitespace, String.Regex),
  213. 'multiline-regex'),
  214. # lex numbers and ignore following regular expressions which
  215. # are division operators in fact (grrrr. i hate that. any
  216. # better ideas?)
  217. # since pygments 0.7 we also eat a "?" operator after numbers
  218. # so that the char operator does not work. Chars are not allowed
  219. # there so that you can use the ternary operator.
  220. # stupid example:
  221. # x>=0?n[x]:""
  222. (r'(0o[0-7]+(?:_[0-7]+)*(?:_?[iu][0-9]+)?)\b(\s*)([/?])?',
  223. bygroups(Number.Oct, Whitespace, Operator)),
  224. (r'(0x[0-9A-Fa-f]+(?:_[0-9A-Fa-f]+)*(?:_?[iu][0-9]+)?)\b(\s*)([/?])?',
  225. bygroups(Number.Hex, Whitespace, Operator)),
  226. (r'(0b[01]+(?:_[01]+)*(?:_?[iu][0-9]+)?)\b(\s*)([/?])?',
  227. bygroups(Number.Bin, Whitespace, Operator)),
  228. # 3 separate expressions for floats because any of the 3 optional
  229. # parts makes it a float
  230. (r'((?:0(?![0-9])|[1-9][\d_]*)(?:\.\d[\d_]*)(?:e[+-]?[0-9]+)?'
  231. r'(?:_?f[0-9]+)?)(\s*)([/?])?',
  232. bygroups(Number.Float, Whitespace, Operator)),
  233. (r'((?:0(?![0-9])|[1-9][\d_]*)(?:\.\d[\d_]*)?(?:e[+-]?[0-9]+)'
  234. r'(?:_?f[0-9]+)?)(\s*)([/?])?',
  235. bygroups(Number.Float, Whitespace, Operator)),
  236. (r'((?:0(?![0-9])|[1-9][\d_]*)(?:\.\d[\d_]*)?(?:e[+-]?[0-9]+)?'
  237. r'(?:_?f[0-9]+))(\s*)([/?])?',
  238. bygroups(Number.Float, Whitespace, Operator)),
  239. (r'(0\b|[1-9][\d]*(?:_\d+)*(?:_?[iu][0-9]+)?)\b(\s*)([/?])?',
  240. bygroups(Number.Integer, Whitespace, Operator)),
  241. # Names
  242. (r'@@[a-zA-Z_]\w*', Name.Variable.Class),
  243. (r'@[a-zA-Z_]\w*', Name.Variable.Instance),
  244. (r'\$\w+', Name.Variable.Global),
  245. (r'\$[!@&`\'+~=/\\,;.<>_*$?:"^-]', Name.Variable.Global),
  246. (r'\$-[0adFiIlpvw]', Name.Variable.Global),
  247. (r'::', Operator),
  248. include('strings'),
  249. # https://crystal-lang.org/reference/syntax_and_semantics/literals/char.html
  250. (r'\?(\\[MC]-)*' # modifiers
  251. r'(\\([\\abefnrtv#"\']|[0-7]{1,3}|x[a-fA-F0-9]{2}|u[a-fA-F0-9]{4}|u\{[a-fA-F0-9 ]+\})|\S)'
  252. r'(?!\w)',
  253. String.Char),
  254. (r'[A-Z][A-Z_]+\b(?!::|\.)', Name.Constant),
  255. # macro expansion
  256. (r'\{%', String.Interpol, 'in-macro-control'),
  257. (r'\{\{', String.Interpol, 'in-macro-expr'),
  258. # annotations
  259. (r'(@\[)(\s*)([A-Z]\w*(::[A-Z]\w*)*)',
  260. bygroups(Operator, Whitespace, Name.Decorator), 'in-annot'),
  261. # this is needed because Crystal attributes can look
  262. # like keywords (class) or like this: ` ?!?
  263. (words(CRYSTAL_OPERATORS, prefix=r'(\.|::)'),
  264. bygroups(Operator, Name.Operator)),
  265. (r'(\.|::)([a-zA-Z_]\w*[!?]?|[*%&^`~+\-/\[<>=])',
  266. bygroups(Operator, Name)),
  267. # Names can end with [!?] unless it's "!="
  268. (r'[a-zA-Z_]\w*(?:[!?](?!=))?', Name),
  269. (r'(\[|\]\??|\*\*|<=>?|>=|<<?|>>?|=~|===|'
  270. r'!~|&&?|\|\||\.{1,3})', Operator),
  271. (r'[-+/*%=<>&!^|~]=?', Operator),
  272. (r'[(){};,/?:\\]', Punctuation),
  273. (r'\s+', Whitespace)
  274. ],
  275. 'funcname': [
  276. (r'(?:([a-zA-Z_]\w*)(\.))?'
  277. r'([a-zA-Z_]\w*[!?]?|\*\*?|[-+]@?|'
  278. r'[/%&|^`~]|\[\]=?|<<|>>|<=?>|>=?|===?)',
  279. bygroups(Name.Class, Operator, Name.Function), '#pop'),
  280. default('#pop')
  281. ],
  282. 'classname': [
  283. (r'[A-Z_]\w*', Name.Class),
  284. (r'(\()(\s*)([A-Z_]\w*)(\s*)(\))',
  285. bygroups(Punctuation, Whitespace, Name.Class, Whitespace, Punctuation)),
  286. default('#pop')
  287. ],
  288. 'in-intp': [
  289. (r'\{', String.Interpol, '#push'),
  290. (r'\}', String.Interpol, '#pop'),
  291. include('root'),
  292. ],
  293. 'string-intp': [
  294. (r'#\{', String.Interpol, 'in-intp'),
  295. ],
  296. 'string-escaped': [
  297. # https://crystal-lang.org/reference/syntax_and_semantics/literals/string.html
  298. (r'\\([\\abefnrtv#"\']|[0-7]{1,3}|x[a-fA-F0-9]{2}|u[a-fA-F0-9]{4}|u\{[a-fA-F0-9 ]+\})',
  299. String.Escape)
  300. ],
  301. 'string-intp-escaped': [
  302. include('string-intp'),
  303. include('string-escaped'),
  304. ],
  305. 'interpolated-regex': [
  306. include('string-intp'),
  307. (r'[\\#]', String.Regex),
  308. (r'[^\\#]+', String.Regex),
  309. ],
  310. 'interpolated-string': [
  311. include('string-intp'),
  312. (r'[\\#]', String.Other),
  313. (r'[^\\#]+', String.Other),
  314. ],
  315. 'multiline-regex': [
  316. include('string-intp'),
  317. (r'\\\\', String.Regex),
  318. (r'\\/', String.Regex),
  319. (r'[\\#]', String.Regex),
  320. (r'[^\\/#]+', String.Regex),
  321. (r'/[imsx]*', String.Regex, '#pop'),
  322. ],
  323. 'end-part': [
  324. (r'.+', Comment.Preproc, '#pop')
  325. ],
  326. 'in-macro-control': [
  327. (r'\{%', String.Interpol, '#push'),
  328. (r'%\}', String.Interpol, '#pop'),
  329. (r'(for|verbatim)\b', Keyword),
  330. include('root'),
  331. ],
  332. 'in-macro-expr': [
  333. (r'\{\{', String.Interpol, '#push'),
  334. (r'\}\}', String.Interpol, '#pop'),
  335. include('root'),
  336. ],
  337. 'in-annot': [
  338. (r'\[', Operator, '#push'),
  339. (r'\]', Operator, '#pop'),
  340. include('root'),
  341. ],
  342. }
  343. tokens.update(gen_crystalstrings_rules())