crystal.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393
  1. # -*- coding: utf-8 -*-
  2. """
  3. pygments.lexers.crystal
  4. ~~~~~~~~~~~~~~~~~~~~~~~
  5. Lexer for Crystal.
  6. :copyright: Copyright 2006-2019 by the Pygments team, see AUTHORS.
  7. :license: BSD, see LICENSE for details.
  8. """
  9. import re
  10. from pygments.lexer import ExtendedRegexLexer, include, \
  11. bygroups, default, LexerContext, words
  12. from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
  13. Number, Punctuation, Error
  14. __all__ = ['CrystalLexer']
  15. line_re = re.compile('.*?\n')
  16. CRYSTAL_OPERATORS = [
  17. '!=', '!~', '!', '%', '&&', '&', '**', '*', '+', '-', '/', '<=>', '<<', '<=', '<',
  18. '===', '==', '=~', '=', '>=', '>>', '>', '[]=', '[]?', '[]', '^', '||', '|', '~'
  19. ]
  20. class CrystalLexer(ExtendedRegexLexer):
  21. """
  22. For `Crystal <http://crystal-lang.org>`_ source code.
  23. .. versionadded:: 2.2
  24. """
  25. name = 'Crystal'
  26. aliases = ['cr', 'crystal']
  27. filenames = ['*.cr']
  28. mimetypes = ['text/x-crystal']
  29. flags = re.DOTALL | re.MULTILINE
  30. def heredoc_callback(self, match, ctx):
  31. # okay, this is the hardest part of parsing Crystal...
  32. # match: 1 = <<-?, 2 = quote? 3 = name 4 = quote? 5 = rest of line
  33. start = match.start(1)
  34. yield start, Operator, match.group(1) # <<-?
  35. yield match.start(2), String.Heredoc, match.group(2) # quote ", ', `
  36. yield match.start(3), String.Delimiter, match.group(3) # heredoc name
  37. yield match.start(4), String.Heredoc, match.group(4) # quote again
  38. heredocstack = ctx.__dict__.setdefault('heredocstack', [])
  39. outermost = not bool(heredocstack)
  40. heredocstack.append((match.group(1) == '<<-', match.group(3)))
  41. ctx.pos = match.start(5)
  42. ctx.end = match.end(5)
  43. # this may find other heredocs
  44. for i, t, v in self.get_tokens_unprocessed(context=ctx):
  45. yield i, t, v
  46. ctx.pos = match.end()
  47. if outermost:
  48. # this is the outer heredoc again, now we can process them all
  49. for tolerant, hdname in heredocstack:
  50. lines = []
  51. for match in line_re.finditer(ctx.text, ctx.pos):
  52. if tolerant:
  53. check = match.group().strip()
  54. else:
  55. check = match.group().rstrip()
  56. if check == hdname:
  57. for amatch in lines:
  58. yield amatch.start(), String.Heredoc, amatch.group()
  59. yield match.start(), String.Delimiter, match.group()
  60. ctx.pos = match.end()
  61. break
  62. else:
  63. lines.append(match)
  64. else:
  65. # end of heredoc not found -- error!
  66. for amatch in lines:
  67. yield amatch.start(), Error, amatch.group()
  68. ctx.end = len(ctx.text)
  69. del heredocstack[:]
  70. def gen_crystalstrings_rules():
  71. def intp_regex_callback(self, match, ctx):
  72. yield match.start(1), String.Regex, match.group(1) # begin
  73. nctx = LexerContext(match.group(3), 0, ['interpolated-regex'])
  74. for i, t, v in self.get_tokens_unprocessed(context=nctx):
  75. yield match.start(3)+i, t, v
  76. yield match.start(4), String.Regex, match.group(4) # end[imsx]*
  77. ctx.pos = match.end()
  78. def intp_string_callback(self, match, ctx):
  79. yield match.start(1), String.Other, match.group(1)
  80. nctx = LexerContext(match.group(3), 0, ['interpolated-string'])
  81. for i, t, v in self.get_tokens_unprocessed(context=nctx):
  82. yield match.start(3)+i, t, v
  83. yield match.start(4), String.Other, match.group(4) # end
  84. ctx.pos = match.end()
  85. states = {}
  86. states['strings'] = [
  87. (r'\:@{0,2}[a-zA-Z_]\w*[!?]?', String.Symbol),
  88. (words(CRYSTAL_OPERATORS, prefix=r'\:@{0,2}'), String.Symbol),
  89. (r":'(\\\\|\\'|[^'])*'", String.Symbol),
  90. # This allows arbitrary text after '\ for simplicity
  91. (r"'(\\\\|\\'|[^']|\\[^'\\]+)'", String.Char),
  92. (r':"', String.Symbol, 'simple-sym'),
  93. # Crystal doesn't have "symbol:"s but this simplifies function args
  94. (r'([a-zA-Z_]\w*)(:)(?!:)', bygroups(String.Symbol, Punctuation)),
  95. (r'"', String.Double, 'simple-string'),
  96. (r'(?<!\.)`', String.Backtick, 'simple-backtick'),
  97. ]
  98. # double-quoted string and symbol
  99. for name, ttype, end in ('string', String.Double, '"'), \
  100. ('sym', String.Symbol, '"'), \
  101. ('backtick', String.Backtick, '`'):
  102. states['simple-'+name] = [
  103. include('string-escaped' if name == 'sym' else 'string-intp-escaped'),
  104. (r'[^\\%s#]+' % end, ttype),
  105. (r'[\\#]', ttype),
  106. (end, ttype, '#pop'),
  107. ]
  108. # braced quoted strings
  109. for lbrace, rbrace, bracecc, name in \
  110. ('\\{', '\\}', '{}', 'cb'), \
  111. ('\\[', '\\]', '\\[\\]', 'sb'), \
  112. ('\\(', '\\)', '()', 'pa'), \
  113. ('<', '>', '<>', 'ab'):
  114. states[name+'-intp-string'] = [
  115. (r'\\[' + lbrace + ']', String.Other),
  116. (lbrace, String.Other, '#push'),
  117. (rbrace, String.Other, '#pop'),
  118. include('string-intp-escaped'),
  119. (r'[\\#' + bracecc + ']', String.Other),
  120. (r'[^\\#' + bracecc + ']+', String.Other),
  121. ]
  122. states['strings'].append((r'%' + lbrace, String.Other,
  123. name+'-intp-string'))
  124. states[name+'-string'] = [
  125. (r'\\[\\' + bracecc + ']', String.Other),
  126. (lbrace, String.Other, '#push'),
  127. (rbrace, String.Other, '#pop'),
  128. (r'[\\#' + bracecc + ']', String.Other),
  129. (r'[^\\#' + bracecc + ']+', String.Other),
  130. ]
  131. # http://crystal-lang.org/docs/syntax_and_semantics/literals/array.html
  132. states['strings'].append((r'%[wi]' + lbrace, String.Other,
  133. name+'-string'))
  134. states[name+'-regex'] = [
  135. (r'\\[\\' + bracecc + ']', String.Regex),
  136. (lbrace, String.Regex, '#push'),
  137. (rbrace + '[imsx]*', String.Regex, '#pop'),
  138. include('string-intp'),
  139. (r'[\\#' + bracecc + ']', String.Regex),
  140. (r'[^\\#' + bracecc + ']+', String.Regex),
  141. ]
  142. states['strings'].append((r'%r' + lbrace, String.Regex,
  143. name+'-regex'))
  144. # these must come after %<brace>!
  145. states['strings'] += [
  146. # %r regex
  147. (r'(%r([\W_]))((?:\\\2|(?!\2).)*)(\2[imsx]*)',
  148. intp_regex_callback),
  149. # regular fancy strings with qsw
  150. (r'(%[wi]([\W_]))((?:\\\2|(?!\2).)*)(\2)',
  151. intp_string_callback),
  152. # special forms of fancy strings after operators or
  153. # in method calls with braces
  154. (r'(?<=[-+/*%=<>&!^|~,(])(\s*)(%([\t ])(?:(?:\\\3|(?!\3).)*)\3)',
  155. bygroups(Text, String.Other, None)),
  156. # and because of fixed width lookbehinds the whole thing a
  157. # second time for line startings...
  158. (r'^(\s*)(%([\t ])(?:(?:\\\3|(?!\3).)*)\3)',
  159. bygroups(Text, String.Other, None)),
  160. # all regular fancy strings without qsw
  161. (r'(%([\[{(<]))((?:\\\2|(?!\2).)*)(\2)',
  162. intp_string_callback),
  163. ]
  164. return states
  165. tokens = {
  166. 'root': [
  167. (r'#.*?$', Comment.Single),
  168. # keywords
  169. (words('''
  170. abstract asm as begin break case do else elsif end ensure extend ifdef if
  171. include instance_sizeof next of pointerof private protected rescue return
  172. require sizeof super then typeof unless until when while with yield
  173. '''.split(), suffix=r'\b'), Keyword),
  174. (words(['true', 'false', 'nil'], suffix=r'\b'), Keyword.Constant),
  175. # start of function, class and module names
  176. (r'(module|lib)(\s+)([a-zA-Z_]\w*(?:::[a-zA-Z_]\w*)*)',
  177. bygroups(Keyword, Text, Name.Namespace)),
  178. (r'(def|fun|macro)(\s+)((?:[a-zA-Z_]\w*::)*)',
  179. bygroups(Keyword, Text, Name.Namespace), 'funcname'),
  180. (r'def(?=[*%&^`~+-/\[<>=])', Keyword, 'funcname'),
  181. (r'(class|struct|union|type|alias|enum)(\s+)((?:[a-zA-Z_]\w*::)*)',
  182. bygroups(Keyword, Text, Name.Namespace), 'classname'),
  183. (r'(self|out|uninitialized)\b|(is_a|responds_to)\?', Keyword.Pseudo),
  184. # macros
  185. (words('''
  186. debugger record pp assert_responds_to spawn parallel
  187. getter setter property delegate def_hash def_equals def_equals_and_hash
  188. forward_missing_to
  189. '''.split(), suffix=r'\b'), Name.Builtin.Pseudo),
  190. (r'getter[!?]|property[!?]|__(DIR|FILE|LINE)__\b', Name.Builtin.Pseudo),
  191. # builtins
  192. # http://crystal-lang.org/api/toplevel.html
  193. (words('''
  194. Object Value Struct Reference Proc Class Nil Symbol Enum Void
  195. Bool Number Int Int8 Int16 Int32 Int64 UInt8 UInt16 UInt32 UInt64
  196. Float Float32 Float64 Char String
  197. Pointer Slice Range Exception Regex
  198. Mutex StaticArray Array Hash Set Tuple Deque Box Process File
  199. Dir Time Channel Concurrent Scheduler
  200. abort at_exit caller delay exit fork future get_stack_top gets
  201. lazy loop main p print printf puts
  202. raise rand read_line sleep sprintf system with_color
  203. '''.split(), prefix=r'(?<!\.)', suffix=r'\b'), Name.Builtin),
  204. # normal heredocs
  205. (r'(?<!\w)(<<-?)(["`\']?)([a-zA-Z_]\w*)(\2)(.*?\n)',
  206. heredoc_callback),
  207. # empty string heredocs
  208. (r'(<<-?)("|\')()(\2)(.*?\n)', heredoc_callback),
  209. (r'__END__', Comment.Preproc, 'end-part'),
  210. # multiline regex (after keywords or assignments)
  211. (r'(?:^|(?<=[=<>~!:])|'
  212. r'(?<=(?:\s|;)when\s)|'
  213. r'(?<=(?:\s|;)or\s)|'
  214. r'(?<=(?:\s|;)and\s)|'
  215. r'(?<=\.index\s)|'
  216. r'(?<=\.scan\s)|'
  217. r'(?<=\.sub\s)|'
  218. r'(?<=\.sub!\s)|'
  219. r'(?<=\.gsub\s)|'
  220. r'(?<=\.gsub!\s)|'
  221. r'(?<=\.match\s)|'
  222. r'(?<=(?:\s|;)if\s)|'
  223. r'(?<=(?:\s|;)elsif\s)|'
  224. r'(?<=^when\s)|'
  225. r'(?<=^index\s)|'
  226. r'(?<=^scan\s)|'
  227. r'(?<=^sub\s)|'
  228. r'(?<=^gsub\s)|'
  229. r'(?<=^sub!\s)|'
  230. r'(?<=^gsub!\s)|'
  231. r'(?<=^match\s)|'
  232. r'(?<=^if\s)|'
  233. r'(?<=^elsif\s)'
  234. r')(\s*)(/)', bygroups(Text, String.Regex), 'multiline-regex'),
  235. # multiline regex (in method calls or subscripts)
  236. (r'(?<=\(|,|\[)/', String.Regex, 'multiline-regex'),
  237. # multiline regex (this time the funny no whitespace rule)
  238. (r'(\s+)(/)(?![\s=])', bygroups(Text, String.Regex),
  239. 'multiline-regex'),
  240. # lex numbers and ignore following regular expressions which
  241. # are division operators in fact (grrrr. i hate that. any
  242. # better ideas?)
  243. # since pygments 0.7 we also eat a "?" operator after numbers
  244. # so that the char operator does not work. Chars are not allowed
  245. # there so that you can use the ternary operator.
  246. # stupid example:
  247. # x>=0?n[x]:""
  248. (r'(0o[0-7]+(?:_[0-7]+)*(?:_?[iu][0-9]+)?)\b(\s*)([/?])?',
  249. bygroups(Number.Oct, Text, Operator)),
  250. (r'(0x[0-9A-Fa-f]+(?:_[0-9A-Fa-f]+)*(?:_?[iu][0-9]+)?)\b(\s*)([/?])?',
  251. bygroups(Number.Hex, Text, Operator)),
  252. (r'(0b[01]+(?:_[01]+)*(?:_?[iu][0-9]+)?)\b(\s*)([/?])?',
  253. bygroups(Number.Bin, Text, Operator)),
  254. # 3 separate expressions for floats because any of the 3 optional
  255. # parts makes it a float
  256. (r'((?:0(?![0-9])|[1-9][\d_]*)(?:\.\d[\d_]*)(?:e[+-]?[0-9]+)?'
  257. r'(?:_?f[0-9]+)?)(\s*)([/?])?',
  258. bygroups(Number.Float, Text, Operator)),
  259. (r'((?:0(?![0-9])|[1-9][\d_]*)(?:\.\d[\d_]*)?(?:e[+-]?[0-9]+)'
  260. r'(?:_?f[0-9]+)?)(\s*)([/?])?',
  261. bygroups(Number.Float, Text, Operator)),
  262. (r'((?:0(?![0-9])|[1-9][\d_]*)(?:\.\d[\d_]*)?(?:e[+-]?[0-9]+)?'
  263. r'(?:_?f[0-9]+))(\s*)([/?])?',
  264. bygroups(Number.Float, Text, Operator)),
  265. (r'(0\b|[1-9][\d]*(?:_\d+)*(?:_?[iu][0-9]+)?)\b(\s*)([/?])?',
  266. bygroups(Number.Integer, Text, Operator)),
  267. # Names
  268. (r'@@[a-zA-Z_]\w*', Name.Variable.Class),
  269. (r'@[a-zA-Z_]\w*', Name.Variable.Instance),
  270. (r'\$\w+', Name.Variable.Global),
  271. (r'\$[!@&`\'+~=/\\,;.<>_*$?:"^-]', Name.Variable.Global),
  272. (r'\$-[0adFiIlpvw]', Name.Variable.Global),
  273. (r'::', Operator),
  274. include('strings'),
  275. # chars
  276. (r'\?(\\[MC]-)*' # modifiers
  277. r'(\\([\\befnrtv#"\']|x[a-fA-F0-9]{1,2}|[0-7]{1,3})|\S)'
  278. r'(?!\w)',
  279. String.Char),
  280. (r'[A-Z][A-Z_]+\b', Name.Constant),
  281. # macro expansion
  282. (r'\{%', String.Interpol, 'in-macro-control'),
  283. (r'\{\{', String.Interpol, 'in-macro-expr'),
  284. # attributes
  285. (r'(@\[)(\s*)([A-Z]\w*)',
  286. bygroups(Operator, Text, Name.Decorator), 'in-attr'),
  287. # this is needed because Crystal attributes can look
  288. # like keywords (class) or like this: ` ?!?
  289. (words(CRYSTAL_OPERATORS, prefix=r'(\.|::)'),
  290. bygroups(Operator, Name.Operator)),
  291. (r'(\.|::)([a-zA-Z_]\w*[!?]?|[*%&^`~+\-/\[<>=])',
  292. bygroups(Operator, Name)),
  293. # Names can end with [!?] unless it's "!="
  294. (r'[a-zA-Z_]\w*(?:[!?](?!=))?', Name),
  295. (r'(\[|\]\??|\*\*|<=>?|>=|<<?|>>?|=~|===|'
  296. r'!~|&&?|\|\||\.{1,3})', Operator),
  297. (r'[-+/*%=<>&!^|~]=?', Operator),
  298. (r'[(){};,/?:\\]', Punctuation),
  299. (r'\s+', Text)
  300. ],
  301. 'funcname': [
  302. (r'(?:([a-zA-Z_]\w*)(\.))?'
  303. r'([a-zA-Z_]\w*[!?]?|\*\*?|[-+]@?|'
  304. r'[/%&|^`~]|\[\]=?|<<|>>|<=?>|>=?|===?)',
  305. bygroups(Name.Class, Operator, Name.Function), '#pop'),
  306. default('#pop')
  307. ],
  308. 'classname': [
  309. (r'[A-Z_]\w*', Name.Class),
  310. (r'(\()(\s*)([A-Z_]\w*)(\s*)(\))',
  311. bygroups(Punctuation, Text, Name.Class, Text, Punctuation)),
  312. default('#pop')
  313. ],
  314. 'in-intp': [
  315. (r'\{', String.Interpol, '#push'),
  316. (r'\}', String.Interpol, '#pop'),
  317. include('root'),
  318. ],
  319. 'string-intp': [
  320. (r'#\{', String.Interpol, 'in-intp'),
  321. ],
  322. 'string-escaped': [
  323. (r'\\([\\befnstv#"\']|x[a-fA-F0-9]{1,2}|[0-7]{1,3})', String.Escape)
  324. ],
  325. 'string-intp-escaped': [
  326. include('string-intp'),
  327. include('string-escaped'),
  328. ],
  329. 'interpolated-regex': [
  330. include('string-intp'),
  331. (r'[\\#]', String.Regex),
  332. (r'[^\\#]+', String.Regex),
  333. ],
  334. 'interpolated-string': [
  335. include('string-intp'),
  336. (r'[\\#]', String.Other),
  337. (r'[^\\#]+', String.Other),
  338. ],
  339. 'multiline-regex': [
  340. include('string-intp'),
  341. (r'\\\\', String.Regex),
  342. (r'\\/', String.Regex),
  343. (r'[\\#]', String.Regex),
  344. (r'[^\\/#]+', String.Regex),
  345. (r'/[imsx]*', String.Regex, '#pop'),
  346. ],
  347. 'end-part': [
  348. (r'.+', Comment.Preproc, '#pop')
  349. ],
  350. 'in-macro-control': [
  351. (r'\{%', String.Interpol, '#push'),
  352. (r'%\}', String.Interpol, '#pop'),
  353. (r'for\b|in\b', Keyword),
  354. include('root'),
  355. ],
  356. 'in-macro-expr': [
  357. (r'\{\{', String.Interpol, '#push'),
  358. (r'\}\}', String.Interpol, '#pop'),
  359. include('root'),
  360. ],
  361. 'in-attr': [
  362. (r'\[', Operator, '#push'),
  363. (r'\]', Operator, '#pop'),
  364. include('root'),
  365. ],
  366. }
  367. tokens.update(gen_crystalstrings_rules())