ruby.py 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518
  1. """
  2. pygments.lexers.ruby
  3. ~~~~~~~~~~~~~~~~~~~~
  4. Lexers for Ruby and related languages.
  5. :copyright: Copyright 2006-2024 by the Pygments team, see AUTHORS.
  6. :license: BSD, see LICENSE for details.
  7. """
  8. import re
  9. from pygments.lexer import Lexer, RegexLexer, ExtendedRegexLexer, include, \
  10. bygroups, default, LexerContext, do_insertions, words, line_re
  11. from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
  12. Number, Punctuation, Error, Generic, Whitespace
  13. from pygments.util import shebang_matches
  14. __all__ = ['RubyLexer', 'RubyConsoleLexer', 'FancyLexer']
  15. RUBY_OPERATORS = (
  16. '*', '**', '-', '+', '-@', '+@', '/', '%', '&', '|', '^', '`', '~',
  17. '[]', '[]=', '<<', '>>', '<', '<>', '<=>', '>', '>=', '==', '==='
  18. )
  19. class RubyLexer(ExtendedRegexLexer):
  20. """
  21. For Ruby source code.
  22. """
  23. name = 'Ruby'
  24. url = 'http://www.ruby-lang.org'
  25. aliases = ['ruby', 'rb', 'duby']
  26. filenames = ['*.rb', '*.rbw', 'Rakefile', '*.rake', '*.gemspec',
  27. '*.rbx', '*.duby', 'Gemfile', 'Vagrantfile']
  28. mimetypes = ['text/x-ruby', 'application/x-ruby']
  29. version_added = ''
  30. flags = re.DOTALL | re.MULTILINE
  31. def heredoc_callback(self, match, ctx):
  32. # okay, this is the hardest part of parsing Ruby...
  33. # match: 1 = <<[-~]?, 2 = quote? 3 = name 4 = quote? 5 = rest of line
  34. start = match.start(1)
  35. yield start, Operator, match.group(1) # <<[-~]?
  36. yield match.start(2), String.Heredoc, match.group(2) # quote ", ', `
  37. yield match.start(3), String.Delimiter, match.group(3) # heredoc name
  38. yield match.start(4), String.Heredoc, match.group(4) # quote again
  39. heredocstack = ctx.__dict__.setdefault('heredocstack', [])
  40. outermost = not bool(heredocstack)
  41. heredocstack.append((match.group(1) in ('<<-', '<<~'), match.group(3)))
  42. ctx.pos = match.start(5)
  43. ctx.end = match.end(5)
  44. # this may find other heredocs, so limit the recursion depth
  45. if len(heredocstack) < 100:
  46. yield from self.get_tokens_unprocessed(context=ctx)
  47. else:
  48. yield ctx.pos, String.Heredoc, match.group(5)
  49. ctx.pos = match.end()
  50. if outermost:
  51. # this is the outer heredoc again, now we can process them all
  52. for tolerant, hdname in heredocstack:
  53. lines = []
  54. for match in line_re.finditer(ctx.text, ctx.pos):
  55. if tolerant:
  56. check = match.group().strip()
  57. else:
  58. check = match.group().rstrip()
  59. if check == hdname:
  60. for amatch in lines:
  61. yield amatch.start(), String.Heredoc, amatch.group()
  62. yield match.start(), String.Delimiter, match.group()
  63. ctx.pos = match.end()
  64. break
  65. else:
  66. lines.append(match)
  67. else:
  68. # end of heredoc not found -- error!
  69. for amatch in lines:
  70. yield amatch.start(), Error, amatch.group()
  71. ctx.end = len(ctx.text)
  72. del heredocstack[:]
  73. def gen_rubystrings_rules():
  74. def intp_regex_callback(self, match, ctx):
  75. yield match.start(1), String.Regex, match.group(1) # begin
  76. nctx = LexerContext(match.group(3), 0, ['interpolated-regex'])
  77. for i, t, v in self.get_tokens_unprocessed(context=nctx):
  78. yield match.start(3)+i, t, v
  79. yield match.start(4), String.Regex, match.group(4) # end[mixounse]*
  80. ctx.pos = match.end()
  81. def intp_string_callback(self, match, ctx):
  82. yield match.start(1), String.Other, match.group(1)
  83. nctx = LexerContext(match.group(3), 0, ['interpolated-string'])
  84. for i, t, v in self.get_tokens_unprocessed(context=nctx):
  85. yield match.start(3)+i, t, v
  86. yield match.start(4), String.Other, match.group(4) # end
  87. ctx.pos = match.end()
  88. states = {}
  89. states['strings'] = [
  90. # easy ones
  91. (r'\:@{0,2}[a-zA-Z_]\w*[!?]?', String.Symbol),
  92. (words(RUBY_OPERATORS, prefix=r'\:@{0,2}'), String.Symbol),
  93. (r":'(\\\\|\\[^\\]|[^'\\])*'", String.Symbol),
  94. (r':"', String.Symbol, 'simple-sym'),
  95. (r'([a-zA-Z_]\w*)(:)(?!:)',
  96. bygroups(String.Symbol, Punctuation)), # Since Ruby 1.9
  97. (r'"', String.Double, 'simple-string-double'),
  98. (r"'", String.Single, 'simple-string-single'),
  99. (r'(?<!\.)`', String.Backtick, 'simple-backtick'),
  100. ]
  101. # quoted string and symbol
  102. for name, ttype, end in ('string-double', String.Double, '"'), \
  103. ('string-single', String.Single, "'"),\
  104. ('sym', String.Symbol, '"'), \
  105. ('backtick', String.Backtick, '`'):
  106. states['simple-'+name] = [
  107. include('string-intp-escaped'),
  108. (rf'[^\\{end}#]+', ttype),
  109. (r'[\\#]', ttype),
  110. (end, ttype, '#pop'),
  111. ]
  112. # braced quoted strings
  113. for lbrace, rbrace, bracecc, name in \
  114. ('\\{', '\\}', '{}', 'cb'), \
  115. ('\\[', '\\]', '\\[\\]', 'sb'), \
  116. ('\\(', '\\)', '()', 'pa'), \
  117. ('<', '>', '<>', 'ab'):
  118. states[name+'-intp-string'] = [
  119. (r'\\[\\' + bracecc + ']', String.Other),
  120. (lbrace, String.Other, '#push'),
  121. (rbrace, String.Other, '#pop'),
  122. include('string-intp-escaped'),
  123. (r'[\\#' + bracecc + ']', String.Other),
  124. (r'[^\\#' + bracecc + ']+', String.Other),
  125. ]
  126. states['strings'].append((r'%[QWx]?' + lbrace, String.Other,
  127. name+'-intp-string'))
  128. states[name+'-string'] = [
  129. (r'\\[\\' + bracecc + ']', String.Other),
  130. (lbrace, String.Other, '#push'),
  131. (rbrace, String.Other, '#pop'),
  132. (r'[\\#' + bracecc + ']', String.Other),
  133. (r'[^\\#' + bracecc + ']+', String.Other),
  134. ]
  135. states['strings'].append((r'%[qsw]' + lbrace, String.Other,
  136. name+'-string'))
  137. states[name+'-regex'] = [
  138. (r'\\[\\' + bracecc + ']', String.Regex),
  139. (lbrace, String.Regex, '#push'),
  140. (rbrace + '[mixounse]*', String.Regex, '#pop'),
  141. include('string-intp'),
  142. (r'[\\#' + bracecc + ']', String.Regex),
  143. (r'[^\\#' + bracecc + ']+', String.Regex),
  144. ]
  145. states['strings'].append((r'%r' + lbrace, String.Regex,
  146. name+'-regex'))
  147. # these must come after %<brace>!
  148. states['strings'] += [
  149. # %r regex
  150. (r'(%r([\W_]))((?:\\\2|(?!\2).)*)(\2[mixounse]*)',
  151. intp_regex_callback),
  152. # regular fancy strings with qsw
  153. (r'%[qsw]([\W_])((?:\\\1|(?!\1).)*)\1', String.Other),
  154. (r'(%[QWx]([\W_]))((?:\\\2|(?!\2).)*)(\2)',
  155. intp_string_callback),
  156. # special forms of fancy strings after operators or
  157. # in method calls with braces
  158. (r'(?<=[-+/*%=<>&!^|~,(])(\s*)(%([\t ])(?:(?:\\\3|(?!\3).)*)\3)',
  159. bygroups(Whitespace, String.Other, None)),
  160. # and because of fixed width lookbehinds the whole thing a
  161. # second time for line startings...
  162. (r'^(\s*)(%([\t ])(?:(?:\\\3|(?!\3).)*)\3)',
  163. bygroups(Whitespace, String.Other, None)),
  164. # all regular fancy strings without qsw
  165. (r'(%([^a-zA-Z0-9\s]))((?:\\\2|(?!\2).)*)(\2)',
  166. intp_string_callback),
  167. ]
  168. return states
  169. tokens = {
  170. 'root': [
  171. (r'\A#!.+?$', Comment.Hashbang),
  172. (r'#.*?$', Comment.Single),
  173. (r'=begin\s.*?\n=end.*?$', Comment.Multiline),
  174. # keywords
  175. (words((
  176. 'BEGIN', 'END', 'alias', 'begin', 'break', 'case', 'defined?',
  177. 'do', 'else', 'elsif', 'end', 'ensure', 'for', 'if', 'in', 'next', 'redo',
  178. 'rescue', 'raise', 'retry', 'return', 'super', 'then', 'undef',
  179. 'unless', 'until', 'when', 'while', 'yield'), suffix=r'\b'),
  180. Keyword),
  181. # start of function, class and module names
  182. (r'(module)(\s+)([a-zA-Z_]\w*'
  183. r'(?:::[a-zA-Z_]\w*)*)',
  184. bygroups(Keyword, Whitespace, Name.Namespace)),
  185. (r'(def)(\s+)', bygroups(Keyword, Whitespace), 'funcname'),
  186. (r'def(?=[*%&^`~+-/\[<>=])', Keyword, 'funcname'),
  187. (r'(class)(\s+)', bygroups(Keyword, Whitespace), 'classname'),
  188. # special methods
  189. (words((
  190. 'initialize', 'new', 'loop', 'include', 'extend', 'raise', 'attr_reader',
  191. 'attr_writer', 'attr_accessor', 'attr', 'catch', 'throw', 'private',
  192. 'module_function', 'public', 'protected', 'true', 'false', 'nil'),
  193. suffix=r'\b'),
  194. Keyword.Pseudo),
  195. (r'(not|and|or)\b', Operator.Word),
  196. (words((
  197. 'autoload', 'block_given', 'const_defined', 'eql', 'equal', 'frozen', 'include',
  198. 'instance_of', 'is_a', 'iterator', 'kind_of', 'method_defined', 'nil',
  199. 'private_method_defined', 'protected_method_defined',
  200. 'public_method_defined', 'respond_to', 'tainted'), suffix=r'\?'),
  201. Name.Builtin),
  202. (r'(chomp|chop|exit|gsub|sub)!', Name.Builtin),
  203. (words((
  204. 'Array', 'Float', 'Integer', 'String', '__id__', '__send__', 'abort',
  205. 'ancestors', 'at_exit', 'autoload', 'binding', 'callcc', 'caller',
  206. 'catch', 'chomp', 'chop', 'class_eval', 'class_variables',
  207. 'clone', 'const_defined?', 'const_get', 'const_missing', 'const_set',
  208. 'constants', 'display', 'dup', 'eval', 'exec', 'exit', 'extend', 'fail', 'fork',
  209. 'format', 'freeze', 'getc', 'gets', 'global_variables', 'gsub',
  210. 'hash', 'id', 'included_modules', 'inspect', 'instance_eval',
  211. 'instance_method', 'instance_methods',
  212. 'instance_variable_get', 'instance_variable_set', 'instance_variables',
  213. 'lambda', 'load', 'local_variables', 'loop',
  214. 'method', 'method_missing', 'methods', 'module_eval', 'name',
  215. 'object_id', 'open', 'p', 'print', 'printf', 'private_class_method',
  216. 'private_instance_methods',
  217. 'private_methods', 'proc', 'protected_instance_methods',
  218. 'protected_methods', 'public_class_method',
  219. 'public_instance_methods', 'public_methods',
  220. 'putc', 'puts', 'raise', 'rand', 'readline', 'readlines', 'require',
  221. 'scan', 'select', 'self', 'send', 'set_trace_func', 'singleton_methods', 'sleep',
  222. 'split', 'sprintf', 'srand', 'sub', 'syscall', 'system', 'taint',
  223. 'test', 'throw', 'to_a', 'to_s', 'trace_var', 'trap', 'untaint',
  224. 'untrace_var', 'warn'), prefix=r'(?<!\.)', suffix=r'\b'),
  225. Name.Builtin),
  226. (r'__(FILE|LINE)__\b', Name.Builtin.Pseudo),
  227. # normal heredocs
  228. (r'(?<!\w)(<<[-~]?)(["`\']?)([a-zA-Z_]\w*)(\2)(.*?\n)',
  229. heredoc_callback),
  230. # empty string heredocs
  231. (r'(<<[-~]?)("|\')()(\2)(.*?\n)', heredoc_callback),
  232. (r'__END__', Comment.Preproc, 'end-part'),
  233. # multiline regex (after keywords or assignments)
  234. (r'(?:^|(?<=[=<>~!:])|'
  235. r'(?<=(?:\s|;)when\s)|'
  236. r'(?<=(?:\s|;)or\s)|'
  237. r'(?<=(?:\s|;)and\s)|'
  238. r'(?<=\.index\s)|'
  239. r'(?<=\.scan\s)|'
  240. r'(?<=\.sub\s)|'
  241. r'(?<=\.sub!\s)|'
  242. r'(?<=\.gsub\s)|'
  243. r'(?<=\.gsub!\s)|'
  244. r'(?<=\.match\s)|'
  245. r'(?<=(?:\s|;)if\s)|'
  246. r'(?<=(?:\s|;)elsif\s)|'
  247. r'(?<=^when\s)|'
  248. r'(?<=^index\s)|'
  249. r'(?<=^scan\s)|'
  250. r'(?<=^sub\s)|'
  251. r'(?<=^gsub\s)|'
  252. r'(?<=^sub!\s)|'
  253. r'(?<=^gsub!\s)|'
  254. r'(?<=^match\s)|'
  255. r'(?<=^if\s)|'
  256. r'(?<=^elsif\s)'
  257. r')(\s*)(/)', bygroups(Text, String.Regex), 'multiline-regex'),
  258. # multiline regex (in method calls or subscripts)
  259. (r'(?<=\(|,|\[)/', String.Regex, 'multiline-regex'),
  260. # multiline regex (this time the funny no whitespace rule)
  261. (r'(\s+)(/)(?![\s=])', bygroups(Whitespace, String.Regex),
  262. 'multiline-regex'),
  263. # lex numbers and ignore following regular expressions which
  264. # are division operators in fact (grrrr. i hate that. any
  265. # better ideas?)
  266. # since pygments 0.7 we also eat a "?" operator after numbers
  267. # so that the char operator does not work. Chars are not allowed
  268. # there so that you can use the ternary operator.
  269. # stupid example:
  270. # x>=0?n[x]:""
  271. (r'(0_?[0-7]+(?:_[0-7]+)*)(\s*)([/?])?',
  272. bygroups(Number.Oct, Whitespace, Operator)),
  273. (r'(0x[0-9A-Fa-f]+(?:_[0-9A-Fa-f]+)*)(\s*)([/?])?',
  274. bygroups(Number.Hex, Whitespace, Operator)),
  275. (r'(0b[01]+(?:_[01]+)*)(\s*)([/?])?',
  276. bygroups(Number.Bin, Whitespace, Operator)),
  277. (r'([\d]+(?:_\d+)*)(\s*)([/?])?',
  278. bygroups(Number.Integer, Whitespace, Operator)),
  279. # Names
  280. (r'@@[a-zA-Z_]\w*', Name.Variable.Class),
  281. (r'@[a-zA-Z_]\w*', Name.Variable.Instance),
  282. (r'\$\w+', Name.Variable.Global),
  283. (r'\$[!@&`\'+~=/\\,;.<>_*$?:"^-]', Name.Variable.Global),
  284. (r'\$-[0adFiIlpvw]', Name.Variable.Global),
  285. (r'::', Operator),
  286. include('strings'),
  287. # chars
  288. (r'\?(\\[MC]-)*' # modifiers
  289. r'(\\([\\abefnrstv#"\']|x[a-fA-F0-9]{1,2}|[0-7]{1,3})|\S)'
  290. r'(?!\w)',
  291. String.Char),
  292. (r'[A-Z]\w+', Name.Constant),
  293. # this is needed because ruby attributes can look
  294. # like keywords (class) or like this: ` ?!?
  295. (words(RUBY_OPERATORS, prefix=r'(\.|::)'),
  296. bygroups(Operator, Name.Operator)),
  297. (r'(\.|::)([a-zA-Z_]\w*[!?]?|[*%&^`~+\-/\[<>=])',
  298. bygroups(Operator, Name)),
  299. (r'[a-zA-Z_]\w*[!?]?', Name),
  300. (r'(\[|\]|\*\*|<<?|>>?|>=|<=|<=>|=~|={3}|'
  301. r'!~|&&?|\|\||\.{1,3})', Operator),
  302. (r'[-+/*%=<>&!^|~]=?', Operator),
  303. (r'[(){};,/?:\\]', Punctuation),
  304. (r'\s+', Whitespace)
  305. ],
  306. 'funcname': [
  307. (r'\(', Punctuation, 'defexpr'),
  308. (r'(?:([a-zA-Z_]\w*)(\.))?' # optional scope name, like "self."
  309. r'('
  310. r'[a-zA-Z\u0080-\uffff][a-zA-Z0-9_\u0080-\uffff]*[!?=]?' # method name
  311. r'|!=|!~|=~|\*\*?|[-+!~]@?|[/%&|^]|<=>|<[<=]?|>[>=]?|===?' # or operator override
  312. r'|\[\]=?' # or element reference/assignment override
  313. r'|`' # or the undocumented backtick override
  314. r')',
  315. bygroups(Name.Class, Operator, Name.Function), '#pop'),
  316. default('#pop')
  317. ],
  318. 'classname': [
  319. (r'\(', Punctuation, 'defexpr'),
  320. (r'<<', Operator, '#pop'),
  321. (r'[A-Z_]\w*', Name.Class, '#pop'),
  322. default('#pop')
  323. ],
  324. 'defexpr': [
  325. (r'(\))(\.|::)?', bygroups(Punctuation, Operator), '#pop'),
  326. (r'\(', Operator, '#push'),
  327. include('root')
  328. ],
  329. 'in-intp': [
  330. (r'\{', String.Interpol, '#push'),
  331. (r'\}', String.Interpol, '#pop'),
  332. include('root'),
  333. ],
  334. 'string-intp': [
  335. (r'#\{', String.Interpol, 'in-intp'),
  336. (r'#@@?[a-zA-Z_]\w*', String.Interpol),
  337. (r'#\$[a-zA-Z_]\w*', String.Interpol)
  338. ],
  339. 'string-intp-escaped': [
  340. include('string-intp'),
  341. (r'\\([\\abefnrstv#"\']|x[a-fA-F0-9]{1,2}|[0-7]{1,3})',
  342. String.Escape)
  343. ],
  344. 'interpolated-regex': [
  345. include('string-intp'),
  346. (r'[\\#]', String.Regex),
  347. (r'[^\\#]+', String.Regex),
  348. ],
  349. 'interpolated-string': [
  350. include('string-intp'),
  351. (r'[\\#]', String.Other),
  352. (r'[^\\#]+', String.Other),
  353. ],
  354. 'multiline-regex': [
  355. include('string-intp'),
  356. (r'\\\\', String.Regex),
  357. (r'\\/', String.Regex),
  358. (r'[\\#]', String.Regex),
  359. (r'[^\\/#]+', String.Regex),
  360. (r'/[mixounse]*', String.Regex, '#pop'),
  361. ],
  362. 'end-part': [
  363. (r'.+', Comment.Preproc, '#pop')
  364. ]
  365. }
  366. tokens.update(gen_rubystrings_rules())
  367. def analyse_text(text):
  368. return shebang_matches(text, r'ruby(1\.\d)?')
  369. class RubyConsoleLexer(Lexer):
  370. """
  371. For Ruby interactive console (**irb**) output.
  372. """
  373. name = 'Ruby irb session'
  374. aliases = ['rbcon', 'irb']
  375. mimetypes = ['text/x-ruby-shellsession']
  376. url = 'https://www.ruby-lang.org'
  377. version_added = ''
  378. _example = 'rbcon/console'
  379. _prompt_re = re.compile(r'irb\([a-zA-Z_]\w*\):\d{3}:\d+[>*"\'] '
  380. r'|>> |\?> ')
  381. def get_tokens_unprocessed(self, text):
  382. rblexer = RubyLexer(**self.options)
  383. curcode = ''
  384. insertions = []
  385. for match in line_re.finditer(text):
  386. line = match.group()
  387. m = self._prompt_re.match(line)
  388. if m is not None:
  389. end = m.end()
  390. insertions.append((len(curcode),
  391. [(0, Generic.Prompt, line[:end])]))
  392. curcode += line[end:]
  393. else:
  394. if curcode:
  395. yield from do_insertions(
  396. insertions, rblexer.get_tokens_unprocessed(curcode))
  397. curcode = ''
  398. insertions = []
  399. yield match.start(), Generic.Output, line
  400. if curcode:
  401. yield from do_insertions(
  402. insertions, rblexer.get_tokens_unprocessed(curcode))
  403. class FancyLexer(RegexLexer):
  404. """
  405. Pygments Lexer For Fancy.
  406. Fancy is a self-hosted, pure object-oriented, dynamic,
  407. class-based, concurrent general-purpose programming language
  408. running on Rubinius, the Ruby VM.
  409. """
  410. name = 'Fancy'
  411. url = 'https://github.com/bakkdoor/fancy'
  412. filenames = ['*.fy', '*.fancypack']
  413. aliases = ['fancy', 'fy']
  414. mimetypes = ['text/x-fancysrc']
  415. version_added = '1.5'
  416. tokens = {
  417. # copied from PerlLexer:
  418. 'balanced-regex': [
  419. (r'/(\\\\|\\[^\\]|[^/\\])*/[egimosx]*', String.Regex, '#pop'),
  420. (r'!(\\\\|\\[^\\]|[^!\\])*![egimosx]*', String.Regex, '#pop'),
  421. (r'\\(\\\\|[^\\])*\\[egimosx]*', String.Regex, '#pop'),
  422. (r'\{(\\\\|\\[^\\]|[^}\\])*\}[egimosx]*', String.Regex, '#pop'),
  423. (r'<(\\\\|\\[^\\]|[^>\\])*>[egimosx]*', String.Regex, '#pop'),
  424. (r'\[(\\\\|\\[^\\]|[^\]\\])*\][egimosx]*', String.Regex, '#pop'),
  425. (r'\((\\\\|\\[^\\]|[^)\\])*\)[egimosx]*', String.Regex, '#pop'),
  426. (r'@(\\\\|\\[^\\]|[^@\\])*@[egimosx]*', String.Regex, '#pop'),
  427. (r'%(\\\\|\\[^\\]|[^%\\])*%[egimosx]*', String.Regex, '#pop'),
  428. (r'\$(\\\\|\\[^\\]|[^$\\])*\$[egimosx]*', String.Regex, '#pop'),
  429. ],
  430. 'root': [
  431. (r'\s+', Whitespace),
  432. # balanced delimiters (copied from PerlLexer):
  433. (r's\{(\\\\|\\[^\\]|[^}\\])*\}\s*', String.Regex, 'balanced-regex'),
  434. (r's<(\\\\|\\[^\\]|[^>\\])*>\s*', String.Regex, 'balanced-regex'),
  435. (r's\[(\\\\|\\[^\\]|[^\]\\])*\]\s*', String.Regex, 'balanced-regex'),
  436. (r's\((\\\\|\\[^\\]|[^)\\])*\)\s*', String.Regex, 'balanced-regex'),
  437. (r'm?/(\\\\|\\[^\\]|[^///\n])*/[gcimosx]*', String.Regex),
  438. (r'm(?=[/!\\{<\[(@%$])', String.Regex, 'balanced-regex'),
  439. # Comments
  440. (r'#(.*?)\n', Comment.Single),
  441. # Symbols
  442. (r'\'([^\'\s\[\](){}]+|\[\])', String.Symbol),
  443. # Multi-line DoubleQuotedString
  444. (r'"""(\\\\|\\[^\\]|[^\\])*?"""', String),
  445. # DoubleQuotedString
  446. (r'"(\\\\|\\[^\\]|[^"\\])*"', String),
  447. # keywords
  448. (r'(def|class|try|catch|finally|retry|return|return_local|match|'
  449. r'case|->|=>)\b', Keyword),
  450. # constants
  451. (r'(self|super|nil|false|true)\b', Name.Constant),
  452. (r'[(){};,/?|:\\]', Punctuation),
  453. # names
  454. (words((
  455. 'Object', 'Array', 'Hash', 'Directory', 'File', 'Class', 'String',
  456. 'Number', 'Enumerable', 'FancyEnumerable', 'Block', 'TrueClass',
  457. 'NilClass', 'FalseClass', 'Tuple', 'Symbol', 'Stack', 'Set',
  458. 'FancySpec', 'Method', 'Package', 'Range'), suffix=r'\b'),
  459. Name.Builtin),
  460. # functions
  461. (r'[a-zA-Z](\w|[-+?!=*/^><%])*:', Name.Function),
  462. # operators, must be below functions
  463. (r'[-+*/~,<>=&!?%^\[\].$]+', Operator),
  464. (r'[A-Z]\w*', Name.Constant),
  465. (r'@[a-zA-Z_]\w*', Name.Variable.Instance),
  466. (r'@@[a-zA-Z_]\w*', Name.Variable.Class),
  467. ('@@?', Operator),
  468. (r'[a-zA-Z_]\w*', Name),
  469. # numbers - / checks are necessary to avoid mismarking regexes,
  470. # see comment in RubyLexer
  471. (r'(0[oO]?[0-7]+(?:_[0-7]+)*)(\s*)([/?])?',
  472. bygroups(Number.Oct, Whitespace, Operator)),
  473. (r'(0[xX][0-9A-Fa-f]+(?:_[0-9A-Fa-f]+)*)(\s*)([/?])?',
  474. bygroups(Number.Hex, Whitespace, Operator)),
  475. (r'(0[bB][01]+(?:_[01]+)*)(\s*)([/?])?',
  476. bygroups(Number.Bin, Whitespace, Operator)),
  477. (r'([\d]+(?:_\d+)*)(\s*)([/?])?',
  478. bygroups(Number.Integer, Whitespace, Operator)),
  479. (r'\d+([eE][+-]?[0-9]+)|\d+\.\d+([eE][+-]?[0-9]+)?', Number.Float),
  480. (r'\d+', Number.Integer)
  481. ]
  482. }