r.py 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192
  1. """
  2. pygments.lexers.r
  3. ~~~~~~~~~~~~~~~~~
  4. Lexers for the R/S languages.
  5. :copyright: Copyright 2006-2024 by the Pygments team, see AUTHORS.
  6. :license: BSD, see LICENSE for details.
  7. """
  8. import re
  9. from pygments.lexer import Lexer, RegexLexer, include, do_insertions
  10. from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
  11. Number, Punctuation, Generic, Whitespace
# Lexer classes exported by this module.
__all__ = ['RConsoleLexer', 'SLexer', 'RdLexer']

# Matches a single line of input together with its terminating newline.
# Text after the last newline (an unterminated final line) is not matched.
line_re = re.compile('.*?\n')
  14. class RConsoleLexer(Lexer):
  15. """
  16. For R console transcripts or R CMD BATCH output files.
  17. """
  18. name = 'RConsole'
  19. aliases = ['rconsole', 'rout']
  20. filenames = ['*.Rout']
  21. url = 'https://www.r-project.org'
  22. version_added = ''
  23. def get_tokens_unprocessed(self, text):
  24. slexer = SLexer(**self.options)
  25. current_code_block = ''
  26. insertions = []
  27. for match in line_re.finditer(text):
  28. line = match.group()
  29. if line.startswith('>') or line.startswith('+'):
  30. # Colorize the prompt as such,
  31. # then put rest of line into current_code_block
  32. insertions.append((len(current_code_block),
  33. [(0, Generic.Prompt, line[:2])]))
  34. current_code_block += line[2:]
  35. else:
  36. # We have reached a non-prompt line!
  37. # If we have stored prompt lines, need to process them first.
  38. if current_code_block:
  39. # Weave together the prompts and highlight code.
  40. yield from do_insertions(
  41. insertions, slexer.get_tokens_unprocessed(current_code_block))
  42. # Reset vars for next code block.
  43. current_code_block = ''
  44. insertions = []
  45. # Now process the actual line itself, this is output from R.
  46. yield match.start(), Generic.Output, line
  47. # If we happen to end on a code block with nothing after it, need to
  48. # process the last code block. This is neither elegant nor DRY so
  49. # should be changed.
  50. if current_code_block:
  51. yield from do_insertions(
  52. insertions, slexer.get_tokens_unprocessed(current_code_block))
  53. class SLexer(RegexLexer):
  54. """
  55. For S, S-plus, and R source code.
  56. """
  57. name = 'S'
  58. aliases = ['splus', 's', 'r']
  59. filenames = ['*.S', '*.R', '.Rhistory', '.Rprofile', '.Renviron']
  60. mimetypes = ['text/S-plus', 'text/S', 'text/x-r-source', 'text/x-r',
  61. 'text/x-R', 'text/x-r-history', 'text/x-r-profile']
  62. url = 'https://www.r-project.org'
  63. version_added = '0.10'
  64. valid_name = r'`[^`\\]*(?:\\.[^`\\]*)*`|(?:[a-zA-Z]|\.[A-Za-z_.])[\w.]*|\.'
  65. tokens = {
  66. 'comments': [
  67. (r'#.*$', Comment.Single),
  68. ],
  69. 'valid_name': [
  70. (valid_name, Name),
  71. ],
  72. 'punctuation': [
  73. (r'\[{1,2}|\]{1,2}|\(|\)|;|,', Punctuation),
  74. ],
  75. 'keywords': [
  76. (r'(if|else|for|while|repeat|in|next|break|return|switch|function)'
  77. r'(?![\w.])',
  78. Keyword.Reserved),
  79. ],
  80. 'operators': [
  81. (r'<<?-|->>?|-|==|<=|>=|<|>|&&?|!=|\|\|?|\?', Operator),
  82. (r'\*|\+|\^|/|!|%[^%]*%|=|~|\$|@|:{1,3}', Operator),
  83. ],
  84. 'builtin_symbols': [
  85. (r'(NULL|NA(_(integer|real|complex|character)_)?|'
  86. r'letters|LETTERS|Inf|TRUE|FALSE|NaN|pi|\.\.(\.|[0-9]+))'
  87. r'(?![\w.])',
  88. Keyword.Constant),
  89. (r'(T|F)\b', Name.Builtin.Pseudo),
  90. ],
  91. 'numbers': [
  92. # hex number
  93. (r'0[xX][a-fA-F0-9]+([pP][0-9]+)?[Li]?', Number.Hex),
  94. # decimal number
  95. (r'[+-]?([0-9]+(\.[0-9]+)?|\.[0-9]+|\.)([eE][+-]?[0-9]+)?[Li]?',
  96. Number),
  97. ],
  98. 'statements': [
  99. include('comments'),
  100. # whitespaces
  101. (r'\s+', Whitespace),
  102. (r'\'', String, 'string_squote'),
  103. (r'\"', String, 'string_dquote'),
  104. include('builtin_symbols'),
  105. include('valid_name'),
  106. include('numbers'),
  107. include('keywords'),
  108. include('punctuation'),
  109. include('operators'),
  110. ],
  111. 'root': [
  112. # calls:
  113. (rf'({valid_name})\s*(?=\()', Name.Function),
  114. include('statements'),
  115. # blocks:
  116. (r'\{|\}', Punctuation),
  117. # (r'\{', Punctuation, 'block'),
  118. (r'.', Text),
  119. ],
  120. # 'block': [
  121. # include('statements'),
  122. # ('\{', Punctuation, '#push'),
  123. # ('\}', Punctuation, '#pop')
  124. # ],
  125. 'string_squote': [
  126. (r'([^\'\\]|\\.)*\'', String, '#pop'),
  127. ],
  128. 'string_dquote': [
  129. (r'([^"\\]|\\.)*"', String, '#pop'),
  130. ],
  131. }
  132. def analyse_text(text):
  133. if re.search(r'[a-z0-9_\])\s]<-(?!-)', text):
  134. return 0.11
  135. class RdLexer(RegexLexer):
  136. """
  137. Pygments Lexer for R documentation (Rd) files
  138. This is a very minimal implementation, highlighting little more
  139. than the macros. A description of Rd syntax is found in `Writing R
  140. Extensions <http://cran.r-project.org/doc/manuals/R-exts.html>`_
  141. and `Parsing Rd files <http://developer.r-project.org/parseRd.pdf>`_.
  142. """
  143. name = 'Rd'
  144. aliases = ['rd']
  145. filenames = ['*.Rd']
  146. mimetypes = ['text/x-r-doc']
  147. url = 'http://cran.r-project.org/doc/manuals/R-exts.html'
  148. version_added = '1.6'
  149. # To account for verbatim / LaTeX-like / and R-like areas
  150. # would require parsing.
  151. tokens = {
  152. 'root': [
  153. # catch escaped brackets and percent sign
  154. (r'\\[\\{}%]', String.Escape),
  155. # comments
  156. (r'%.*$', Comment),
  157. # special macros with no arguments
  158. (r'\\(?:cr|l?dots|R|tab)\b', Keyword.Constant),
  159. # macros
  160. (r'\\[a-zA-Z]+\b', Keyword),
  161. # special preprocessor macros
  162. (r'^\s*#(?:ifn?def|endif).*\b', Comment.Preproc),
  163. # non-escaped brackets
  164. (r'[{}]', Name.Builtin),
  165. # everything else
  166. (r'[^\\%\n{}]+', Text),
  167. (r'.', Text),
  168. ]
  169. }