r.py

# -*- coding: utf-8 -*-
"""
    pygments.lexers.r
    ~~~~~~~~~~~~~~~~~

    Lexers for the R/S languages.

    :copyright: Copyright 2006-2019 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import re

from pygments.lexer import Lexer, RegexLexer, include, do_insertions, bygroups
from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
    Number, Punctuation, Generic

__all__ = ['RConsoleLexer', 'SLexer', 'RdLexer']


line_re = re.compile('.*?\n')


class RConsoleLexer(Lexer):
    """
    For R console transcripts or R CMD BATCH output files.
    """

    name = 'RConsole'
    aliases = ['rconsole', 'rout']
    filenames = ['*.Rout']

    def get_tokens_unprocessed(self, text):
        slexer = SLexer(**self.options)

        current_code_block = ''
        insertions = []

        for match in line_re.finditer(text):
            line = match.group()
            if line.startswith('>') or line.startswith('+'):
                # Colorize the prompt as such,
                # then put rest of line into current_code_block.
                insertions.append((len(current_code_block),
                                   [(0, Generic.Prompt, line[:2])]))
                current_code_block += line[2:]
            else:
                # We have reached a non-prompt line!
                # If we have stored prompt lines, need to process them first.
                if current_code_block:
                    # Weave together the prompts and highlight code.
                    for item in do_insertions(
                            insertions,
                            slexer.get_tokens_unprocessed(current_code_block)):
                        yield item
                    # Reset vars for next code block.
                    current_code_block = ''
                    insertions = []
                # Now process the actual line itself, this is output from R.
                yield match.start(), Generic.Output, line

        # If we happen to end on a code block with nothing after it, need to
        # process the last code block. This is neither elegant nor DRY so
        # should be changed.
        if current_code_block:
            for item in do_insertions(
                    insertions,
                    slexer.get_tokens_unprocessed(current_code_block)):
                yield item


class SLexer(RegexLexer):
    """
    For S, S-plus, and R source code.

    .. versionadded:: 0.10
    """

    name = 'S'
    aliases = ['splus', 's', 'r']
    filenames = ['*.S', '*.R', '.Rhistory', '.Rprofile', '.Renviron']
    mimetypes = ['text/S-plus', 'text/S', 'text/x-r-source', 'text/x-r',
                 'text/x-R', 'text/x-r-history', 'text/x-r-profile']

    # A valid name is either a backquoted string (with escapes) or an
    # identifier starting with a letter, or with '.'/'_' not followed by a digit.
    valid_name = r'(?:`[^`\\]*(?:\\.[^`\\]*)*`)|(?:(?:[a-zA-Z]|[_.][^0-9])[\w_.]*)'

    tokens = {
        'comments': [
            (r'#.*$', Comment.Single),
        ],
        'valid_name': [
            (valid_name, Name),
        ],
        'punctuation': [
            (r'\[{1,2}|\]{1,2}|\(|\)|;|,', Punctuation),
        ],
        'keywords': [
            (r'(if|else|for|while|repeat|in|next|break|return|switch|function)'
             r'(?![\w.])',
             Keyword.Reserved),
        ],
        'operators': [
            (r'<<?-|->>?|-|==|<=|>=|<|>|&&?|!=|\|\|?|\?', Operator),
            (r'\*|\+|\^|/|!|%[^%]*%|=|~|\$|@|:{1,3}', Operator),
        ],
        'builtin_symbols': [
            (r'(NULL|NA(_(integer|real|complex|character)_)?|'
             r'letters|LETTERS|Inf|TRUE|FALSE|NaN|pi|\.\.(\.|[0-9]+))'
             r'(?![\w.])',
             Keyword.Constant),
            (r'(T|F)\b', Name.Builtin.Pseudo),
        ],
        'numbers': [
            # hex number
            (r'0[xX][a-fA-F0-9]+([pP][0-9]+)?[Li]?', Number.Hex),
            # decimal number
            (r'[+-]?([0-9]+(\.[0-9]+)?|\.[0-9]+|\.)([eE][+-]?[0-9]+)?[Li]?',
             Number),
        ],
        'statements': [
            include('comments'),
            # whitespaces
            (r'\s+', Text),
            (r'\'', String, 'string_squote'),
            (r'\"', String, 'string_dquote'),
            include('builtin_symbols'),
            include('valid_name'),
            include('numbers'),
            include('keywords'),
            include('punctuation'),
            include('operators'),
        ],
        'root': [
            # calls:
            (r'(%s)\s*(?=\()' % valid_name, Name.Function),
            include('statements'),
            # blocks:
            (r'\{|\}', Punctuation),
            # (r'\{', Punctuation, 'block'),
            (r'.', Text),
        ],
        # 'block': [
        #     include('statements'),
        #     ('\{', Punctuation, '#push'),
        #     ('\}', Punctuation, '#pop')
        # ],
        'string_squote': [
            (r'([^\'\\]|\\.)*\'', String, '#pop'),
        ],
        'string_dquote': [
            (r'([^"\\]|\\.)*"', String, '#pop'),
        ],
    }

    def analyse_text(text):
        # A ``<-`` assignment preceded by a name, number, closing bracket or
        # whitespace (and not part of ``<<-``) is a strong hint of R/S code.
        if re.search(r'[a-z0-9_\])\s]<-(?!-)', text):
            return 0.11


class RdLexer(RegexLexer):
    """
    Pygments Lexer for R documentation (Rd) files

    This is a very minimal implementation, highlighting little more
    than the macros. A description of Rd syntax is found in `Writing R
    Extensions <http://cran.r-project.org/doc/manuals/R-exts.html>`_
    and `Parsing Rd files <http://developer.r-project.org/parseRd.pdf>`_.

    .. versionadded:: 1.6
    """
    name = 'Rd'
    aliases = ['rd']
    filenames = ['*.Rd']
    mimetypes = ['text/x-r-doc']

    # To account for verbatim / LaTeX-like / and R-like areas
    # would require parsing.
    tokens = {
        'root': [
            # catch escaped brackets and percent sign
            (r'\\[\\{}%]', String.Escape),
            # comments
            (r'%.*$', Comment),
            # special macros with no arguments
            (r'\\(?:cr|l?dots|R|tab)\b', Keyword.Constant),
            # macros
            (r'\\[a-zA-Z]+\b', Keyword),
            # special preprocessor macros
            (r'^\s*#(?:ifn?def|endif).*\b', Comment.Preproc),
            # non-escaped brackets
            (r'[{}]', Name.Builtin),
            # everything else
            (r'[^\\%\n{}]+', Text),
            (r'.', Text),
        ]
    }
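

if __name__ == '__main__':
    # Minimal usage sketch, not part of the original module: run a made-up R
    # snippet and console transcript through the lexers defined above. Only
    # documented Pygments entry points (highlight, TerminalFormatter) are
    # used; the sample inputs are invented purely for illustration.
    from pygments import highlight
    from pygments.formatters import TerminalFormatter

    r_code = 'x <- c(1, 2, NA)\nmean(x, na.rm = TRUE)  # drop missing values\n'
    transcript = '> x <- 1:3\n> x + 1\n[1] 2 3 4\n'

    print(highlight(r_code, SLexer(), TerminalFormatter()))
    print(highlight(transcript, RConsoleLexer(), TerminalFormatter()))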