stata.py 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171
  1. """
  2. pygments.lexers.stata
  3. ~~~~~~~~~~~~~~~~~~~~~
  4. Lexer for Stata
  5. :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS.
  6. :license: BSD, see LICENSE for details.
  7. """
  8. import re
  9. from pygments.lexer import RegexLexer, default, include, words
  10. from pygments.token import Comment, Keyword, Name, Number, \
  11. String, Text, Operator
  12. from pygments.lexers._stata_builtins import builtins_base, builtins_functions
  13. __all__ = ['StataLexer']
  14. class StataLexer(RegexLexer):
  15. """
  16. For Stata do files.
  17. .. versionadded:: 2.2
  18. """
  19. # Syntax based on
  20. # - http://fmwww.bc.edu/RePEc/bocode/s/synlightlist.ado
  21. # - https://github.com/isagalaev/highlight.js/blob/master/src/languages/stata.js
  22. # - https://github.com/jpitblado/vim-stata/blob/master/syntax/stata.vim
  23. name = 'Stata'
  24. url = 'http://www.stata.com/'
  25. aliases = ['stata', 'do']
  26. filenames = ['*.do', '*.ado']
  27. mimetypes = ['text/x-stata', 'text/stata', 'application/x-stata']
  28. flags = re.MULTILINE | re.DOTALL
  29. tokens = {
  30. 'root': [
  31. include('comments'),
  32. include('strings'),
  33. include('macros'),
  34. include('numbers'),
  35. include('keywords'),
  36. include('operators'),
  37. include('format'),
  38. (r'.', Text),
  39. ],
  40. # Comments are a complicated beast in Stata because they can be
  41. # nested and there are a few corner cases with that. See:
  42. # - github.com/kylebarron/language-stata/issues/90
  43. # - statalist.org/forums/forum/general-stata-discussion/general/1448244
  44. 'comments': [
  45. (r'(^//|(?<=\s)//)(?!/)', Comment.Single, 'comments-double-slash'),
  46. (r'^\s*\*', Comment.Single, 'comments-star'),
  47. (r'/\*', Comment.Multiline, 'comments-block'),
  48. (r'(^///|(?<=\s)///)', Comment.Special, 'comments-triple-slash')
  49. ],
  50. 'comments-block': [
  51. (r'/\*', Comment.Multiline, '#push'),
  52. # this ends and restarts a comment block. but need to catch this so
  53. # that it doesn\'t start _another_ level of comment blocks
  54. (r'\*/\*', Comment.Multiline),
  55. (r'(\*/\s+\*(?!/)[^\n]*)|(\*/)', Comment.Multiline, '#pop'),
  56. # Match anything else as a character inside the comment
  57. (r'.', Comment.Multiline),
  58. ],
  59. 'comments-star': [
  60. (r'///.*?\n', Comment.Single,
  61. ('#pop', 'comments-triple-slash')),
  62. (r'(^//|(?<=\s)//)(?!/)', Comment.Single,
  63. ('#pop', 'comments-double-slash')),
  64. (r'/\*', Comment.Multiline, 'comments-block'),
  65. (r'.(?=\n)', Comment.Single, '#pop'),
  66. (r'.', Comment.Single),
  67. ],
  68. 'comments-triple-slash': [
  69. (r'\n', Comment.Special, '#pop'),
  70. # A // breaks out of a comment for the rest of the line
  71. (r'//.*?(?=\n)', Comment.Single, '#pop'),
  72. (r'.', Comment.Special),
  73. ],
  74. 'comments-double-slash': [
  75. (r'\n', Text, '#pop'),
  76. (r'.', Comment.Single),
  77. ],
  78. # `"compound string"' and regular "string"; note the former are
  79. # nested.
  80. 'strings': [
  81. (r'`"', String, 'string-compound'),
  82. (r'(?<!`)"', String, 'string-regular'),
  83. ],
  84. 'string-compound': [
  85. (r'`"', String, '#push'),
  86. (r'"\'', String, '#pop'),
  87. (r'\\\\|\\"|\\\$|\\`|\\\n', String.Escape),
  88. include('macros'),
  89. (r'.', String)
  90. ],
  91. 'string-regular': [
  92. (r'(")(?!\')|(?=\n)', String, '#pop'),
  93. (r'\\\\|\\"|\\\$|\\`|\\\n', String.Escape),
  94. include('macros'),
  95. (r'.', String)
  96. ],
  97. # A local is usually
  98. # `\w{0,31}'
  99. # `:extended macro'
  100. # `=expression'
  101. # `[rsen](results)'
  102. # `(++--)scalar(++--)'
  103. #
  104. # However, there are all sorts of weird rules wrt edge
  105. # cases. Instead of writing 27 exceptions, anything inside
  106. # `' is a local.
  107. #
  108. # A global is more restricted, so we do follow rules. Note only
  109. # locals explicitly enclosed ${} can be nested.
  110. 'macros': [
  111. (r'\$(\{|(?=[$`]))', Name.Variable.Global, 'macro-global-nested'),
  112. (r'\$', Name.Variable.Global, 'macro-global-name'),
  113. (r'`', Name.Variable, 'macro-local'),
  114. ],
  115. 'macro-local': [
  116. (r'`', Name.Variable, '#push'),
  117. (r"'", Name.Variable, '#pop'),
  118. (r'\$(\{|(?=[$`]))', Name.Variable.Global, 'macro-global-nested'),
  119. (r'\$', Name.Variable.Global, 'macro-global-name'),
  120. (r'.', Name.Variable), # fallback
  121. ],
  122. 'macro-global-nested': [
  123. (r'\$(\{|(?=[$`]))', Name.Variable.Global, '#push'),
  124. (r'\}', Name.Variable.Global, '#pop'),
  125. (r'\$', Name.Variable.Global, 'macro-global-name'),
  126. (r'`', Name.Variable, 'macro-local'),
  127. (r'\w', Name.Variable.Global), # fallback
  128. default('#pop'),
  129. ],
  130. 'macro-global-name': [
  131. (r'\$(\{|(?=[$`]))', Name.Variable.Global, 'macro-global-nested', '#pop'),
  132. (r'\$', Name.Variable.Global, 'macro-global-name', '#pop'),
  133. (r'`', Name.Variable, 'macro-local', '#pop'),
  134. (r'\w{1,32}', Name.Variable.Global, '#pop'),
  135. ],
  136. # Built in functions and statements
  137. 'keywords': [
  138. (words(builtins_functions, prefix = r'\b', suffix = r'(?=\()'),
  139. Name.Function),
  140. (words(builtins_base, prefix = r'(^\s*|\s)', suffix = r'\b'),
  141. Keyword),
  142. ],
  143. # http://www.stata.com/help.cgi?operators
  144. 'operators': [
  145. (r'-|==|<=|>=|<|>|&|!=', Operator),
  146. (r'\*|\+|\^|/|!|~|==|~=', Operator)
  147. ],
  148. # Stata numbers
  149. 'numbers': [
  150. # decimal number
  151. (r'\b[+-]?([0-9]+(\.[0-9]+)?|\.[0-9]+|\.)([eE][+-]?[0-9]+)?[i]?\b',
  152. Number),
  153. ],
  154. # Stata formats
  155. 'format': [
  156. (r'%-?\d{1,2}(\.\d{1,2})?[gfe]c?', Name.Other),
  157. (r'%(21x|16H|16L|8H|8L)', Name.Other),
  158. (r'%-?(tc|tC|td|tw|tm|tq|th|ty|tg)\S{0,32}', Name.Other),
  159. (r'%[-~]?\d{1,4}s', Name.Other),
  160. ]
  161. }