# textedit.py
  1. """
  2. pygments.lexers.textedit
  3. ~~~~~~~~~~~~~~~~~~~~~~~~
  4. Lexers for languages related to text processing.
  5. :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS.
  6. :license: BSD, see LICENSE for details.
  7. """
  8. import re
  9. from bisect import bisect
  10. from pygments.lexer import RegexLexer, bygroups, default, include, this, using
  11. from pygments.lexers.python import PythonLexer
  12. from pygments.token import Comment, Keyword, Name, Number, Operator, \
  13. Punctuation, String, Text, Whitespace
  14. __all__ = ['AwkLexer', 'SedLexer', 'VimLexer']
  15. class AwkLexer(RegexLexer):
  16. """
  17. For Awk scripts.
  18. .. versionadded:: 1.5
  19. """
  20. name = 'Awk'
  21. aliases = ['awk', 'gawk', 'mawk', 'nawk']
  22. filenames = ['*.awk']
  23. mimetypes = ['application/x-awk']
  24. tokens = {
  25. 'commentsandwhitespace': [
  26. (r'\s+', Text),
  27. (r'#.*$', Comment.Single)
  28. ],
  29. 'slashstartsregex': [
  30. include('commentsandwhitespace'),
  31. (r'/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/'
  32. r'\B', String.Regex, '#pop'),
  33. (r'(?=/)', Text, ('#pop', 'badregex')),
  34. default('#pop')
  35. ],
  36. 'badregex': [
  37. (r'\n', Text, '#pop')
  38. ],
  39. 'root': [
  40. (r'^(?=\s|/)', Text, 'slashstartsregex'),
  41. include('commentsandwhitespace'),
  42. (r'\+\+|--|\|\||&&|in\b|\$|!?~|'
  43. r'(\*\*|[-<>+*%\^/!=|])=?', Operator, 'slashstartsregex'),
  44. (r'[{(\[;,]', Punctuation, 'slashstartsregex'),
  45. (r'[})\].]', Punctuation),
  46. (r'(break|continue|do|while|exit|for|if|else|'
  47. r'return)\b', Keyword, 'slashstartsregex'),
  48. (r'function\b', Keyword.Declaration, 'slashstartsregex'),
  49. (r'(atan2|cos|exp|int|log|rand|sin|sqrt|srand|gensub|gsub|index|'
  50. r'length|match|split|sprintf|sub|substr|tolower|toupper|close|'
  51. r'fflush|getline|next|nextfile|print|printf|strftime|systime|'
  52. r'delete|system)\b', Keyword.Reserved),
  53. (r'(ARGC|ARGIND|ARGV|BEGIN|CONVFMT|ENVIRON|END|ERRNO|FIELDWIDTHS|'
  54. r'FILENAME|FNR|FS|IGNORECASE|NF|NR|OFMT|OFS|ORFS|RLENGTH|RS|'
  55. r'RSTART|RT|SUBSEP)\b', Name.Builtin),
  56. (r'[$a-zA-Z_]\w*', Name.Other),
  57. (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float),
  58. (r'0x[0-9a-fA-F]+', Number.Hex),
  59. (r'[0-9]+', Number.Integer),
  60. (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
  61. (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single),
  62. ]
  63. }
class SedLexer(RegexLexer):
    """
    Lexer for Sed script files.
    """
    name = 'Sed'
    aliases = ['sed', 'gsed', 'ssed']
    filenames = ['*.sed', '*.[gs]sed']
    mimetypes = ['text/x-sed']
    flags = re.MULTILINE

    # Match the contents within delimiters such as /<contents>/ ; allows
    # escaped characters and backslash-continued lines inside the delimiters.
    _inside_delims = r'((?:(?:\\[^\n]|[^\\])*?\\\n)*?(?:\\.|[^\\])*?)'

    tokens = {
        'root': [
            (r'\s+', Whitespace),
            (r'#.*$', Comment.Single),
            # Line-number addresses.
            (r'[0-9]+', Number.Integer),
            # '$' = last-line address.
            (r'\$', Operator),
            (r'[{};,!]', Punctuation),
            # One-letter commands that take no argument.
            (r'[dDFgGhHlnNpPqQxz=]', Keyword),
            # Commands taking a filename or label argument up to ';' or EOL.
            (r'([berRtTvwW:])([^;\n]*)', bygroups(Keyword, String.Single)),
            # a/c/i commands: the text may continue over backslash-newlines.
            (r'([aci])((?:.*?\\\n)*(?:.*?[^\\]$))', bygroups(Keyword, String.Double)),
            # q/Q with an optional numeric exit status.
            (r'([qQ])([0-9]*)', bygroups(Keyword, Number.Integer)),
            # /regex/ address.
            (r'(/)' + _inside_delims + r'(/)', bygroups(Punctuation, String.Regex, Punctuation)),
            # \cREGEXc address with an arbitrary delimiter character c.
            (r'(\\(.))' + _inside_delims + r'(\2)',
             bygroups(Punctuation, None, String.Regex, Punctuation)),
            # y/source/dest/ transliteration (arbitrary delimiter via \2).
            (r'(y)(.)' + _inside_delims + r'(\2)' + _inside_delims + r'(\2)',
             bygroups(Keyword, Punctuation, String.Single, Punctuation, String.Single, Punctuation)),
            # s/pattern/replacement/flags substitution (arbitrary delimiter).
            (r'(s)(.)' + _inside_delims + r'(\2)' + _inside_delims + r'(\2)((?:[gpeIiMm]|[0-9])*)',
             bygroups(Keyword, Punctuation, String.Regex, Punctuation, String.Single, Punctuation,
                      Keyword))
        ]
    }
class VimLexer(RegexLexer):
    """
    Lexer for VimL script files.

    .. versionadded:: 0.8
    """
    name = 'VimL'
    aliases = ['vim']
    filenames = ['*.vim', '.vimrc', '.exrc', '.gvimrc',
                 '_vimrc', '_exrc', '_gvimrc', 'vimrc', 'gvimrc']
    mimetypes = ['text/x-vim']
    flags = re.MULTILINE

    # Matches 'py', 'pyt', ..., 'python' — Vim accepts abbreviated commands.
    _python = r'py(?:t(?:h(?:o(?:n)?)?)?)?'

    tokens = {
        'root': [
            # Heredoc-style embedded Python, ':python << MARK ... MARK';
            # group 6 captures the marker and \6 matches its closing copy.
            (r'^([ \t:]*)(' + _python + r')([ \t]*)(<<)([ \t]*)(.*)((?:\n|.)*)(\6)',
             bygroups(using(this), Keyword, Text, Operator, Text, Text,
                      using(PythonLexer), Text)),
            # Single-line embedded Python, ':python <statement>'.
            (r'^([ \t:]*)(' + _python + r')([ \t])(.*)',
             bygroups(using(this), Keyword, Text, using(PythonLexer))),
            # Full-line comment (double-quote at start of line).
            (r'^\s*".*', Comment),
            (r'[ \t]+', Text),
            # TODO: regexes can have other delims
            (r'/[^/\\\n]*(?:\\[\s\S][^/\\\n]*)*/', String.Regex),
            (r'"[^"\\\n]*(?:\\[\s\S][^"\\\n]*)*"', String.Double),
            # Single-quoted string; '' is the escaped quote.
            (r"'[^\n']*(?:''[^\n']*)*'", String.Single),
            # Who decided that doublequote was a good comment character??
            # (trailing comment after whitespace, unless it looks like a string)
            (r'(?<=\s)"[^\-:.%#=*].*', Comment),
            (r'-?\d+', Number),
            (r'#[0-9a-f]{6}', Number.Hex),
            (r'^:', Punctuation),
            (r'[()<>+=!|,~-]', Punctuation),  # Inexact list. Looks decent.
            (r'\b(let|if|else|endif|elseif|fun|function|endfunction)\b',
             Keyword),
            (r'\b(NONE|bold|italic|underline|dark|light)\b', Name.Builtin),
            (r'\b\w+\b', Name.Other),  # These are postprocessed below
            (r'.', Text),
        ],
    }

    def __init__(self, **options):
        # Sorted (abbreviation, full-name) tables generated from Vim's
        # sources; see scripts/get_vimkw.py.
        from pygments.lexers._vim_builtins import auto, command, option
        self._cmd = command
        self._opt = option
        self._aut = auto
        RegexLexer.__init__(self, **options)

    def is_in(self, w, mapping):
        r"""
        It's kind of difficult to decide if something might be a keyword
        in VimL because it allows you to abbreviate them. In fact,
        'ab[breviate]' is a good example. :ab, :abbre, or :abbreviate are
        valid ways to call it so rather than making really awful regexps
        like::

            \bab(?:b(?:r(?:e(?:v(?:i(?:a(?:t(?:e)?)?)?)?)?)?)?)?\b

        we match `\b\w+\b` and then call is_in() on those tokens. See
        `scripts/get_vimkw.py` for how the lists are extracted.
        """
        # `mapping` is sorted by minimal abbreviation; bisect gives the
        # insertion point for `w`, so a match can only be at p-1 or p.
        p = bisect(mapping, (w,))
        if p > 0:
            # Word starts with the minimal abbreviation AND is a prefix
            # of the full name -> it is a valid abbreviated spelling.
            if mapping[p-1][0] == w[:len(mapping[p-1][0])] and \
               mapping[p-1][1][:len(w)] == w:
                return True
        if p < len(mapping):
            return mapping[p][0] == w[:len(mapping[p][0])] and \
                mapping[p][1][:len(w)] == w
        return False

    def get_tokens_unprocessed(self, text):
        # Reclassify Name.Other tokens produced by the regex pass:
        # commands become Keyword, options/autocommand events become
        # Name.Builtin, everything else falls back to Text.
        # TODO: builtins are only subsequent tokens on lines
        #       and 'keywords' only happen at the beginning except
        #       for :au ones
        for index, token, value in \
                RegexLexer.get_tokens_unprocessed(self, text):
            if token is Name.Other:
                if self.is_in(value, self._cmd):
                    yield index, Keyword, value
                elif self.is_in(value, self._opt) or \
                        self.is_in(value, self._aut):
                    yield index, Name.Builtin, value
                else:
                    yield index, Text, value
            else:
                yield index, token, value