  1. """
  2. pygments.lexers.textedit
  3. ~~~~~~~~~~~~~~~~~~~~~~~~
  4. Lexers for languages related to text processing.
  5. :copyright: Copyright 2006-2024 by the Pygments team, see AUTHORS.
  6. :license: BSD, see LICENSE for details.
  7. """
  8. import re
  9. from bisect import bisect
  10. from pygments.lexer import RegexLexer, bygroups, default, include, this, using
  11. from pygments.lexers.python import PythonLexer
  12. from pygments.token import Comment, Keyword, Name, Number, Operator, \
  13. Punctuation, String, Text, Whitespace
# Public API of this module: the three lexer classes defined below.
__all__ = ['AwkLexer', 'SedLexer', 'VimLexer']
class AwkLexer(RegexLexer):
    """
    For Awk scripts.

    Covers comments, operators, control-flow keywords, the built-in
    functions and variables of awk/gawk, numbers and quoted strings.
    The 'slashstartsregex' state disambiguates a ``/`` that begins a
    regular-expression literal from the division operator, based on
    which token precedes it.
    """
    name = 'Awk'
    aliases = ['awk', 'gawk', 'mawk', 'nawk']
    filenames = ['*.awk']
    mimetypes = ['application/x-awk']
    url = 'https://en.wikipedia.org/wiki/AWK'
    version_added = '1.5'

    tokens = {
        # Material that may appear between any two tokens.
        'commentsandwhitespace': [
            (r'\s+', Text),
            (r'#.*$', Comment.Single)
        ],
        # Entered after tokens that may be followed by a regex literal:
        # consume a complete /.../ literal, or divert to 'badregex' when
        # a '/' is present but does not form a valid literal, or pop
        # silently when no '/' follows at all.
        'slashstartsregex': [
            include('commentsandwhitespace'),
            (r'/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/'
             r'\B', String.Regex, '#pop'),
            (r'(?=/)', Text, ('#pop', 'badregex')),
            default('#pop')
        ],
        # Skip to end of line after a malformed regex literal.
        'badregex': [
            (r'\n', Text, '#pop')
        ],
        'root': [
            # At line start, leading whitespace or '/' may introduce a regex.
            (r'^(?=\s|/)', Text, 'slashstartsregex'),
            include('commentsandwhitespace'),
            # Operators -- all of them may be followed by a regex literal.
            (r'\+\+|--|\|\||&&|in\b|\$|!?~|\?|:|'
             r'(\*\*|[-<>+*%\^/!=|])=?', Operator, 'slashstartsregex'),
            # Opening punctuation may also be followed by a regex literal.
            (r'[{(\[;,]', Punctuation, 'slashstartsregex'),
            (r'[})\].]', Punctuation),
            (r'(break|continue|do|while|exit|for|if|else|'
             r'return)\b', Keyword, 'slashstartsregex'),
            (r'function\b', Keyword.Declaration, 'slashstartsregex'),
            # Built-in functions and statements.
            (r'(atan2|cos|exp|int|log|rand|sin|sqrt|srand|gensub|gsub|index|'
             r'length|match|split|sprintf|sub|substr|tolower|toupper|close|'
             r'fflush|getline|next|nextfile|print|printf|strftime|systime|'
             r'delete|system)\b', Keyword.Reserved),
            # Built-in variables (POSIX awk plus gawk extensions).
            # NOTE(review): 'ORFS' is not a documented awk variable; the
            # output record separator is 'ORS' -- verify against upstream.
            (r'(ARGC|ARGIND|ARGV|BEGIN|CONVFMT|ENVIRON|END|ERRNO|FIELDWIDTHS|'
             r'FILENAME|FNR|FS|IGNORECASE|NF|NR|OFMT|OFS|ORFS|RLENGTH|RS|'
             r'RSTART|RT|SUBSEP)\b', Name.Builtin),
            # Identifiers and field references ($0, $1, ...).
            (r'[$a-zA-Z_]\w*', Name.Other),
            # Numeric literals; floats must come before plain integers.
            (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float),
            (r'0x[0-9a-fA-F]+', Number.Hex),
            (r'[0-9]+', Number.Integer),
            (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
            (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single),
        ]
    }
class SedLexer(RegexLexer):
    """
    Lexer for Sed script files.

    Handles addresses, single-letter commands, the literal-text
    arguments of a/c/i, and the delimited arguments of /regex/
    addresses, y// transliterations and s// substitutions with
    arbitrary delimiters (matched via a ``\2`` backreference).
    """
    name = 'Sed'
    aliases = ['sed', 'gsed', 'ssed']
    filenames = ['*.sed', '*.[gs]sed']
    mimetypes = ['text/x-sed']
    url = 'https://en.wikipedia.org/wiki/Sed'
    version_added = ''
    flags = re.MULTILINE

    # Match the contents within delimiters such as /<contents>/ ;
    # tolerates escaped characters and backslash-continued lines.
    _inside_delims = r'((?:(?:\\[^\n]|[^\\])*?\\\n)*?(?:\\.|[^\\])*?)'

    tokens = {
        'root': [
            (r'\s+', Whitespace),
            (r'#.*$', Comment.Single),
            # Line-number address.
            (r'[0-9]+', Number.Integer),
            # '$' addresses the last line.
            (r'\$', Operator),
            (r'[{};,!]', Punctuation),
            # Commands that take no argument.
            (r'[dDFgGhHlnNpPqQxz=]', Keyword),
            # Commands taking a filename/label argument up to ';' or EOL.
            (r'([berRtTvwW:])([^;\n]*)', bygroups(Keyword, String.Single)),
            # a/c/i take literal text, possibly continued with '\'+newline.
            (r'([aci])((?:.*?\\\n)*(?:.*?[^\\]$))', bygroups(Keyword, String.Double)),
            # q/Q with an optional exit code.
            (r'([qQ])([0-9]*)', bygroups(Keyword, Number.Integer)),
            # /regex/ address.
            (r'(/)' + _inside_delims + r'(/)', bygroups(Punctuation, String.Regex, Punctuation)),
            # \cregexc address with a custom delimiter c (group 2).
            (r'(\\(.))' + _inside_delims + r'(\2)',
             bygroups(Punctuation, None, String.Regex, Punctuation)),
            # y/source/dest/ transliteration, any delimiter.
            (r'(y)(.)' + _inside_delims + r'(\2)' + _inside_delims + r'(\2)',
             bygroups(Keyword, Punctuation, String.Single, Punctuation, String.Single, Punctuation)),
            # s/pattern/replacement/flags substitution, any delimiter.
            (r'(s)(.)' + _inside_delims + r'(\2)' + _inside_delims + r'(\2)((?:[gpeIiMm]|[0-9])*)',
             bygroups(Keyword, Punctuation, String.Regex, Punctuation, String.Single, Punctuation,
                      Keyword))
        ]
    }
class VimLexer(RegexLexer):
    """
    Lexer for VimL script files.

    The regex table only classifies the easy cases; every remaining
    bare word is first emitted as ``Name.Other`` and then reclassified
    in :meth:`get_tokens_unprocessed` against the command/option/auto-
    command tables from ``pygments.lexers._vim_builtins``, because Vim
    lets commands be abbreviated (see :meth:`is_in`).
    """
    name = 'VimL'
    aliases = ['vim']
    filenames = ['*.vim', '.vimrc', '.exrc', '.gvimrc',
                 '_vimrc', '_exrc', '_gvimrc', 'vimrc', 'gvimrc']
    mimetypes = ['text/x-vim']
    url = 'https://www.vim.org'
    version_added = '0.8'
    flags = re.MULTILINE

    # All accepted abbreviations of the ':python' command (py, pyt, ...).
    _python = r'py(?:t(?:h(?:o(?:n)?)?)?)?'

    tokens = {
        'root': [
            # ':py << EOF ... EOF' heredoc: the payload is highlighted
            # with the Python lexer; \6 backreferences the terminator.
            (r'^([ \t:]*)(' + _python + r')([ \t]*)(<<)([ \t]*)(.*)((?:\n|.)*)(\6)',
             bygroups(using(this), Keyword, Text, Operator, Text, Text,
                      using(PythonLexer), Text)),
            # Single-line ':py <statement>'.
            (r'^([ \t:]*)(' + _python + r')([ \t])(.*)',
             bygroups(using(this), Keyword, Text, using(PythonLexer))),
            # Full-line '"' comment.
            (r'^\s*".*', Comment),
            (r'[ \t]+', Text),
            # TODO: regexes can have other delims
            (r'/[^/\\\n]*(?:\\[\s\S][^/\\\n]*)*/', String.Regex),
            (r'"[^"\\\n]*(?:\\[\s\S][^"\\\n]*)*"', String.Double),
            (r"'[^\n']*(?:''[^\n']*)*'", String.Single),
            # Who decided that doublequote was a good comment character??
            (r'(?<=\s)"[^\-:.%#=*].*', Comment),
            (r'-?\d+', Number),
            # Hex color literal, e.g. #00ff00 in highlight commands.
            (r'#[0-9a-f]{6}', Number.Hex),
            (r'^:', Punctuation),
            (r'[()<>+=!|,~-]', Punctuation),  # Inexact list. Looks decent.
            (r'\b(let|if|else|endif|elseif|fun|function|endfunction)\b',
             Keyword),
            (r'\b(NONE|bold|italic|underline|dark|light)\b', Name.Builtin),
            (r'\b\w+\b', Name.Other),  # These are postprocessed below
            (r'.', Text),
        ],
    }

    def __init__(self, **options):
        # Import lazily so the (large) builtin tables are only loaded
        # when a VimLexer is actually instantiated.
        from pygments.lexers._vim_builtins import auto, command, option
        self._cmd = command
        self._opt = option
        self._aut = auto
        RegexLexer.__init__(self, **options)

    def is_in(self, w, mapping):
        r"""
        Return True if *w* matches an entry of *mapping*, allowing
        Vim-style abbreviation.

        It's kind of difficult to decide if something might be a keyword
        in VimL because it allows you to abbreviate them. In fact,
        'ab[breviate]' is a good example. :ab, :abbre, or :abbreviate are
        valid ways to call it so rather than making really awful regexps
        like::

            \bab(?:b(?:r(?:e(?:v(?:i(?:a(?:t(?:e)?)?)?)?)?)?)?)?\b

        we match `\b\w+\b` and then call is_in() on those tokens. See
        `scripts/get_vimkw.py` for how the lists are extracted.

        *mapping* must be sorted, since it is searched with ``bisect``;
        entries appear to be ``(minimal_prefix, full_name)`` pairs --
        *w* matches when it starts with the minimal prefix and is itself
        a prefix of the full name (TODO confirm against _vim_builtins).
        """
        # Where (w,) would be inserted; only the entries immediately on
        # either side of that position can possibly match w.
        p = bisect(mapping, (w,))
        if p > 0:
            if mapping[p-1][0] == w[:len(mapping[p-1][0])] and \
               mapping[p-1][1][:len(w)] == w:
                return True
        if p < len(mapping):
            return mapping[p][0] == w[:len(mapping[p][0])] and \
                   mapping[p][1][:len(w)] == w
        return False

    def get_tokens_unprocessed(self, text):
        """
        Post-process the regex table's output: each generic
        ``Name.Other`` word is reclassified as Keyword (command),
        Name.Builtin (option or autocommand event) or plain Text.
        """
        # TODO: builtins are only subsequent tokens on lines
        #       and 'keywords' only happen at the beginning except
        #       for :au ones
        for index, token, value in \
                RegexLexer.get_tokens_unprocessed(self, text):
            if token is Name.Other:
                if self.is_in(value, self._cmd):
                    yield index, Keyword, value
                elif self.is_in(value, self._opt) or \
                        self.is_in(value, self._aut):
                    yield index, Name.Builtin, value
                else:
                    yield index, Text, value
            else:
                yield index, token, value