textedit.py 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169
  1. # -*- coding: utf-8 -*-
  2. """
  3. pygments.lexers.textedit
  4. ~~~~~~~~~~~~~~~~~~~~~~~~
  5. Lexers for languages related to text processing.
  6. :copyright: Copyright 2006-2019 by the Pygments team, see AUTHORS.
  7. :license: BSD, see LICENSE for details.
  8. """
  9. import re
  10. from bisect import bisect
  11. from pygments.lexer import RegexLexer, include, default, bygroups, using, this
  12. from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
  13. Number, Punctuation
  14. from pygments.lexers.python import PythonLexer
  15. __all__ = ['AwkLexer', 'VimLexer']
  16. class AwkLexer(RegexLexer):
  17. """
  18. For Awk scripts.
  19. .. versionadded:: 1.5
  20. """
  21. name = 'Awk'
  22. aliases = ['awk', 'gawk', 'mawk', 'nawk']
  23. filenames = ['*.awk']
  24. mimetypes = ['application/x-awk']
  25. tokens = {
  26. 'commentsandwhitespace': [
  27. (r'\s+', Text),
  28. (r'#.*$', Comment.Single)
  29. ],
  30. 'slashstartsregex': [
  31. include('commentsandwhitespace'),
  32. (r'/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/'
  33. r'\B', String.Regex, '#pop'),
  34. (r'(?=/)', Text, ('#pop', 'badregex')),
  35. default('#pop')
  36. ],
  37. 'badregex': [
  38. (r'\n', Text, '#pop')
  39. ],
  40. 'root': [
  41. (r'^(?=\s|/)', Text, 'slashstartsregex'),
  42. include('commentsandwhitespace'),
  43. (r'\+\+|--|\|\||&&|in\b|\$|!?~|'
  44. r'(\*\*|[-<>+*%\^/!=|])=?', Operator, 'slashstartsregex'),
  45. (r'[{(\[;,]', Punctuation, 'slashstartsregex'),
  46. (r'[})\].]', Punctuation),
  47. (r'(break|continue|do|while|exit|for|if|else|'
  48. r'return)\b', Keyword, 'slashstartsregex'),
  49. (r'function\b', Keyword.Declaration, 'slashstartsregex'),
  50. (r'(atan2|cos|exp|int|log|rand|sin|sqrt|srand|gensub|gsub|index|'
  51. r'length|match|split|sprintf|sub|substr|tolower|toupper|close|'
  52. r'fflush|getline|next|nextfile|print|printf|strftime|systime|'
  53. r'delete|system)\b', Keyword.Reserved),
  54. (r'(ARGC|ARGIND|ARGV|BEGIN|CONVFMT|ENVIRON|END|ERRNO|FIELDWIDTHS|'
  55. r'FILENAME|FNR|FS|IGNORECASE|NF|NR|OFMT|OFS|ORFS|RLENGTH|RS|'
  56. r'RSTART|RT|SUBSEP)\b', Name.Builtin),
  57. (r'[$a-zA-Z_]\w*', Name.Other),
  58. (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float),
  59. (r'0x[0-9a-fA-F]+', Number.Hex),
  60. (r'[0-9]+', Number.Integer),
  61. (r'"(\\\\|\\"|[^"])*"', String.Double),
  62. (r"'(\\\\|\\'|[^'])*'", String.Single),
  63. ]
  64. }
class VimLexer(RegexLexer):
    """
    Lexer for VimL script files.

    .. versionadded:: 0.8
    """
    name = 'VimL'
    aliases = ['vim']
    filenames = ['*.vim', '.vimrc', '.exrc', '.gvimrc',
                 '_vimrc', '_exrc', '_gvimrc', 'vimrc', 'gvimrc']
    mimetypes = ['text/x-vim']
    flags = re.MULTILINE

    # Matches any abbreviation of ':python' down to ':py'.
    _python = r'py(?:t(?:h(?:o(?:n)?)?)?)?'

    tokens = {
        'root': [
            # Embedded Python heredoc (':py << MARKER ... MARKER'); the \6
            # backreference requires the closing marker to match the one
            # captured after '<<', and the body is lexed as Python.
            (r'^([ \t:]*)(' + _python + r')([ \t]*)(<<)([ \t]*)(.*)((?:\n|.)*)(\6)',
             bygroups(using(this), Keyword, Text, Operator, Text, Text,
                      using(PythonLexer), Text)),
            # Single-line ':py <statement>'; the statement is lexed as Python.
            (r'^([ \t:]*)(' + _python + r')([ \t])(.*)',
             bygroups(using(this), Keyword, Text, using(PythonLexer))),

            # Whole-line comment (line starting with '"').
            (r'^\s*".*', Comment),

            (r'[ \t]+', Text),
            # TODO: regexes can have other delims
            (r'/[^/\\\n]*(?:\\[\s\S][^/\\\n]*)*/', String.Regex),
            (r'"[^"\\\n]*(?:\\[\s\S][^"\\\n]*)*"', String.Double),
            (r"'[^\n']*(?:''[^\n']*)*'", String.Single),

            # Who decided that doublequote was a good comment character??
            (r'(?<=\s)"[^\-:.%#=*].*', Comment),

            (r'-?\d+', Number),
            (r'#[0-9a-f]{6}', Number.Hex),
            (r'^:', Punctuation),
            (r'[()<>+=!|,~-]', Punctuation),  # Inexact list. Looks decent.
            (r'\b(let|if|else|endif|elseif|fun|function|endfunction)\b',
             Keyword),
            (r'\b(NONE|bold|italic|underline|dark|light)\b', Name.Builtin),
            (r'\b\w+\b', Name.Other),  # These are postprocessed below
            (r'.', Text),
        ],
    }

    def __init__(self, **options):
        # Sorted lookup tables of Vim commands, options and autocommand
        # events (pairs of minimal abbreviation and full name), consumed
        # by is_in() during post-processing.
        from pygments.lexers._vim_builtins import command, option, auto
        self._cmd = command
        self._opt = option
        self._aut = auto

        RegexLexer.__init__(self, **options)

    def is_in(self, w, mapping):
        r"""
        It's kind of difficult to decide if something might be a keyword
        in VimL because it allows you to abbreviate them. In fact,
        'ab[breviate]' is a good example. :ab, :abbre, or :abbreviate are
        valid ways to call it so rather than making really awful regexps
        like::

            \bab(?:b(?:r(?:e(?:v(?:i(?:a(?:t(?:e)?)?)?)?)?)?)?)?\b

        we match `\b\w+\b` and then call is_in() on those tokens. See
        `scripts/get_vimkw.py` for how the lists are extracted.
        """
        # `mapping` is sorted, so only the entries immediately before and
        # at the insertion point of `w` can possibly match it.
        p = bisect(mapping, (w,))
        if p > 0:
            # Match: the entry's minimal abbreviation is a prefix of `w`,
            # and `w` is itself a prefix of the entry's full name.
            if mapping[p-1][0] == w[:len(mapping[p-1][0])] and \
               mapping[p-1][1][:len(w)] == w:
                return True
        if p < len(mapping):
            # Same abbreviation/full-name prefix test for the entry at
            # the insertion point.
            return mapping[p][0] == w[:len(mapping[p][0])] and \
                mapping[p][1][:len(w)] == w
        return False

    def get_tokens_unprocessed(self, text):
        # Reclassify the generic Name.Other tokens produced by the regex
        # table: known commands become Keyword, known options/autocommand
        # events become Name.Builtin, everything else falls back to Text.
        # TODO: builtins are only subsequent tokens on lines
        #       and 'keywords' only happen at the beginning except
        #       for :au ones
        for index, token, value in \
                RegexLexer.get_tokens_unprocessed(self, text):
            if token is Name.Other:
                if self.is_in(value, self._cmd):
                    yield index, Keyword, value
                elif self.is_in(value, self._opt) or \
                        self.is_in(value, self._aut):
                    yield index, Name.Builtin, value
                else:
                    yield index, Text, value
            else:
                yield index, token, value