  1. """
  2. pygments.lexers.special
  3. ~~~~~~~~~~~~~~~~~~~~~~~
  4. Special lexers.
  5. :copyright: Copyright 2006-2024 by the Pygments team, see AUTHORS.
  6. :license: BSD, see LICENSE for details.
  7. """
  8. import ast
  9. from pygments.lexer import Lexer, line_re
  10. from pygments.token import Token, Error, Text, Generic
  11. from pygments.util import get_choice_opt
  12. __all__ = ['TextLexer', 'OutputLexer', 'RawTokenLexer']


class TextLexer(Lexer):
    """
    "Null" lexer, doesn't highlight anything.
    """
    name = 'Text only'
    aliases = ['text']
    filenames = ['*.txt']
    mimetypes = ['text/plain']
    url = ""
    version_added = ''

    priority = 0.01

    def get_tokens_unprocessed(self, text):
        yield 0, Text, text

    def analyse_text(text):
        return TextLexer.priority
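
# A minimal usage sketch (the sample string is invented; everything else is
# defined above): the whole input comes back as a single ``Token.Text`` chunk,
# and ``analyse_text`` always reports the tiny ``priority``, so the null lexer
# only wins lexer guessing when every other lexer scores lower.
#
#     >>> list(TextLexer().get_tokens_unprocessed("no highlighting here\n"))
#     [(0, Token.Text, 'no highlighting here\n')]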


class OutputLexer(Lexer):
    """
    Simple lexer that highlights everything as ``Token.Generic.Output``.
    """
    name = 'Text output'
    aliases = ['output']
    url = ""
    version_added = '2.10'

    def get_tokens_unprocessed(self, text):
        yield 0, Generic.Output, text


# Shared cache mapping token-type strings such as 'Token.Name.Builtin' to the
# resolved token-type objects, reused across RawTokenLexer instances.
_ttype_cache = {}


class RawTokenLexer(Lexer):
    """
    Recreate a token stream formatted with the `RawTokenFormatter`.

    Additional options accepted:

    `compress`
        If set to ``"gz"`` or ``"bz2"``, decompress the token stream with
        the given compression algorithm before lexing (default: ``""``).
        A round-trip usage sketch appears at the end of this module.
    """
    name = 'Raw token data'
    aliases = []
    filenames = []
    mimetypes = ['application/x-pygments-tokens']
    url = 'https://pygments.org/docs/formatters/#RawTokenFormatter'
    version_added = ''

    def __init__(self, **options):
        self.compress = get_choice_opt(options, 'compress',
                                       ['', 'none', 'gz', 'bz2'], '')
        Lexer.__init__(self, **options)

    def get_tokens(self, text):
        if self.compress:
            if isinstance(text, str):
                text = text.encode('latin1')
            try:
                if self.compress == 'gz':
                    import gzip
                    text = gzip.decompress(text)
                elif self.compress == 'bz2':
                    import bz2
                    text = bz2.decompress(text)
            except OSError:
                yield Error, text.decode('latin1')
        if isinstance(text, bytes):
            text = text.decode('latin1')

        # do not call Lexer.get_tokens() because stripping is not optional.
        text = text.strip('\n') + '\n'
        for i, t, v in self.get_tokens_unprocessed(text):
            yield t, v
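
    # Input sketch (values invented): RawTokenFormatter writes one token per
    # line as a dotted token-type name, a tab (shown here as \t), and a Python
    # string literal, e.g.
    #
    #     Token.Keyword\t'def'
    #     Token.Name.Function\t'foo'
    #
    # The parser below splits on the tab, resolves the token type attribute by
    # attribute (caching it in ``_ttype_cache``), and recovers the text with
    # ``ast.literal_eval``; malformed lines are yielded as ``Error`` tokens.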
    def get_tokens_unprocessed(self, text):
        length = 0
        for match in line_re.finditer(text):
            try:
                ttypestr, val = match.group().rstrip().split('\t', 1)
                ttype = _ttype_cache.get(ttypestr)
                if not ttype:
                    ttype = Token
                    ttypes = ttypestr.split('.')[1:]
                    for ttype_ in ttypes:
                        if not ttype_ or not ttype_[0].isupper():
                            raise ValueError('malformed token name')
                        ttype = getattr(ttype, ttype_)
                    _ttype_cache[ttypestr] = ttype
                val = ast.literal_eval(val)
                if not isinstance(val, str):
                    raise ValueError('expected str')
            except (SyntaxError, ValueError):
                val = match.group()
                ttype = Error
            yield length, ttype, val
            length += len(val)
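

# Usage sketch, not part of the lexer module itself. It relies only on public
# Pygments entry points (highlight, PythonLexer, RawTokenFormatter,
# TerminalFormatter); the sample source string is invented. It dumps a token
# stream with RawTokenFormatter and feeds it back through RawTokenLexer to
# rebuild highlighted output, using gzip compression on both sides.
if __name__ == '__main__':
    from pygments import highlight
    from pygments.formatters import RawTokenFormatter, TerminalFormatter
    from pygments.lexers import PythonLexer

    source = 'print("hello, tokens")\n'
    # ``raw`` is gzip-compressed bytes in the tab-separated raw token format.
    raw = highlight(source, PythonLexer(), RawTokenFormatter(compress='gz'))
    # RawTokenLexer(compress='gz') inflates and re-parses the stream, and
    # TerminalFormatter renders the recovered tokens with ANSI colors.
    print(highlight(raw, RawTokenLexer(compress='gz'), TerminalFormatter()))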