# pygments/formatters/other.py
  1. """
  2. pygments.formatters.other
  3. ~~~~~~~~~~~~~~~~~~~~~~~~~
  4. Other formatters: NullFormatter, RawTokenFormatter.
  5. :copyright: Copyright 2006-2024 by the Pygments team, see AUTHORS.
  6. :license: BSD, see LICENSE for details.
  7. """
  8. from pygments.formatter import Formatter
  9. from pygments.util import get_choice_opt
  10. from pygments.token import Token
  11. from pygments.console import colorize
  12. __all__ = ['NullFormatter', 'RawTokenFormatter', 'TestcaseFormatter']
  13. class NullFormatter(Formatter):
  14. """
  15. Output the text unchanged without any formatting.
  16. """
  17. name = 'Text only'
  18. aliases = ['text', 'null']
  19. filenames = ['*.txt']
  20. def format(self, tokensource, outfile):
  21. enc = self.encoding
  22. for ttype, value in tokensource:
  23. if enc:
  24. outfile.write(value.encode(enc))
  25. else:
  26. outfile.write(value)
  27. class RawTokenFormatter(Formatter):
  28. r"""
  29. Format tokens as a raw representation for storing token streams.
  30. The format is ``tokentype<TAB>repr(tokenstring)\n``. The output can later
  31. be converted to a token stream with the `RawTokenLexer`, described in the
  32. :doc:`lexer list <lexers>`.
  33. Only two options are accepted:
  34. `compress`
  35. If set to ``'gz'`` or ``'bz2'``, compress the output with the given
  36. compression algorithm after encoding (default: ``''``).
  37. `error_color`
  38. If set to a color name, highlight error tokens using that color. If
  39. set but with no value, defaults to ``'red'``.
  40. .. versionadded:: 0.11
  41. """
  42. name = 'Raw tokens'
  43. aliases = ['raw', 'tokens']
  44. filenames = ['*.raw']
  45. unicodeoutput = False
  46. def __init__(self, **options):
  47. Formatter.__init__(self, **options)
  48. # We ignore self.encoding if it is set, since it gets set for lexer
  49. # and formatter if given with -Oencoding on the command line.
  50. # The RawTokenFormatter outputs only ASCII. Override here.
  51. self.encoding = 'ascii' # let pygments.format() do the right thing
  52. self.compress = get_choice_opt(options, 'compress',
  53. ['', 'none', 'gz', 'bz2'], '')
  54. self.error_color = options.get('error_color', None)
  55. if self.error_color is True:
  56. self.error_color = 'red'
  57. if self.error_color is not None:
  58. try:
  59. colorize(self.error_color, '')
  60. except KeyError:
  61. raise ValueError(f"Invalid color {self.error_color!r} specified")
  62. def format(self, tokensource, outfile):
  63. try:
  64. outfile.write(b'')
  65. except TypeError:
  66. raise TypeError('The raw tokens formatter needs a binary '
  67. 'output file')
  68. if self.compress == 'gz':
  69. import gzip
  70. outfile = gzip.GzipFile('', 'wb', 9, outfile)
  71. write = outfile.write
  72. flush = outfile.close
  73. elif self.compress == 'bz2':
  74. import bz2
  75. compressor = bz2.BZ2Compressor(9)
  76. def write(text):
  77. outfile.write(compressor.compress(text))
  78. def flush():
  79. outfile.write(compressor.flush())
  80. outfile.flush()
  81. else:
  82. write = outfile.write
  83. flush = outfile.flush
  84. if self.error_color:
  85. for ttype, value in tokensource:
  86. line = b"%r\t%r\n" % (ttype, value)
  87. if ttype is Token.Error:
  88. write(colorize(self.error_color, line))
  89. else:
  90. write(line)
  91. else:
  92. for ttype, value in tokensource:
  93. write(b"%r\t%r\n" % (ttype, value))
  94. flush()
  95. TESTCASE_BEFORE = '''\
  96. def testNeedsName(lexer):
  97. fragment = %r
  98. tokens = [
  99. '''
  100. TESTCASE_AFTER = '''\
  101. ]
  102. assert list(lexer.get_tokens(fragment)) == tokens
  103. '''
  104. class TestcaseFormatter(Formatter):
  105. """
  106. Format tokens as appropriate for a new testcase.
  107. .. versionadded:: 2.0
  108. """
  109. name = 'Testcase'
  110. aliases = ['testcase']
  111. def __init__(self, **options):
  112. Formatter.__init__(self, **options)
  113. if self.encoding is not None and self.encoding != 'utf-8':
  114. raise ValueError("Only None and utf-8 are allowed encodings.")
  115. def format(self, tokensource, outfile):
  116. indentation = ' ' * 12
  117. rawbuf = []
  118. outbuf = []
  119. for ttype, value in tokensource:
  120. rawbuf.append(value)
  121. outbuf.append(f'{indentation}({ttype}, {value!r}),\n')
  122. before = TESTCASE_BEFORE % (''.join(rawbuf),)
  123. during = ''.join(outbuf)
  124. after = TESTCASE_AFTER
  125. if self.encoding is None:
  126. outfile.write(before + during + after)
  127. else:
  128. outfile.write(before.encode('utf-8'))
  129. outfile.write(during.encode('utf-8'))
  130. outfile.write(after.encode('utf-8'))
  131. outfile.flush()