other.py 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161
"""
    pygments.formatters.other
    ~~~~~~~~~~~~~~~~~~~~~~~~~

    Other formatters: NullFormatter, RawTokenFormatter.

    :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""
  8. from pygments.formatter import Formatter
  9. from pygments.util import get_choice_opt
  10. from pygments.token import Token
  11. from pygments.console import colorize
  12. __all__ = ['NullFormatter', 'RawTokenFormatter', 'TestcaseFormatter']
  13. class NullFormatter(Formatter):
  14. """
  15. Output the text unchanged without any formatting.
  16. """
  17. name = 'Text only'
  18. aliases = ['text', 'null']
  19. filenames = ['*.txt']
  20. def format(self, tokensource, outfile):
  21. enc = self.encoding
  22. for ttype, value in tokensource:
  23. if enc:
  24. outfile.write(value.encode(enc))
  25. else:
  26. outfile.write(value)
  27. class RawTokenFormatter(Formatter):
  28. r"""
  29. Format tokens as a raw representation for storing token streams.
  30. The format is ``tokentype<TAB>repr(tokenstring)\n``. The output can later
  31. be converted to a token stream with the `RawTokenLexer`, described in the
  32. :doc:`lexer list <lexers>`.
  33. Only two options are accepted:
  34. `compress`
  35. If set to ``'gz'`` or ``'bz2'``, compress the output with the given
  36. compression algorithm after encoding (default: ``''``).
  37. `error_color`
  38. If set to a color name, highlight error tokens using that color. If
  39. set but with no value, defaults to ``'red'``.
  40. .. versionadded:: 0.11
  41. """
  42. name = 'Raw tokens'
  43. aliases = ['raw', 'tokens']
  44. filenames = ['*.raw']
  45. unicodeoutput = False
  46. def __init__(self, **options):
  47. Formatter.__init__(self, **options)
  48. # We ignore self.encoding if it is set, since it gets set for lexer
  49. # and formatter if given with -Oencoding on the command line.
  50. # The RawTokenFormatter outputs only ASCII. Override here.
  51. self.encoding = 'ascii' # let pygments.format() do the right thing
  52. self.compress = get_choice_opt(options, 'compress',
  53. ['', 'none', 'gz', 'bz2'], '')
  54. self.error_color = options.get('error_color', None)
  55. if self.error_color is True:
  56. self.error_color = 'red'
  57. if self.error_color is not None:
  58. try:
  59. colorize(self.error_color, '')
  60. except KeyError:
  61. raise ValueError("Invalid color %r specified" %
  62. self.error_color)
  63. def format(self, tokensource, outfile):
  64. try:
  65. outfile.write(b'')
  66. except TypeError:
  67. raise TypeError('The raw tokens formatter needs a binary '
  68. 'output file')
  69. if self.compress == 'gz':
  70. import gzip
  71. outfile = gzip.GzipFile('', 'wb', 9, outfile)
  72. write = outfile.write
  73. flush = outfile.close
  74. elif self.compress == 'bz2':
  75. import bz2
  76. compressor = bz2.BZ2Compressor(9)
  77. def write(text):
  78. outfile.write(compressor.compress(text))
  79. def flush():
  80. outfile.write(compressor.flush())
  81. outfile.flush()
  82. else:
  83. write = outfile.write
  84. flush = outfile.flush
  85. if self.error_color:
  86. for ttype, value in tokensource:
  87. line = b"%r\t%r\n" % (ttype, value)
  88. if ttype is Token.Error:
  89. write(colorize(self.error_color, line))
  90. else:
  91. write(line)
  92. else:
  93. for ttype, value in tokensource:
  94. write(b"%r\t%r\n" % (ttype, value))
  95. flush()
# Templates bracketing the token list emitted by TestcaseFormatter.
# NOTE(review): the indentation inside these strings is part of the
# generated testcase source (the 12-space token indent written by
# TestcaseFormatter.format() lines up with the ``tokens = [`` opener);
# it was stripped by the paste and is restored here — confirm against
# the upstream file.
TESTCASE_BEFORE = '''\
    def testNeedsName(lexer):
        fragment = %r
        tokens = [
'''

TESTCASE_AFTER = '''\
        ]
        assert list(lexer.get_tokens(fragment)) == tokens
'''
  105. class TestcaseFormatter(Formatter):
  106. """
  107. Format tokens as appropriate for a new testcase.
  108. .. versionadded:: 2.0
  109. """
  110. name = 'Testcase'
  111. aliases = ['testcase']
  112. def __init__(self, **options):
  113. Formatter.__init__(self, **options)
  114. if self.encoding is not None and self.encoding != 'utf-8':
  115. raise ValueError("Only None and utf-8 are allowed encodings.")
  116. def format(self, tokensource, outfile):
  117. indentation = ' ' * 12
  118. rawbuf = []
  119. outbuf = []
  120. for ttype, value in tokensource:
  121. rawbuf.append(value)
  122. outbuf.append('%s(%s, %r),\n' % (indentation, ttype, value))
  123. before = TESTCASE_BEFORE % (''.join(rawbuf),)
  124. during = ''.join(outbuf)
  125. after = TESTCASE_AFTER
  126. if self.encoding is None:
  127. outfile.write(before + during + after)
  128. else:
  129. outfile.write(before.encode('utf-8'))
  130. outfile.write(during.encode('utf-8'))
  131. outfile.write(after.encode('utf-8'))
  132. outfile.flush()