# -*- coding: utf-8 -*-
"""
    pygments.formatters.other
    ~~~~~~~~~~~~~~~~~~~~~~~~~

    Other formatters: NullFormatter, RawTokenFormatter, TestcaseFormatter.

    :copyright: Copyright 2006-2019 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

from pygments.formatter import Formatter
from pygments.util import get_choice_opt
from pygments.token import Token
from pygments.console import colorize

__all__ = ['NullFormatter', 'RawTokenFormatter', 'TestcaseFormatter']


class NullFormatter(Formatter):
    """
    Output the text unchanged without any formatting.
    """
    name = 'Text only'
    aliases = ['text', 'null']
    filenames = ['*.txt']

    def format(self, tokensource, outfile):
        enc = self.encoding
        for ttype, value in tokensource:
            if enc:
                outfile.write(value.encode(enc))
            else:
                outfile.write(value)
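
# A minimal usage sketch for NullFormatter (the fragment and the choice of
# PythonLexer are illustrative): since no markup is added, ``highlight()``
# should return the input text unchanged, which makes this formatter handy
# for exercising a lexer in isolation.
#
#     >>> from pygments import highlight
#     >>> from pygments.lexers import PythonLexer
#     >>> from pygments.formatters import NullFormatter
#     >>> highlight('x = 1\n', PythonLexer(), NullFormatter())
#     'x = 1\n'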


class RawTokenFormatter(Formatter):
    r"""
    Format tokens as a raw representation for storing token streams.

    The format is ``tokentype<TAB>repr(tokenstring)\n``. The output can later
    be converted to a token stream with the `RawTokenLexer`, described in the
    :doc:`lexer list <lexers>`.

    Only two options are accepted:

    `compress`
        If set to ``'gz'`` or ``'bz2'``, compress the output with the given
        compression algorithm after encoding (default: ``''``).
    `error_color`
        If set to a color name, highlight error tokens using that color.  If
        set but with no value, defaults to ``'red'``.

        .. versionadded:: 0.11
    """
    name = 'Raw tokens'
    aliases = ['raw', 'tokens']
    filenames = ['*.raw']

    unicodeoutput = False

    def __init__(self, **options):
        Formatter.__init__(self, **options)
        # We ignore self.encoding if it is set, since it gets set for lexer
        # and formatter if given with -Oencoding on the command line.
        # The RawTokenFormatter outputs only ASCII. Override here.
        self.encoding = 'ascii'  # let pygments.format() do the right thing
        self.compress = get_choice_opt(options, 'compress',
                                       ['', 'none', 'gz', 'bz2'], '')
        self.error_color = options.get('error_color', None)
        if self.error_color is True:
            self.error_color = 'red'
        if self.error_color is not None:
            try:
                colorize(self.error_color, '')
            except KeyError:
                raise ValueError("Invalid color %r specified" %
                                 self.error_color)

    def format(self, tokensource, outfile):
        try:
            outfile.write(b'')
        except TypeError:
            raise TypeError('The raw tokens formatter needs a binary '
                            'output file')
        if self.compress == 'gz':
            import gzip
            outfile = gzip.GzipFile('', 'wb', 9, outfile)

            def write(text):
                outfile.write(text.encode())
            flush = outfile.flush
        elif self.compress == 'bz2':
            import bz2
            compressor = bz2.BZ2Compressor(9)

            def write(text):
                outfile.write(compressor.compress(text.encode()))

            def flush():
                outfile.write(compressor.flush())
                outfile.flush()
        else:
            def write(text):
                outfile.write(text.encode())
            flush = outfile.flush

        if self.error_color:
            for ttype, value in tokensource:
                line = "%s\t%r\n" % (ttype, value)
                if ttype is Token.Error:
                    write(colorize(self.error_color, line))
                else:
                    write(line)
        else:
            for ttype, value in tokensource:
                write("%s\t%r\n" % (ttype, value))
        flush()
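
# A round-trip sketch for RawTokenFormatter (names and fragment are
# illustrative): the formatter insists on a binary sink, and the
# ``RawTokenLexer`` from ``pygments.lexers.special`` can turn the dump back
# into a token stream. The exact lines written depend on the lexer used.
#
#     >>> import io
#     >>> from pygments import highlight
#     >>> from pygments.lexers import PythonLexer
#     >>> from pygments.lexers.special import RawTokenLexer
#     >>> buf = io.BytesIO()
#     >>> highlight('x = 1\n', PythonLexer(), RawTokenFormatter(), buf)
#     >>> buf.getvalue().splitlines()[0]   # tokentype<TAB>repr(tokenstring)
#     b"Token.Name\t'x'"
#     >>> tokens = RawTokenLexer().get_tokens(buf.getvalue().decode('ascii'))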


TESTCASE_BEFORE = u'''\
    def testNeedsName(lexer):
        fragment = %r
        tokens = [
'''
TESTCASE_AFTER = u'''\
        ]
        assert list(lexer.get_tokens(fragment)) == tokens
'''


class TestcaseFormatter(Formatter):
    """
    Format tokens as appropriate for a new testcase.

    .. versionadded:: 2.0
    """
    name = 'Testcase'
    aliases = ['testcase']

    def __init__(self, **options):
        Formatter.__init__(self, **options)
        if self.encoding is not None and self.encoding != 'utf-8':
            raise ValueError("Only None and utf-8 are allowed encodings.")

    def format(self, tokensource, outfile):
        indentation = ' ' * 12
        rawbuf = []
        outbuf = []
        for ttype, value in tokensource:
            rawbuf.append(value)
            outbuf.append('%s(%s, %r),\n' % (indentation, ttype, value))

        before = TESTCASE_BEFORE % (u''.join(rawbuf),)
        during = u''.join(outbuf)
        after = TESTCASE_AFTER
        if self.encoding is None:
            outfile.write(before + during + after)
        else:
            outfile.write(before.encode('utf-8'))
            outfile.write(during.encode('utf-8'))
            outfile.write(after.encode('utf-8'))
        outfile.flush()
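
# A usage sketch for TestcaseFormatter (fragment and lexer are illustrative):
# it renders the token stream as a ready-to-paste test function; the name
# ``testNeedsName`` in TESTCASE_BEFORE is a deliberate placeholder meant to
# be renamed by hand. Expected output shown abridged.
#
#     >>> from pygments import highlight
#     >>> from pygments.lexers import PythonLexer
#     >>> from pygments.formatters import TestcaseFormatter
#     >>> print(highlight('x = 1\n', PythonLexer(), TestcaseFormatter()))
#         def testNeedsName(lexer):
#             fragment = 'x = 1\n'
#             tokens = [
#                 (Token.Name, 'x'),
#                 ...
#             ]
#             assert list(lexer.get_tokens(fragment)) == tokens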