123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164 |
- # -*- coding: utf-8 -*-
- """
- pygments.formatters.other
- ~~~~~~~~~~~~~~~~~~~~~~~~~
- Other formatters: NullFormatter, RawTokenFormatter, TestcaseFormatter.
- :copyright: Copyright 2006-2019 by the Pygments team, see AUTHORS.
- :license: BSD, see LICENSE for details.
- """
- from pygments.formatter import Formatter
- from pygments.util import get_choice_opt
- from pygments.token import Token
- from pygments.console import colorize
- __all__ = ['NullFormatter', 'RawTokenFormatter', 'TestcaseFormatter']
class NullFormatter(Formatter):
    """
    Pass the token text through to the output without adding any markup.
    """
    name = 'Text only'
    aliases = ['text', 'null']
    filenames = ['*.txt']

    def format(self, tokensource, outfile):
        """
        Write the value of every ``(tokentype, value)`` pair in
        *tokensource* to *outfile*.

        The token type is ignored.  When ``self.encoding`` is set, each
        value is encoded with it before being written; otherwise the value
        is written as-is.
        """
        encoding = self.encoding
        if not encoding:
            # No encoding configured: hand the text through untouched.
            for _, text in tokensource:
                outfile.write(text)
            return
        for _, text in tokensource:
            outfile.write(text.encode(encoding))
class RawTokenFormatter(Formatter):
    r"""
    Format tokens as a raw representation for storing token streams.

    The format is ``tokentype<TAB>repr(tokenstring)\n``. The output can later
    be converted to a token stream with the `RawTokenLexer`, described in the
    :doc:`lexer list <lexers>`.

    Only two options are accepted:

    `compress`
        If set to ``'gz'`` or ``'bz2'``, compress the output with the given
        compression algorithm after encoding (default: ``''``).
    `error_color`
        If set to a color name, highlight error tokens using that color. If
        set but with no value, defaults to ``'red'``.

        .. versionadded:: 0.11
    """
    name = 'Raw tokens'
    aliases = ['raw', 'tokens']
    filenames = ['*.raw']

    unicodeoutput = False

    def __init__(self, **options):
        """
        Read the ``compress`` and ``error_color`` options.

        Raises ``ValueError`` if ``error_color`` names a color that
        ``colorize`` does not accept.
        """
        Formatter.__init__(self, **options)
        # We ignore self.encoding if it is set, since it gets set for lexer
        # and formatter if given with -Oencoding on the command line.
        # The RawTokenFormatter outputs only ASCII. Override here.
        self.encoding = 'ascii'  # let pygments.format() do the right thing
        self.compress = get_choice_opt(options, 'compress',
                                       ['', 'none', 'gz', 'bz2'], '')
        self.error_color = options.get('error_color', None)
        if self.error_color is True:
            # Option given without a value: fall back to the default color.
            self.error_color = 'red'
        if self.error_color is not None:
            # Probe the color name once up front so that a bad name is
            # reported at construction time rather than while formatting.
            try:
                colorize(self.error_color, '')
            except KeyError:
                raise ValueError("Invalid color %r specified" %
                                 self.error_color)

    def format(self, tokensource, outfile):
        """
        Write one ``tokentype<TAB>repr(value)`` line per token to *outfile*.

        *outfile* must accept bytes; a ``TypeError`` is raised otherwise.
        Depending on ``self.compress`` the encoded lines are written
        directly, through gzip, or through bz2.  Lines for ``Token.Error``
        tokens are wrapped with ``colorize`` when ``self.error_color`` is
        set.  The compressed stream is finalised and *outfile* is flushed
        (but not closed) before returning.
        """
        try:
            outfile.write(b'')
        except TypeError:
            raise TypeError('The raw tokens formatter needs a binary '
                            'output file')

        if self.compress == 'gz':
            import gzip
            gz_file = gzip.GzipFile('', 'wb', 9, outfile)

            def write(text):
                gz_file.write(text.encode())

            def flush():
                # GzipFile.flush() alone never emits the gzip trailer
                # (CRC32 + length), which leaves a truncated stream that
                # readers reject.  close() writes the trailer and, because
                # the target was passed in as a file object, leaves
                # ``outfile`` itself open.
                gz_file.close()
                outfile.flush()
        elif self.compress == 'bz2':
            import bz2
            compressor = bz2.BZ2Compressor(9)

            def write(text):
                outfile.write(compressor.compress(text.encode()))

            def flush():
                # Emit whatever the compressor is still buffering.
                outfile.write(compressor.flush())
                outfile.flush()
        else:
            def write(text):
                outfile.write(text.encode())

            flush = outfile.flush

        error_color = self.error_color
        for ttype, value in tokensource:
            line = "%s\t%r\n" % (ttype, value)
            if error_color and ttype is Token.Error:
                line = colorize(error_color, line)
            write(line)
        flush()
# Templates that TestcaseFormatter wraps around the generated token list.
# TESTCASE_BEFORE takes a single ``%r`` argument: the source fragment.
#
# The formatter emits each token entry with a 12-space indent, so the
# surrounding function is nested to match (``def`` at 4 spaces, its body at
# 8).  Without that nesting the generated snippet is not valid Python.
TESTCASE_BEFORE = u'''\
    def testNeedsName(lexer):
        fragment = %r
        tokens = [
'''

TESTCASE_AFTER = u'''\
        ]
        assert list(lexer.get_tokens(fragment)) == tokens
'''
class TestcaseFormatter(Formatter):
    """
    Render a token stream as the source of a new lexer test function.

    .. versionadded:: 2.0
    """
    name = 'Testcase'
    aliases = ['testcase']

    def __init__(self, **options):
        """
        Accept the standard formatter options.

        Raises ``ValueError`` unless the encoding is ``None`` or
        ``'utf-8'``.
        """
        Formatter.__init__(self, **options)
        if self.encoding not in (None, 'utf-8'):
            raise ValueError("Only None and utf-8 are allowed encodings.")

    def format(self, tokensource, outfile):
        """
        Write a test function for the tokens in *tokensource* to *outfile*.

        The concatenated token values become the ``fragment`` substituted
        into ``TESTCASE_BEFORE``; every token contributes one
        ``(tokentype, value),`` line; ``TESTCASE_AFTER`` closes the
        snippet.  Text is written when ``self.encoding`` is ``None``,
        UTF-8 encoded bytes otherwise, and *outfile* is flushed at the end.
        """
        pad = ' ' * 12
        # Materialise the stream: it is walked twice below.
        tokens = list(tokensource)
        fragment = u''.join([value for _, value in tokens])
        header = TESTCASE_BEFORE % (fragment,)
        body = u''.join(['%s(%s, %r),\n' % (pad, ttype, value)
                         for ttype, value in tokens])
        footer = TESTCASE_AFTER
        if self.encoding is not None:
            for part in (header, body, footer):
                outfile.write(part.encode('utf-8'))
        else:
            outfile.write(header + body + footer)
        outfile.flush()
|