formatter.py 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129
  1. """
  2. pygments.formatter
  3. ~~~~~~~~~~~~~~~~~~
  4. Base formatter class.
  5. :copyright: Copyright 2006-2024 by the Pygments team, see AUTHORS.
  6. :license: BSD, see LICENSE for details.
  7. """
  8. import codecs
  9. from pygments.util import get_bool_opt
  10. from pygments.styles import get_style_by_name
  11. __all__ = ['Formatter']
  12. def _lookup_style(style):
  13. if isinstance(style, str):
  14. return get_style_by_name(style)
  15. return style
  16. class Formatter:
  17. """
  18. Converts a token stream to text.
  19. Formatters should have attributes to help selecting them. These
  20. are similar to the corresponding :class:`~pygments.lexer.Lexer`
  21. attributes.
  22. .. autoattribute:: name
  23. :no-value:
  24. .. autoattribute:: aliases
  25. :no-value:
  26. .. autoattribute:: filenames
  27. :no-value:
  28. You can pass options as keyword arguments to the constructor.
  29. All formatters accept these basic options:
  30. ``style``
  31. The style to use, can be a string or a Style subclass
  32. (default: "default"). Not used by e.g. the
  33. TerminalFormatter.
  34. ``full``
  35. Tells the formatter to output a "full" document, i.e.
  36. a complete self-contained document. This doesn't have
  37. any effect for some formatters (default: false).
  38. ``title``
  39. If ``full`` is true, the title that should be used to
  40. caption the document (default: '').
  41. ``encoding``
  42. If given, must be an encoding name. This will be used to
  43. convert the Unicode token strings to byte strings in the
  44. output. If it is "" or None, Unicode strings will be written
  45. to the output file, which most file-like objects do not
  46. support (default: None).
  47. ``outencoding``
  48. Overrides ``encoding`` if given.
  49. """
  50. #: Full name for the formatter, in human-readable form.
  51. name = None
  52. #: A list of short, unique identifiers that can be used to lookup
  53. #: the formatter from a list, e.g. using :func:`.get_formatter_by_name()`.
  54. aliases = []
  55. #: A list of fnmatch patterns that match filenames for which this
  56. #: formatter can produce output. The patterns in this list should be unique
  57. #: among all formatters.
  58. filenames = []
  59. #: If True, this formatter outputs Unicode strings when no encoding
  60. #: option is given.
  61. unicodeoutput = True
  62. def __init__(self, **options):
  63. """
  64. As with lexers, this constructor takes arbitrary optional arguments,
  65. and if you override it, you should first process your own options, then
  66. call the base class implementation.
  67. """
  68. self.style = _lookup_style(options.get('style', 'default'))
  69. self.full = get_bool_opt(options, 'full', False)
  70. self.title = options.get('title', '')
  71. self.encoding = options.get('encoding', None) or None
  72. if self.encoding in ('guess', 'chardet'):
  73. # can happen for e.g. pygmentize -O encoding=guess
  74. self.encoding = 'utf-8'
  75. self.encoding = options.get('outencoding') or self.encoding
  76. self.options = options
  77. def get_style_defs(self, arg=''):
  78. """
  79. This method must return statements or declarations suitable to define
  80. the current style for subsequent highlighted text (e.g. CSS classes
  81. in the `HTMLFormatter`).
  82. The optional argument `arg` can be used to modify the generation and
  83. is formatter dependent (it is standardized because it can be given on
  84. the command line).
  85. This method is called by the ``-S`` :doc:`command-line option <cmdline>`,
  86. the `arg` is then given by the ``-a`` option.
  87. """
  88. return ''
  89. def format(self, tokensource, outfile):
  90. """
  91. This method must format the tokens from the `tokensource` iterable and
  92. write the formatted version to the file object `outfile`.
  93. Formatter options can control how exactly the tokens are converted.
  94. """
  95. if self.encoding:
  96. # wrap the outfile in a StreamWriter
  97. outfile = codecs.lookup(self.encoding)[3](outfile)
  98. return self.format_unencoded(tokensource, outfile)
  99. # Allow writing Formatter[str] or Formatter[bytes]. That's equivalent to
  100. # Formatter. This helps when using third-party type stubs from typeshed.
  101. def __class_getitem__(cls, name):
  102. return cls