rtf.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349
  1. """
  2. pygments.formatters.rtf
  3. ~~~~~~~~~~~~~~~~~~~~~~~
  4. A formatter that generates RTF files.
  5. :copyright: Copyright 2006-2024 by the Pygments team, see AUTHORS.
  6. :license: BSD, see LICENSE for details.
  7. """
  8. from collections import OrderedDict
  9. from pygments.formatter import Formatter
  10. from pygments.style import _ansimap
  11. from pygments.util import get_bool_opt, get_int_opt, get_list_opt, surrogatepair
  12. __all__ = ['RtfFormatter']
  13. class RtfFormatter(Formatter):
  14. """
  15. Format tokens as RTF markup. This formatter automatically outputs full RTF
  16. documents with color information and other useful stuff. Perfect for Copy and
  17. Paste into Microsoft(R) Word(R) documents.
  18. Please note that ``encoding`` and ``outencoding`` options are ignored.
  19. The RTF format is ASCII natively, but handles unicode characters correctly
  20. thanks to escape sequences.
  21. .. versionadded:: 0.6
  22. Additional options accepted:
  23. `style`
  24. The style to use, can be a string or a Style subclass (default:
  25. ``'default'``).
  26. `fontface`
  27. The used font family, for example ``Bitstream Vera Sans``. Defaults to
  28. some generic font which is supposed to have fixed width.
  29. `fontsize`
  30. Size of the font used. Size is specified in half points. The
  31. default is 24 half-points, giving a size 12 font.
  32. .. versionadded:: 2.0
  33. `linenos`
  34. Turn on line numbering (default: ``False``).
  35. .. versionadded:: 2.18
  36. `lineno_fontsize`
  37. Font size for line numbers. Size is specified in half points
  38. (default: `fontsize`).
  39. .. versionadded:: 2.18
  40. `lineno_padding`
  41. Number of spaces between the (inline) line numbers and the
  42. source code (default: ``2``).
  43. .. versionadded:: 2.18
  44. `linenostart`
  45. The line number for the first line (default: ``1``).
  46. .. versionadded:: 2.18
  47. `linenostep`
  48. If set to a number n > 1, only every nth line number is printed.
  49. .. versionadded:: 2.18
  50. `lineno_color`
  51. Color for line numbers specified as a hex triplet, e.g. ``'5e5e5e'``.
  52. Defaults to the style's line number color if it is a hex triplet,
  53. otherwise ansi bright black.
  54. .. versionadded:: 2.18
  55. `hl_lines`
  56. Specify a list of lines to be highlighted, as line numbers separated by
  57. spaces, e.g. ``'3 7 8'``. The line numbers are relative to the input
  58. (i.e. the first line is line 1) unless `hl_linenostart` is set.
  59. .. versionadded:: 2.18
  60. `hl_color`
  61. Color for highlighting the lines specified in `hl_lines`, specified as
  62. a hex triplet (default: style's `highlight_color`).
  63. .. versionadded:: 2.18
  64. `hl_linenostart`
  65. If set to ``True`` line numbers in `hl_lines` are specified
  66. relative to `linenostart` (default ``False``).
  67. .. versionadded:: 2.18
  68. """
  69. name = 'RTF'
  70. aliases = ['rtf']
  71. filenames = ['*.rtf']
  72. def __init__(self, **options):
  73. r"""
  74. Additional options accepted:
  75. ``fontface``
  76. Name of the font used. Could for example be ``'Courier New'``
  77. to further specify the default which is ``'\fmodern'``. The RTF
  78. specification claims that ``\fmodern`` are "Fixed-pitch serif
  79. and sans serif fonts". Hope every RTF implementation thinks
  80. the same about modern...
  81. """
  82. Formatter.__init__(self, **options)
  83. self.fontface = options.get('fontface') or ''
  84. self.fontsize = get_int_opt(options, 'fontsize', 0)
  85. self.linenos = get_bool_opt(options, 'linenos', False)
  86. self.lineno_fontsize = get_int_opt(options, 'lineno_fontsize',
  87. self.fontsize)
  88. self.lineno_padding = get_int_opt(options, 'lineno_padding', 2)
  89. self.linenostart = abs(get_int_opt(options, 'linenostart', 1))
  90. self.linenostep = abs(get_int_opt(options, 'linenostep', 1))
  91. self.hl_linenostart = get_bool_opt(options, 'hl_linenostart', False)
  92. self.hl_color = options.get('hl_color', '')
  93. if not self.hl_color:
  94. self.hl_color = self.style.highlight_color
  95. self.hl_lines = []
  96. for lineno in get_list_opt(options, 'hl_lines', []):
  97. try:
  98. lineno = int(lineno)
  99. if self.hl_linenostart:
  100. lineno = lineno - self.linenostart + 1
  101. self.hl_lines.append(lineno)
  102. except ValueError:
  103. pass
  104. self.lineno_color = options.get('lineno_color', '')
  105. if not self.lineno_color:
  106. if self.style.line_number_color == 'inherit':
  107. # style color is the css value 'inherit'
  108. # default to ansi bright-black
  109. self.lineno_color = _ansimap['ansibrightblack']
  110. else:
  111. # style color is assumed to be a hex triplet as other
  112. # colors in pygments/style.py
  113. self.lineno_color = self.style.line_number_color
  114. self.color_mapping = self._create_color_mapping()
  115. def _escape(self, text):
  116. return text.replace('\\', '\\\\') \
  117. .replace('{', '\\{') \
  118. .replace('}', '\\}')
  119. def _escape_text(self, text):
  120. # empty strings, should give a small performance improvement
  121. if not text:
  122. return ''
  123. # escape text
  124. text = self._escape(text)
  125. buf = []
  126. for c in text:
  127. cn = ord(c)
  128. if cn < (2**7):
  129. # ASCII character
  130. buf.append(str(c))
  131. elif (2**7) <= cn < (2**16):
  132. # single unicode escape sequence
  133. buf.append('{\\u%d}' % cn)
  134. elif (2**16) <= cn:
  135. # RTF limits unicode to 16 bits.
  136. # Force surrogate pairs
  137. buf.append('{\\u%d}{\\u%d}' % surrogatepair(cn))
  138. return ''.join(buf).replace('\n', '\\par')
  139. @staticmethod
  140. def hex_to_rtf_color(hex_color):
  141. if hex_color[0] == "#":
  142. hex_color = hex_color[1:]
  143. return '\\red%d\\green%d\\blue%d;' % (
  144. int(hex_color[0:2], 16),
  145. int(hex_color[2:4], 16),
  146. int(hex_color[4:6], 16)
  147. )
  148. def _split_tokens_on_newlines(self, tokensource):
  149. """
  150. Split tokens containing newline characters into multiple token
  151. each representing a line of the input file. Needed for numbering
  152. lines of e.g. multiline comments.
  153. """
  154. for ttype, value in tokensource:
  155. if value == '\n':
  156. yield (ttype, value)
  157. elif "\n" in value:
  158. lines = value.split("\n")
  159. for line in lines[:-1]:
  160. yield (ttype, line+"\n")
  161. if lines[-1]:
  162. yield (ttype, lines[-1])
  163. else:
  164. yield (ttype, value)
  165. def _create_color_mapping(self):
  166. """
  167. Create a mapping of style hex colors to index/offset in
  168. the RTF color table.
  169. """
  170. color_mapping = OrderedDict()
  171. offset = 1
  172. if self.linenos:
  173. color_mapping[self.lineno_color] = offset
  174. offset += 1
  175. if self.hl_lines:
  176. color_mapping[self.hl_color] = offset
  177. offset += 1
  178. for _, style in self.style:
  179. for color in style['color'], style['bgcolor'], style['border']:
  180. if color and color not in color_mapping:
  181. color_mapping[color] = offset
  182. offset += 1
  183. return color_mapping
  184. @property
  185. def _lineno_template(self):
  186. if self.lineno_fontsize != self.fontsize:
  187. return '{{\\fs{} \\cf{} %s{}}}'.format(self.lineno_fontsize,
  188. self.color_mapping[self.lineno_color],
  189. " " * self.lineno_padding)
  190. return '{{\\cf{} %s{}}}'.format(self.color_mapping[self.lineno_color],
  191. " " * self.lineno_padding)
  192. @property
  193. def _hl_open_str(self):
  194. return rf'{{\highlight{self.color_mapping[self.hl_color]} '
  195. @property
  196. def _rtf_header(self):
  197. lines = []
  198. # rtf 1.8 header
  199. lines.append('{\\rtf1\\ansi\\uc0\\deff0'
  200. '{\\fonttbl{\\f0\\fmodern\\fprq1\\fcharset0%s;}}'
  201. % (self.fontface and ' '
  202. + self._escape(self.fontface) or ''))
  203. # color table
  204. lines.append('{\\colortbl;')
  205. for color, _ in self.color_mapping.items():
  206. lines.append(self.hex_to_rtf_color(color))
  207. lines.append('}')
  208. # font and fontsize
  209. lines.append('\\f0\\sa0')
  210. if self.fontsize:
  211. lines.append('\\fs%d' % self.fontsize)
  212. # ensure Libre Office Writer imports and renders consecutive
  213. # space characters the same width, needed for line numbering.
  214. # https://bugs.documentfoundation.org/show_bug.cgi?id=144050
  215. lines.append('\\dntblnsbdb')
  216. return lines
  217. def format_unencoded(self, tokensource, outfile):
  218. for line in self._rtf_header:
  219. outfile.write(line + "\n")
  220. tokensource = self._split_tokens_on_newlines(tokensource)
  221. # first pass of tokens to count lines, needed for line numbering
  222. if self.linenos:
  223. line_count = 0
  224. tokens = [] # for copying the token source generator
  225. for ttype, value in tokensource:
  226. tokens.append((ttype, value))
  227. if value.endswith("\n"):
  228. line_count += 1
  229. # width of line number strings (for padding with spaces)
  230. linenos_width = len(str(line_count+self.linenostart-1))
  231. tokensource = tokens
  232. # highlight stream
  233. lineno = 1
  234. start_new_line = True
  235. for ttype, value in tokensource:
  236. if start_new_line and lineno in self.hl_lines:
  237. outfile.write(self._hl_open_str)
  238. if start_new_line and self.linenos:
  239. if (lineno-self.linenostart+1)%self.linenostep == 0:
  240. current_lineno = lineno + self.linenostart - 1
  241. lineno_str = str(current_lineno).rjust(linenos_width)
  242. else:
  243. lineno_str = "".rjust(linenos_width)
  244. outfile.write(self._lineno_template % lineno_str)
  245. while not self.style.styles_token(ttype) and ttype.parent:
  246. ttype = ttype.parent
  247. style = self.style.style_for_token(ttype)
  248. buf = []
  249. if style['bgcolor']:
  250. buf.append('\\cb%d' % self.color_mapping[style['bgcolor']])
  251. if style['color']:
  252. buf.append('\\cf%d' % self.color_mapping[style['color']])
  253. if style['bold']:
  254. buf.append('\\b')
  255. if style['italic']:
  256. buf.append('\\i')
  257. if style['underline']:
  258. buf.append('\\ul')
  259. if style['border']:
  260. buf.append('\\chbrdr\\chcfpat%d' %
  261. self.color_mapping[style['border']])
  262. start = ''.join(buf)
  263. if start:
  264. outfile.write(f'{{{start} ')
  265. outfile.write(self._escape_text(value))
  266. if start:
  267. outfile.write('}')
  268. start_new_line = False
  269. # complete line of input
  270. if value.endswith("\n"):
  271. # close line highlighting
  272. if lineno in self.hl_lines:
  273. outfile.write('}')
  274. # newline in RTF file after closing }
  275. outfile.write("\n")
  276. start_new_line = True
  277. lineno += 1
  278. outfile.write('}\n')