diff.py 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169
  1. """
  2. pygments.lexers.diff
  3. ~~~~~~~~~~~~~~~~~~~~
  4. Lexers for diff/patch formats.
  5. :copyright: Copyright 2006-2024 by the Pygments team, see AUTHORS.
  6. :license: BSD, see LICENSE for details.
  7. """
  8. import re
  9. from pygments.lexer import RegexLexer, include, bygroups
  10. from pygments.token import Text, Comment, Operator, Keyword, Name, Generic, \
  11. Literal, Whitespace
  12. __all__ = ['DiffLexer', 'DarcsPatchLexer', 'WDiffLexer']
  13. class DiffLexer(RegexLexer):
  14. """
  15. Lexer for unified or context-style diffs or patches.
  16. """
  17. name = 'Diff'
  18. aliases = ['diff', 'udiff']
  19. filenames = ['*.diff', '*.patch']
  20. mimetypes = ['text/x-diff', 'text/x-patch']
  21. url = 'https://en.wikipedia.org/wiki/Diff'
  22. version_added = ''
  23. tokens = {
  24. 'root': [
  25. (r'( )(.*)(\n)', bygroups(Whitespace, Text, Whitespace)),
  26. (r'(!.*|---)(\n)', bygroups(Generic.Strong, Whitespace)),
  27. (r'((?:< |-).*)(\n)', bygroups(Generic.Deleted, Whitespace)),
  28. (r'((?:> |\+).*)(\n)', bygroups(Generic.Inserted, Whitespace)),
  29. (
  30. r'(@.*|\d(?:,\d+)?(?:a|c|d)\d+(?:,\d+)?)(\n)',
  31. bygroups(Generic.Subheading, Whitespace),
  32. ),
  33. (r'((?:[Ii]ndex|diff).*)(\n)', bygroups(Generic.Heading, Whitespace)),
  34. (r'(=.*)(\n)', bygroups(Generic.Heading, Whitespace)),
  35. (r'(.*)(\n)', bygroups(Text, Whitespace)),
  36. ]
  37. }
  38. def analyse_text(text):
  39. if text[:7] == 'Index: ':
  40. return True
  41. if text[:5] == 'diff ':
  42. return True
  43. if text[:4] == '--- ':
  44. return 0.9
  45. class DarcsPatchLexer(RegexLexer):
  46. """
  47. DarcsPatchLexer is a lexer for the various versions of the darcs patch
  48. format. Examples of this format are derived by commands such as
  49. ``darcs annotate --patch`` and ``darcs send``.
  50. """
  51. name = 'Darcs Patch'
  52. aliases = ['dpatch']
  53. filenames = ['*.dpatch', '*.darcspatch']
  54. url = 'https://darcs.net'
  55. version_added = '0.10'
  56. DPATCH_KEYWORDS = ('hunk', 'addfile', 'adddir', 'rmfile', 'rmdir', 'move',
  57. 'replace')
  58. tokens = {
  59. 'root': [
  60. (r'<', Operator),
  61. (r'>', Operator),
  62. (r'\{', Operator),
  63. (r'\}', Operator),
  64. (r'(\[)((?:TAG )?)(.*)(\n)(.*)(\*\*)(\d+)(\s?)(\])',
  65. bygroups(Operator, Keyword, Name, Whitespace, Name, Operator,
  66. Literal.Date, Whitespace, Operator)),
  67. (r'(\[)((?:TAG )?)(.*)(\n)(.*)(\*\*)(\d+)(\s?)',
  68. bygroups(Operator, Keyword, Name, Whitespace, Name, Operator,
  69. Literal.Date, Whitespace), 'comment'),
  70. (r'New patches:', Generic.Heading),
  71. (r'Context:', Generic.Heading),
  72. (r'Patch bundle hash:', Generic.Heading),
  73. (r'(\s*)({})(.*)(\n)'.format('|'.join(DPATCH_KEYWORDS)),
  74. bygroups(Whitespace, Keyword, Text, Whitespace)),
  75. (r'\+', Generic.Inserted, "insert"),
  76. (r'-', Generic.Deleted, "delete"),
  77. (r'(.*)(\n)', bygroups(Text, Whitespace)),
  78. ],
  79. 'comment': [
  80. (r'[^\]].*\n', Comment),
  81. (r'\]', Operator, "#pop"),
  82. ],
  83. 'specialText': [ # darcs add [_CODE_] special operators for clarity
  84. (r'\n', Whitespace, "#pop"), # line-based
  85. (r'\[_[^_]*_]', Operator),
  86. ],
  87. 'insert': [
  88. include('specialText'),
  89. (r'\[', Generic.Inserted),
  90. (r'[^\n\[]+', Generic.Inserted),
  91. ],
  92. 'delete': [
  93. include('specialText'),
  94. (r'\[', Generic.Deleted),
  95. (r'[^\n\[]+', Generic.Deleted),
  96. ],
  97. }
  98. class WDiffLexer(RegexLexer):
  99. """
  100. A wdiff lexer.
  101. Note that:
  102. * It only works with normal output (without options like ``-l``).
  103. * If the target files contain "[-", "-]", "{+", or "+}",
  104. especially they are unbalanced, the lexer will get confused.
  105. """
  106. name = 'WDiff'
  107. url = 'https://www.gnu.org/software/wdiff/'
  108. aliases = ['wdiff']
  109. filenames = ['*.wdiff']
  110. mimetypes = []
  111. version_added = '2.2'
  112. flags = re.MULTILINE | re.DOTALL
  113. # We can only assume "[-" after "[-" before "-]" is `nested`,
  114. # for instance wdiff to wdiff outputs. We have no way to
  115. # distinct these marker is of wdiff output from original text.
  116. ins_op = r"\{\+"
  117. ins_cl = r"\+\}"
  118. del_op = r"\[\-"
  119. del_cl = r"\-\]"
  120. normal = r'[^{}[\]+-]+' # for performance
  121. tokens = {
  122. 'root': [
  123. (ins_op, Generic.Inserted, 'inserted'),
  124. (del_op, Generic.Deleted, 'deleted'),
  125. (normal, Text),
  126. (r'.', Text),
  127. ],
  128. 'inserted': [
  129. (ins_op, Generic.Inserted, '#push'),
  130. (del_op, Generic.Inserted, '#push'),
  131. (del_cl, Generic.Inserted, '#pop'),
  132. (ins_cl, Generic.Inserted, '#pop'),
  133. (normal, Generic.Inserted),
  134. (r'.', Generic.Inserted),
  135. ],
  136. 'deleted': [
  137. (del_op, Generic.Deleted, '#push'),
  138. (ins_op, Generic.Deleted, '#push'),
  139. (ins_cl, Generic.Deleted, '#pop'),
  140. (del_cl, Generic.Deleted, '#pop'),
  141. (normal, Generic.Deleted),
  142. (r'.', Generic.Deleted),
  143. ],
  144. }