diff.py 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165
  1. # -*- coding: utf-8 -*-
  2. """
  3. pygments.lexers.diff
  4. ~~~~~~~~~~~~~~~~~~~~
  5. Lexers for diff/patch formats.
  6. :copyright: Copyright 2006-2019 by the Pygments team, see AUTHORS.
  7. :license: BSD, see LICENSE for details.
  8. """
  9. import re
  10. from pygments.lexer import RegexLexer, include, bygroups
  11. from pygments.token import Text, Comment, Operator, Keyword, Name, Generic, \
  12. Literal
# Names exported by this module.
__all__ = ['DiffLexer', 'DarcsPatchLexer', 'WDiffLexer']
  14. class DiffLexer(RegexLexer):
  15. """
  16. Lexer for unified or context-style diffs or patches.
  17. """
  18. name = 'Diff'
  19. aliases = ['diff', 'udiff']
  20. filenames = ['*.diff', '*.patch']
  21. mimetypes = ['text/x-diff', 'text/x-patch']
  22. tokens = {
  23. 'root': [
  24. (r' .*\n', Text),
  25. (r'\+.*\n', Generic.Inserted),
  26. (r'-.*\n', Generic.Deleted),
  27. (r'!.*\n', Generic.Strong),
  28. (r'@.*\n', Generic.Subheading),
  29. (r'([Ii]ndex|diff).*\n', Generic.Heading),
  30. (r'=.*\n', Generic.Heading),
  31. (r'.*\n', Text),
  32. ]
  33. }
  34. def analyse_text(text):
  35. if text[:7] == 'Index: ':
  36. return True
  37. if text[:5] == 'diff ':
  38. return True
  39. if text[:4] == '--- ':
  40. return 0.9
  41. class DarcsPatchLexer(RegexLexer):
  42. """
  43. DarcsPatchLexer is a lexer for the various versions of the darcs patch
  44. format. Examples of this format are derived by commands such as
  45. ``darcs annotate --patch`` and ``darcs send``.
  46. .. versionadded:: 0.10
  47. """
  48. name = 'Darcs Patch'
  49. aliases = ['dpatch']
  50. filenames = ['*.dpatch', '*.darcspatch']
  51. DPATCH_KEYWORDS = ('hunk', 'addfile', 'adddir', 'rmfile', 'rmdir', 'move',
  52. 'replace')
  53. tokens = {
  54. 'root': [
  55. (r'<', Operator),
  56. (r'>', Operator),
  57. (r'\{', Operator),
  58. (r'\}', Operator),
  59. (r'(\[)((?:TAG )?)(.*)(\n)(.*)(\*\*)(\d+)(\s?)(\])',
  60. bygroups(Operator, Keyword, Name, Text, Name, Operator,
  61. Literal.Date, Text, Operator)),
  62. (r'(\[)((?:TAG )?)(.*)(\n)(.*)(\*\*)(\d+)(\s?)',
  63. bygroups(Operator, Keyword, Name, Text, Name, Operator,
  64. Literal.Date, Text), 'comment'),
  65. (r'New patches:', Generic.Heading),
  66. (r'Context:', Generic.Heading),
  67. (r'Patch bundle hash:', Generic.Heading),
  68. (r'(\s*)(%s)(.*\n)' % '|'.join(DPATCH_KEYWORDS),
  69. bygroups(Text, Keyword, Text)),
  70. (r'\+', Generic.Inserted, "insert"),
  71. (r'-', Generic.Deleted, "delete"),
  72. (r'.*\n', Text),
  73. ],
  74. 'comment': [
  75. (r'[^\]].*\n', Comment),
  76. (r'\]', Operator, "#pop"),
  77. ],
  78. 'specialText': [ # darcs add [_CODE_] special operators for clarity
  79. (r'\n', Text, "#pop"), # line-based
  80. (r'\[_[^_]*_]', Operator),
  81. ],
  82. 'insert': [
  83. include('specialText'),
  84. (r'\[', Generic.Inserted),
  85. (r'[^\n\[]+', Generic.Inserted),
  86. ],
  87. 'delete': [
  88. include('specialText'),
  89. (r'\[', Generic.Deleted),
  90. (r'[^\n\[]+', Generic.Deleted),
  91. ],
  92. }
  93. class WDiffLexer(RegexLexer):
  94. """
  95. A `wdiff <https://www.gnu.org/software/wdiff/>`_ lexer.
  96. Note that:
  97. * only to normal output (without option like -l).
  98. * if target files of wdiff contain "[-", "-]", "{+", "+}",
  99. especially they are unbalanced, this lexer will get confusing.
  100. .. versionadded:: 2.2
  101. """
  102. name = 'WDiff'
  103. aliases = ['wdiff']
  104. filenames = ['*.wdiff']
  105. mimetypes = []
  106. flags = re.MULTILINE | re.DOTALL
  107. # We can only assume "[-" after "[-" before "-]" is `nested`,
  108. # for instance wdiff to wdiff outputs. We have no way to
  109. # distinct these marker is of wdiff output from original text.
  110. ins_op = r"\{\+"
  111. ins_cl = r"\+\}"
  112. del_op = r"\[\-"
  113. del_cl = r"\-\]"
  114. normal = r'[^{}[\]+-]+' # for performance
  115. tokens = {
  116. 'root': [
  117. (ins_op, Generic.Inserted, 'inserted'),
  118. (del_op, Generic.Deleted, 'deleted'),
  119. (normal, Text),
  120. (r'.', Text),
  121. ],
  122. 'inserted': [
  123. (ins_op, Generic.Inserted, '#push'),
  124. (del_op, Generic.Inserted, '#push'),
  125. (del_cl, Generic.Inserted, '#pop'),
  126. (ins_cl, Generic.Inserted, '#pop'),
  127. (normal, Generic.Inserted),
  128. (r'.', Generic.Inserted),
  129. ],
  130. 'deleted': [
  131. (del_op, Generic.Deleted, '#push'),
  132. (ins_op, Generic.Deleted, '#push'),
  133. (ins_cl, Generic.Deleted, '#pop'),
  134. (del_cl, Generic.Deleted, '#pop'),
  135. (normal, Generic.Deleted),
  136. (r'.', Generic.Deleted),
  137. ],
  138. }