email.py 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154
  1. # -*- coding: utf-8 -*-
  2. """
  3. pygments.lexers.email
  4. ~~~~~~~~~~~~~~~~~~~~~
  5. Lexer for the raw E-mail.
  6. :copyright: Copyright 2006-2019 by the Pygments team, see AUTHORS.
  7. :license: BSD, see LICENSE for details.
  8. """
  9. from pygments.lexer import RegexLexer, DelegatingLexer, bygroups
  10. from pygments.lexers.mime import MIMELexer
  11. from pygments.token import Text, Keyword, Name, String, Number, Comment
  12. from pygments.util import get_bool_opt
# Only the delegating lexer is exported; EmailHeaderLexer is left out of the
# public names.
__all__ = ["EmailLexer"]
  14. class EmailHeaderLexer(RegexLexer):
  15. """
  16. Sub-lexer for raw E-mail. This lexer only process header part of e-mail.
  17. .. versionadded:: 2.5
  18. """
  19. def __init__(self, **options):
  20. super(EmailHeaderLexer, self).__init__(**options)
  21. self.highlight_x = get_bool_opt(options, "highlight-X-header", False)
  22. def get_x_header_tokens(self, match):
  23. if self.highlight_x:
  24. # field
  25. yield match.start(1), Name.Tag, match.group(1)
  26. # content
  27. default_actions = self.get_tokens_unprocessed(
  28. match.group(2), stack=("root", "header"))
  29. for item in default_actions:
  30. yield item
  31. else:
  32. # lowlight
  33. yield match.start(1), Comment.Special, match.group(1)
  34. yield match.start(2), Comment.Multiline, match.group(2)
  35. tokens = {
  36. "root": [
  37. (r"^(?:[A-WYZ]|X400)[\w\-]*:", Name.Tag, "header"),
  38. (r"^(X-(?:\w[\w\-]*:))([\s\S]*?\n)(?![ \t])", get_x_header_tokens),
  39. ],
  40. "header": [
  41. # folding
  42. (r"\n[ \t]", Text.Whitespace),
  43. (r"\n(?![ \t])", Text.Whitespace, "#pop"),
  44. # keywords
  45. (r"\bE?SMTPS?\b", Keyword),
  46. (r"\b(?:HE|EH)LO\b", Keyword),
  47. # mailbox
  48. (r"[\w\.\-\+=]+@[\w\.\-]+", Name.Label),
  49. (r"<[\w\.\-\+=]+@[\w\.\-]+>", Name.Label),
  50. # domain
  51. (r"\b(\w[\w\.-]*\.[\w\.-]*\w[a-zA-Z]+)\b", Name.Function),
  52. # IPv4
  53. (
  54. r"(?<=\b)(?:(?:25[0-5]|2[0-4][0-9]|1?[0-9][0-9]?)\.){3}(?:25[0"
  55. r"-5]|2[0-4][0-9]|1?[0-9][0-9]?)(?=\b)",
  56. Number.Integer,
  57. ),
  58. # IPv6
  59. (r"(?<=\b)([0-9a-fA-F]{1,4}:){1,7}:(?!\b)", Number.Hex),
  60. (r"(?<=\b):((:[0-9a-fA-F]{1,4}){1,7}|:)(?=\b)", Number.Hex),
  61. (r"(?<=\b)([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}(?=\b)", Number.Hex),
  62. (r"(?<=\b)([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}(?=\b)", Number.Hex),
  63. (r"(?<=\b)[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})(?=\b)", Number.Hex),
  64. (r"(?<=\b)fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}(?=\b)", Number.Hex),
  65. (r"(?<=\b)([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}(?=\b)", Number.Hex),
  66. (r"(?<=\b)([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}(?=\b)",
  67. Number.Hex),
  68. (r"(?<=\b)([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}(?=\b)",
  69. Number.Hex),
  70. (r"(?<=\b)([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}(?=\b)",
  71. Number.Hex),
  72. (
  73. r"(?<=\b)::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}"
  74. r"[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}"
  75. r"[0-9])(?=\b)",
  76. Number.Hex,
  77. ),
  78. (
  79. r"(?<=\b)([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-"
  80. r"9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-"
  81. r"9])(?=\b)",
  82. Number.Hex,
  83. ),
  84. # Date time
  85. (
  86. r"(?:(Sun|Mon|Tue|Wed|Thu|Fri|Sat),\s+)?(0[1-9]|[1-2]?[0-9]|3["
  87. r"01])\s+(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s+("
  88. r"19[0-9]{2}|[2-9][0-9]{3})\s+(2[0-3]|[0-1][0-9]):([0-5][0-9])"
  89. r"(?::(60|[0-5][0-9]))?(?:\.\d{1,5})?\s+([-\+][0-9]{2}[0-5][0-"
  90. r"9]|\(?(?:UTC?|GMT|(?:E|C|M|P)(?:ST|ET|DT)|[A-IK-Z])\)?)",
  91. Name.Decorator,
  92. ),
  93. # RFC-2047 encoded string
  94. (
  95. r"(=\?)([\w-]+)(\?)([BbQq])(\?)([\[\w!\"#$%&\'()*+,-./:;<=>@[\\"
  96. r"\]^_`{|}~]+)(\?=)",
  97. bygroups(
  98. String.Affix,
  99. Name.Constant,
  100. String.Affix,
  101. Keyword.Constant,
  102. String.Affix,
  103. Number.Hex,
  104. String.Affix
  105. )
  106. ),
  107. # others
  108. (r'[\s]+', Text.Whitespace),
  109. (r'[\S]', Text),
  110. ],
  111. }
  112. class EmailLexer(DelegatingLexer):
  113. """
  114. Lexer for raw E-mail.
  115. Additional options accepted:
  116. `highlight-X-header`
  117. Highlight the fields of ``X-`` user-defined email header. (default:
  118. ``False``).
  119. .. versionadded:: 2.5
  120. """
  121. name = "E-mail"
  122. aliases = ["email", "eml"]
  123. filenames = ["*.eml"]
  124. mimetypes = ["message/rfc822"]
  125. def __init__(self, **options):
  126. super(EmailLexer, self).__init__(
  127. EmailHeaderLexer, MIMELexer, Comment, **options
  128. )