email.py 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132
  1. """
  2. pygments.lexers.email
  3. ~~~~~~~~~~~~~~~~~~~~~
  4. Lexer for raw E-mail.
  5. :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS.
  6. :license: BSD, see LICENSE for details.
  7. """
  8. from pygments.lexer import RegexLexer, DelegatingLexer, bygroups
  9. from pygments.lexers.mime import MIMELexer
  10. from pygments.token import Text, Keyword, Name, String, Number, Comment
  11. from pygments.util import get_bool_opt
  12. __all__ = ["EmailLexer"]
  13. class EmailHeaderLexer(RegexLexer):
  14. """
  15. Sub-lexer for raw E-mail. This lexer only process header part of e-mail.
  16. .. versionadded:: 2.5
  17. """
  18. def __init__(self, **options):
  19. super().__init__(**options)
  20. self.highlight_x = get_bool_opt(options, "highlight-X-header", False)
  21. def get_x_header_tokens(self, match):
  22. if self.highlight_x:
  23. # field
  24. yield match.start(1), Name.Tag, match.group(1)
  25. # content
  26. default_actions = self.get_tokens_unprocessed(
  27. match.group(2), stack=("root", "header"))
  28. yield from default_actions
  29. else:
  30. # lowlight
  31. yield match.start(1), Comment.Special, match.group(1)
  32. yield match.start(2), Comment.Multiline, match.group(2)
  33. tokens = {
  34. "root": [
  35. (r"^(?:[A-WYZ]|X400)[\w\-]*:", Name.Tag, "header"),
  36. (r"^(X-(?:\w[\w\-]*:))([\s\S]*?\n)(?![ \t])", get_x_header_tokens),
  37. ],
  38. "header": [
  39. # folding
  40. (r"\n[ \t]", Text.Whitespace),
  41. (r"\n(?![ \t])", Text.Whitespace, "#pop"),
  42. # keywords
  43. (r"\bE?SMTPS?\b", Keyword),
  44. (r"\b(?:HE|EH)LO\b", Keyword),
  45. # mailbox
  46. (r"[\w\.\-\+=]+@[\w\.\-]+", Name.Label),
  47. (r"<[\w\.\-\+=]+@[\w\.\-]+>", Name.Label),
  48. # domain
  49. (r"\b(\w[\w\.-]*\.[\w\.-]*\w[a-zA-Z]+)\b", Name.Function),
  50. # IPv4
  51. (r"(?<=\b)(?:(?:25[0-5]|2[0-4][0-9]|1?[0-9][0-9]?)\.){3}(?:25[0"
  52. r"-5]|2[0-4][0-9]|1?[0-9][0-9]?)(?=\b)",
  53. Number.Integer),
  54. # IPv6
  55. (r"(?<=\b)([0-9a-fA-F]{1,4}:){1,7}:(?!\b)", Number.Hex),
  56. (r"(?<=\b):((:[0-9a-fA-F]{1,4}){1,7}|:)(?=\b)", Number.Hex),
  57. (r"(?<=\b)([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}(?=\b)", Number.Hex),
  58. (r"(?<=\b)([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}(?=\b)", Number.Hex),
  59. (r"(?<=\b)[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})(?=\b)", Number.Hex),
  60. (r"(?<=\b)fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}(?=\b)", Number.Hex),
  61. (r"(?<=\b)([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}(?=\b)", Number.Hex),
  62. (r"(?<=\b)([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}(?=\b)",
  63. Number.Hex),
  64. (r"(?<=\b)([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}(?=\b)",
  65. Number.Hex),
  66. (r"(?<=\b)([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}(?=\b)",
  67. Number.Hex),
  68. (r"(?<=\b)::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}"
  69. r"[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}"
  70. r"[0-9])(?=\b)",
  71. Number.Hex),
  72. (r"(?<=\b)([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9])"
  73. r"{0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])(?=\b)",
  74. Number.Hex),
  75. # Date time
  76. (r"(?:(Sun|Mon|Tue|Wed|Thu|Fri|Sat),\s+)?(0[1-9]|[1-2]?[0-9]|3["
  77. r"01])\s+(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s+("
  78. r"19[0-9]{2}|[2-9][0-9]{3})\s+(2[0-3]|[0-1][0-9]):([0-5][0-9])"
  79. r"(?::(60|[0-5][0-9]))?(?:\.\d{1,5})?\s+([-\+][0-9]{2}[0-5][0-"
  80. r"9]|\(?(?:UTC?|GMT|(?:E|C|M|P)(?:ST|ET|DT)|[A-IK-Z])\)?)",
  81. Name.Decorator),
  82. # RFC-2047 encoded string
  83. (r"(=\?)([\w-]+)(\?)([BbQq])(\?)([\[\w!\"#$%&\'()*+,-./:;<=>@[\\"
  84. r"\]^_`{|}~]+)(\?=)",
  85. bygroups(String.Affix, Name.Constant, String.Affix, Keyword.Constant,
  86. String.Affix, Number.Hex, String.Affix)),
  87. # others
  88. (r'[\s]+', Text.Whitespace),
  89. (r'[\S]', Text),
  90. ],
  91. }
  92. class EmailLexer(DelegatingLexer):
  93. """
  94. Lexer for raw E-mail.
  95. Additional options accepted:
  96. `highlight-X-header`
  97. Highlight the fields of ``X-`` user-defined email header. (default:
  98. ``False``).
  99. .. versionadded:: 2.5
  100. """
  101. name = "E-mail"
  102. aliases = ["email", "eml"]
  103. filenames = ["*.eml"]
  104. mimetypes = ["message/rfc822"]
  105. def __init__(self, **options):
  106. super().__init__(EmailHeaderLexer, MIMELexer, Comment, **options)