1# -*- coding: utf-8 -*-
2"""
3    pygments.lexers.email
4    ~~~~~~~~~~~~~~~~~~~~~
5
6    Lexer for the raw E-mail.
7
8    :copyright: Copyright 2006-2020 by the Pygments team, see AUTHORS.
9    :license: BSD, see LICENSE for details.
10"""
11
12from pygments.lexer import RegexLexer, DelegatingLexer, bygroups
13from pygments.lexers.mime import MIMELexer
14from pygments.token import Text, Keyword, Name, String, Number, Comment
15from pygments.util import get_bool_opt
16
17__all__ = ["EmailLexer"]
18
19
class EmailHeaderLexer(RegexLexer):
    """
    Sub-lexer for raw E-mail. This lexer only processes the header part of
    an e-mail.

    Additional options accepted:

    `highlight-X-header`
        Highlight the fields of ``X-`` user-defined email header. (default:
        ``False``).

    .. versionadded:: 2.5
    """

    def __init__(self, **options):
        super().__init__(**options)
        # Read once at construction; consulted by get_x_header_tokens().
        self.highlight_x = get_bool_opt(options, "highlight-X-header", False)

    def get_x_header_tokens(self, match):
        """
        Token callback for ``X-`` headers.

        Group 1 of *match* is the field name (including the colon) and
        group 2 is the field content up to the end of the header.

        When ``highlight-X-header`` is enabled, the field name is emitted as
        ``Name.Tag`` and the content is lexed with the regular ``header``
        rules. Otherwise both parts are emitted as comments.

        Yields ``(index, tokentype, value)`` tuples whose index is relative
        to the text the match was made on.
        """
        if self.highlight_x:
            # field
            yield match.start(1), Name.Tag, match.group(1)

            # content
            # The nested run only sees group 2, so it reports positions
            # counted from the start of that substring. Shift them by the
            # group's offset so every yielded index refers to the full text.
            content_start = match.start(2)
            for index, token, value in self.get_tokens_unprocessed(
                    match.group(2), stack=("root", "header")):
                yield content_start + index, token, value
        else:
            # lowlight
            yield match.start(1), Comment.Special, match.group(1)
            yield match.start(2), Comment.Multiline, match.group(2)

    tokens = {
        "root": [
            # Field names that do not start with "X" (or that start with
            # "X400"): emit the name and lex the value in the "header" state.
            (r"^(?:[A-WYZ]|X400)[\w\-]*:", Name.Tag, "header"),
            # "X-" fields: the name and the whole (possibly folded) value are
            # matched at once and handed to the callback.
            (r"^(X-(?:\w[\w\-]*:))([\s\S]*?\n)(?![ \t])", get_x_header_tokens),
        ],
        "header": [
            # folding
            # A newline followed by a space or tab continues the header;
            # any other newline ends it.
            (r"\n[ \t]", Text.Whitespace),
            (r"\n(?![ \t])", Text.Whitespace, "#pop"),

            # keywords
            (r"\bE?SMTPS?\b", Keyword),
            (r"\b(?:HE|EH)LO\b", Keyword),

            # mailbox
            (r"[\w\.\-\+=]+@[\w\.\-]+", Name.Label),
            (r"<[\w\.\-\+=]+@[\w\.\-]+>", Name.Label),

            # domain
            (r"\b(\w[\w\.-]*\.[\w\.-]*\w[a-zA-Z]+)\b", Name.Function),

            # IPv4
            (
                r"(?<=\b)(?:(?:25[0-5]|2[0-4][0-9]|1?[0-9][0-9]?)\.){3}(?:25[0"
                r"-5]|2[0-4][0-9]|1?[0-9][0-9]?)(?=\b)",
                Number.Integer,
            ),

            # IPv6
            (r"(?<=\b)([0-9a-fA-F]{1,4}:){1,7}:(?!\b)", Number.Hex),
            (r"(?<=\b):((:[0-9a-fA-F]{1,4}){1,7}|:)(?=\b)", Number.Hex),
            (r"(?<=\b)([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}(?=\b)", Number.Hex),
            (r"(?<=\b)([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}(?=\b)", Number.Hex),
            (r"(?<=\b)[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})(?=\b)", Number.Hex),
            (r"(?<=\b)fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}(?=\b)", Number.Hex),
            (r"(?<=\b)([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}(?=\b)", Number.Hex),
            (r"(?<=\b)([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}(?=\b)",
             Number.Hex),
            (r"(?<=\b)([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}(?=\b)",
             Number.Hex),
            (r"(?<=\b)([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}(?=\b)",
             Number.Hex),
            (
                r"(?<=\b)::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}"
                r"[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}"
                r"[0-9])(?=\b)",
                Number.Hex,
            ),
            (
                r"(?<=\b)([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-"
                r"9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-"
                r"9])(?=\b)",
                Number.Hex,
            ),

            # Date time
            (
                r"(?:(Sun|Mon|Tue|Wed|Thu|Fri|Sat),\s+)?(0[1-9]|[1-2]?[0-9]|3["
                r"01])\s+(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s+("
                r"19[0-9]{2}|[2-9][0-9]{3})\s+(2[0-3]|[0-1][0-9]):([0-5][0-9])"
                r"(?::(60|[0-5][0-9]))?(?:\.\d{1,5})?\s+([-\+][0-9]{2}[0-5][0-"
                r"9]|\(?(?:UTC?|GMT|(?:E|C|M|P)(?:ST|ET|DT)|[A-IK-Z])\)?)",
                Name.Decorator,
            ),

            # RFC-2047 encoded string
            # Groups: "=?", charset, "?", encoding (B or Q), "?", text, "?="
            (
                r"(=\?)([\w-]+)(\?)([BbQq])(\?)([\[\w!\"#$%&\'()*+,-./:;<=>@[\\"
                r"\]^_`{|}~]+)(\?=)",
                bygroups(
                    String.Affix,
                    Name.Constant,
                    String.Affix,
                    Keyword.Constant,
                    String.Affix,
                    Number.Hex,
                    String.Affix
                )
            ),

            # others
            (r'[\s]+', Text.Whitespace),
            (r'[\S]', Text),
        ],
    }
130
131
class EmailLexer(DelegatingLexer):
    """
    Lexer for raw E-mail.

    The work is split between `EmailHeaderLexer` and `MIMELexer`, which are
    handed to `DelegatingLexer` together with the ``Comment`` token type.

    Additional options accepted:

    `highlight-X-header`
        Highlight the fields of ``X-`` user-defined email header. (default:
        ``False``).

    .. versionadded:: 2.5
    """

    name = "E-mail"
    aliases = ["email", "eml"]
    filenames = ["*.eml"]
    mimetypes = ["message/rfc822"]

    def __init__(self, **options):
        # Positional arguments for DelegatingLexer, in the order it expects;
        # all user options are forwarded untouched.
        delegation = (EmailHeaderLexer, MIMELexer, Comment)
        super().__init__(*delegation, **options)
152