1# Copyright (c) 2014, Menno Smits
2# Released subject to the New BSD License
3# Please see http://en.wikipedia.org/wiki/BSD_licenses
4
5"""
6Parsing for IMAP command responses with focus on FETCH responses as
7returned by imaplib.
8
9Initially inspired by http://effbot.org/zone/simple-iterator-parser.htm
10"""
11
12# TODO more exact error reporting
13
14from __future__ import unicode_literals
15
16import re
17import sys
18from collections import defaultdict
19
20import six
21
22from .datetime_util import parse_to_datetime
23from .response_lexer import TokenSource
24from .response_types import BodyData, Envelope, Address, SearchIds
25from .exceptions import ProtocolError
26
# Bind six's version-neutral xrange to a module-level name.
xrange = six.moves.xrange

# Public names exported by this module. Note that parse_fetch_response,
# defined further down, is not listed here.
__all__ = ["parse_response", "parse_message_list"]
30
31
def parse_response(data):
    """Parse an IMAP command response into nested Python values.

    *data* is handed to gen_parsed_response() and every item it yields
    is collected into a tuple.

    The special input ``[None]`` short-circuits to an empty list (note:
    a list, not a tuple) without any parsing taking place.
    """
    nothing_to_parse = data == [None]
    return [] if nothing_to_parse else tuple(gen_parsed_response(data))
40
41
# Matches the leading run of space-separated decimal message ids.
_msg_id_pattern = re.compile(r"(\d+(?: +\d+)*)")


def parse_message_list(data):
    """Parse a list of message ids and return them as a list.

    parse_response is also capable of doing this but this is
    faster. This also has special handling of the optional MODSEQ part
    of a SEARCH response.

    The returned list is a SearchIds instance which has a *modseq*
    attribute which contains the MODSEQ response (if returned by the
    server).

    *data* must be a one-element sequence. If that element is falsy an
    empty SearchIds is returned.

    Raises ValueError if *data* does not hold exactly one element or if
    the element does not start with a message id.
    """
    if len(data) != 1:
        raise ValueError("unexpected message list data")

    data = data[0]
    if not data:
        return SearchIds()

    if six.PY3 and isinstance(data, six.binary_type):
        data = data.decode("ascii")

    m = _msg_id_pattern.match(data)
    if not m:
        raise ValueError("unexpected message list format")

    ids = SearchIds(int(n) for n in m.group(1).split())

    # Parse any non-numeric part on the end using parse_response (this
    # is likely to be the MODSEQ section).
    extra = data[m.end(1) :]
    if extra:
        for item in parse_response([extra.encode("ascii")]):
            if (
                isinstance(item, tuple)
                and len(item) == 2
                # parse_response turns digit tokens into ints and NIL into
                # None, so only call .lower() on an actual bytes name.
                # Anything else is ignored like other unrecognised items.
                and isinstance(item[0], six.binary_type)
                and item[0].lower() == b"modseq"
            ):
                ids.modseq = item[1]
            elif isinstance(item, int):
                ids.append(item)
    return ids
86
87
def gen_parsed_response(text):
    """Generate the parsed top-level items contained in *text*.

    Nothing is yielded when *text* is falsy. Otherwise *text* is wrapped
    in a TokenSource and each token is converted with atom().

    A ValueError raised while parsing is re-raised as a ProtocolError
    whose message includes the most recently read token; a ProtocolError
    passes through unchanged.
    """
    if text:
        src = TokenSource(text)
        token = None
        try:
            for token in src:
                yield atom(src, token)
        except ProtocolError:
            # Listed before ValueError so it is never re-wrapped
            # (presumably ProtocolError may derive from ValueError -
            # TODO confirm).
            raise
        except ValueError as err:
            raise ProtocolError("%s: %s" % (str(err), token))
102
103
def parse_fetch_response(text, normalise_times=True, uid_is_key=True):
    """Pull apart IMAP FETCH responses as returned by imaplib.

    Returns a dictionary, keyed by message ID. Each value is a dictionary
    keyed by FETCH field type (eg. b"RFC822"). Field names are
    upper-cased, and every per-message dictionary also carries the
    message's sequence number under b"SEQ".

    *normalise_times* is forwarded as the ``normalise`` argument when
    INTERNALDATE and ENVELOPE dates are parsed.

    When *uid_is_key* is true, a UID item replaces the sequence number
    as the dictionary key for that message (and is not stored in the
    per-message dictionary); otherwise the UID is stored under b"UID".

    If the same key occurs in several responses their data is merged.

    The input ``[None]`` gives an empty plain dict; otherwise the result
    is a ``defaultdict(dict)``.

    Raises ProtocolError when the response is malformed: a non-integer
    message ID or UID, a missing or non-tuple item list, an odd number
    of items, or an item name that is not a byte string.
    """
    if text == [None]:
        return {}
    response = gen_parsed_response(text)

    parsed_response = defaultdict(dict)
    while True:
        try:
            msg_id = seq = _int_or_error(next(response), "invalid message ID")
        except StopIteration:
            break

        try:
            msg_response = next(response)
        except StopIteration:
            raise ProtocolError("unexpected EOF")

        if not isinstance(msg_response, tuple):
            raise ProtocolError("bad response type: %s" % repr(msg_response))
        if len(msg_response) % 2:
            raise ProtocolError(
                "uneven number of response items: %s" % repr(msg_response)
            )

        # always return the sequence of the message, so it is available
        # even if we return keyed by UID.
        msg_data = {b"SEQ": seq}

        # Items alternate name, value, name, value, ...
        for name, value in zip(msg_response[::2], msg_response[1::2]):
            # The parser yields ints, None or nested tuples for some
            # tokens; none of those is a valid item name, so report a
            # protocol error instead of failing on .upper().
            if not isinstance(name, six.binary_type):
                raise ProtocolError("bad FETCH item name: %s" % repr(name))
            word = name.upper()

            if word == b"UID":
                uid = _int_or_error(value, "invalid UID")
                if uid_is_key:
                    msg_id = uid
                else:
                    msg_data[word] = uid
            elif word == b"INTERNALDATE":
                msg_data[word] = _convert_INTERNALDATE(value, normalise_times)
            elif word == b"ENVELOPE":
                msg_data[word] = _convert_ENVELOPE(value, normalise_times)
            elif word in (b"BODY", b"BODYSTRUCTURE"):
                msg_data[word] = BodyData.create(value)
            else:
                msg_data[word] = value

        parsed_response[msg_id].update(msg_data)

    return parsed_response
158
159
160def _int_or_error(value, error_text):
161    try:
162        return int(value)
163    except (TypeError, ValueError):
164        raise ProtocolError("%s: %s" % (error_text, repr(value)))
165
166
167def _convert_INTERNALDATE(date_string, normalise_times=True):
168    if date_string is None:
169        return None
170
171    try:
172        return parse_to_datetime(date_string, normalise=normalise_times)
173    except ValueError:
174        return None
175
176
def _convert_ENVELOPE(envelope_response, normalise_times=True):
    """Build an Envelope from a parsed ENVELOPE response sequence.

    Positions used: 0 is the date, 1 the subject, 2-7 the address lists
    (from, sender, reply_to, to, cc, bcc), 8 in_reply_to and 9
    message_id.

    The date is parsed with parse_to_datetime() (*normalise_times* is
    passed as ``normalise``); a falsy or unparseable date becomes None.
    Each non-empty address list becomes a tuple of Address objects with
    falsy entries skipped; an empty or missing list becomes None.
    """
    raw_date = envelope_response[0]
    dt = None
    if raw_date:
        try:
            dt = parse_to_datetime(raw_date, normalise=normalise_times)
        except ValueError:
            # Leave the date unset when it cannot be parsed.
            dt = None

    subject = envelope_response[1]

    # One entry per address header: a tuple of Address objects, or None
    # when the header's list is empty/absent.
    addresses = [
        tuple(Address(*fields) for fields in group if fields) if group else None
        for group in envelope_response[2:8]
    ]

    return Envelope(
        dt,
        subject,
        *addresses,
        in_reply_to=envelope_response[8],
        message_id=envelope_response[9]
    )
207
208
def atom(src, token):
    """Convert one token into its Python value.

    * b"(" - parse and return a tuple, reading further tokens from *src*
    * b"NIL" - None
    * b"{N}" - the literal currently held by *src* (``current_literal``),
      which must exist and be exactly N long or ProtocolError is raised
    * a double-quoted token - its contents without the quotes
    * all digits with no leading zero (or exactly b"0") - an int
    * anything else - the token unchanged
    """
    if token == b"(":
        return parse_tuple(src)

    if token == b"NIL":
        return None

    if token.startswith(b"{"):
        # Literal marker: the number between the braces is the size the
        # accompanying literal must have.
        expected_size = int(token[1:-1])
        payload = src.current_literal
        if payload is None:
            raise ProtocolError("No literal corresponds to %r" % token)
        actual_size = len(payload)
        if actual_size != expected_size:
            raise ProtocolError(
                "Expecting literal of size %d, got %d"
                % (expected_size, actual_size)
            )
        return payload

    # A lone '"' is not a quoted string, hence the length check.
    is_quoted = (
        len(token) >= 2 and token.startswith(b'"') and token.endswith(b'"')
    )
    if is_quoted:
        return token[1:-1]

    # this prevents converting items like 0123 to 123
    has_leading_zero = token.startswith(b"0")
    if token.isdigit() and (len(token) == 1 or not has_leading_zero):
        return int(token)

    return token
232
233
def parse_tuple(src):
    """Read tokens from *src* up to the closing b")" and return a tuple.

    Every token before the terminator is converted with atom(). If
    *src* runs out before b")" is seen, ProtocolError is raised with the
    items collected so far in its message.
    """
    items = []
    for token in src:
        if token != b")":
            items.append(atom(src, token))
            continue
        return tuple(items)

    # The source was exhausted without finding the terminator.
    raise ProtocolError('Tuple incomplete before "(%s"' % _fmt_tuple(items))
242
243
244def _fmt_tuple(t):
245    return " ".join(str(item) for item in t)
246