# Copyright (c) 2014, Menno Smits
# Released subject to the New BSD License
# Please see http://en.wikipedia.org/wiki/BSD_licenses

"""
Parsing for IMAP command responses with focus on FETCH responses as
returned by imaplib.

Initially inspired by http://effbot.org/zone/simple-iterator-parser.htm
"""

# TODO more exact error reporting

from __future__ import unicode_literals

import re
import sys
from collections import defaultdict

import six

from .datetime_util import parse_to_datetime
from .response_lexer import TokenSource
from .response_types import BodyData, Envelope, Address, SearchIds
from .exceptions import ProtocolError

xrange = six.moves.xrange

__all__ = ["parse_response", "parse_message_list"]


def parse_response(data):
    """Pull apart IMAP command responses.

    Returns nested tuples of appropriately typed objects.

    The special input ``[None]`` yields an empty *list* (not a tuple);
    this is kept as-is so that existing callers comparing against ``[]``
    keep working.

    Raises ProtocolError if the response cannot be parsed.
    """
    if data == [None]:
        return []
    return tuple(gen_parsed_response(data))


# One or more space-separated runs of digits at the start of the data.
_msg_id_pattern = re.compile(r"(\d+(?: +\d+)*)")


def parse_message_list(data):
    """Parse a list of message ids and return them as a list.

    parse_response is also capable of doing this but this is
    faster. This also has special handling of the optional MODSEQ part
    of a SEARCH response.

    The returned list is a SearchIds instance which has a *modseq*
    attribute which contains the MODSEQ response (if returned by the
    server).

    *data* must be a single-item sequence. Raises ValueError if it is
    not, or if the item does not start with a message id.
    """
    if len(data) != 1:
        raise ValueError("unexpected message list data")

    data = data[0]
    if not data:
        return SearchIds()

    # The regex is a text pattern, so bytes must be decoded on Python 3.
    if six.PY3 and isinstance(data, six.binary_type):
        data = data.decode("ascii")

    m = _msg_id_pattern.match(data)
    if not m:
        raise ValueError("unexpected message list format")

    ids = SearchIds(int(n) for n in m.group(1).split())

    # Parse any non-numeric part on the end using parse_response (this
    # is likely to be the MODSEQ section).
    extra = data[m.end(1) :]
    if extra:
        for item in parse_response([extra.encode("ascii")]):
            if (
                isinstance(item, tuple)
                and len(item) == 2
                # Only byte-string names can be compared; a tuple such as
                # (1, 2) is ignored rather than raising AttributeError.
                and isinstance(item[0], six.binary_type)
                and item[0].lower() == b"modseq"
            ):
                ids.modseq = item[1]
            elif isinstance(item, six.integer_types):
                # integer_types also covers Python 2 ``long`` values.
                ids.append(item)
    return ids


def gen_parsed_response(text):
    """Yield the parsed top-level items of an IMAP response.

    *text* is handed to TokenSource for tokenising; nothing is yielded
    when it is empty or None. Each token is converted using atom().

    ProtocolError is propagated unchanged; a ValueError raised while
    parsing is re-raised as ProtocolError with the most recently read
    token appended to the message.
    """
    if not text:
        return
    src = TokenSource(text)

    token = None
    try:
        for token in src:
            yield atom(src, token)
    except ProtocolError:
        raise
    except ValueError:
        _, err, _ = sys.exc_info()
        raise ProtocolError("%s: %s" % (str(err), token))


def parse_fetch_response(text, normalise_times=True, uid_is_key=True):
    """Pull apart IMAP FETCH responses as returned by imaplib.

    Returns a dictionary, keyed by message ID. Each value a dictionary
    keyed by FETCH field type (eg."RFC822").

    Each per-message dictionary always has a b"SEQ" entry holding the
    message sequence number. If *uid_is_key* is true and the response
    has a UID item, the UID is used as the key; otherwise the sequence
    number is the key and any UID is stored under b"UID".

    *normalise_times* is passed through to the date parsing used for
    INTERNALDATE and ENVELOPE items.

    Raises ProtocolError if the response is malformed.
    """
    if text == [None]:
        return {}
    response = gen_parsed_response(text)

    parsed_response = defaultdict(dict)
    while True:
        # Only the next() call may legitimately signal the end of input.
        try:
            first = next(response)
        except StopIteration:
            break
        msg_id = seq = _int_or_error(first, "invalid message ID")

        try:
            msg_response = next(response)
        except StopIteration:
            raise ProtocolError("unexpected EOF")

        if not isinstance(msg_response, tuple):
            raise ProtocolError("bad response type: %s" % repr(msg_response))
        if len(msg_response) % 2:
            raise ProtocolError(
                "uneven number of response items: %s" % repr(msg_response)
            )

        # always return the sequence of the message, so it is available
        # even if we return keyed by UID.
        msg_data = {b"SEQ": seq}
        for i in xrange(0, len(msg_response), 2):
            name = msg_response[i]
            value = msg_response[i + 1]

            # atom() may produce ints, None or tuples; such a field name
            # is a protocol violation, not a programming error.
            if not isinstance(name, six.binary_type):
                raise ProtocolError("bad field name: %s" % repr(name))
            word = name.upper()

            if word == b"UID":
                uid = _int_or_error(value, "invalid UID")
                if uid_is_key:
                    msg_id = uid
                else:
                    msg_data[word] = uid
            elif word == b"INTERNALDATE":
                msg_data[word] = _convert_INTERNALDATE(value, normalise_times)
            elif word == b"ENVELOPE":
                msg_data[word] = _convert_ENVELOPE(value, normalise_times)
            elif word in (b"BODY", b"BODYSTRUCTURE"):
                msg_data[word] = BodyData.create(value)
            else:
                msg_data[word] = value

        # update() rather than assignment so that several responses for
        # the same message are merged into one dictionary.
        parsed_response[msg_id].update(msg_data)

    return parsed_response


def _int_or_error(value, error_text):
    """Return int(value), raising ProtocolError if it cannot be converted.

    *error_text* is used as the prefix of the error message.
    """
    try:
        return int(value)
    except (TypeError, ValueError):
        raise ProtocolError("%s: %s" % (error_text, repr(value)))


def _convert_INTERNALDATE(date_string, normalise_times=True):
    """Convert an INTERNALDATE value using parse_to_datetime.

    Returns None if *date_string* is None or cannot be parsed.
    """
    if date_string is None:
        return None

    try:
        return parse_to_datetime(date_string, normalise=normalise_times)
    except ValueError:
        return None


def _convert_ENVELOPE(envelope_response, normalise_times=True):
    """Build an Envelope from a parsed ENVELOPE response.

    *envelope_response* is indexed as: 0 date, 1 subject, 2-7 address
    lists, 8 in-reply-to, 9 message id. An empty or unparseable date
    becomes None. Each non-empty address list becomes a tuple of
    Address objects; an empty one becomes None.
    """
    dt = None
    if envelope_response[0]:
        try:
            dt = parse_to_datetime(envelope_response[0], normalise=normalise_times)
        except ValueError:
            pass

    subject = envelope_response[1]

    # addresses contains a tuple of addresses
    # from, sender, reply_to, to, cc, bcc headers
    addresses = []
    for addr_list in envelope_response[2:8]:
        if addr_list:
            # Empty entries inside an address list are skipped.
            addresses.append(
                tuple(Address(*addr_tuple) for addr_tuple in addr_list if addr_tuple)
            )
        else:
            addresses.append(None)

    return Envelope(
        dt,
        subject,
        *addresses,
        in_reply_to=envelope_response[8],
        message_id=envelope_response[9]
    )


def atom(src, token):
    """Convert a single token into a Python value.

    - b"(" starts a nested tuple, read from *src* by parse_tuple()
    - b"NIL" becomes None
    - a token starting with b"{" is a literal marker; the literal itself
      is taken from src.current_literal and its length is checked
    - a double-quoted token is returned without its quotes
    - an all-digit token without a leading zero becomes an int
    - anything else is returned unchanged

    Raises ProtocolError if a literal is missing or has the wrong size.
    """
    if token == b"(":
        return parse_tuple(src)
    elif token == b"NIL":
        return None
    elif token[:1] == b"{":
        literal_len = int(token[1:-1])
        literal_text = src.current_literal
        if literal_text is None:
            raise ProtocolError("No literal corresponds to %r" % token)
        if len(literal_text) != literal_len:
            raise ProtocolError(
                "Expecting literal of size %d, got %d"
                % (literal_len, len(literal_text))
            )
        return literal_text
    elif len(token) >= 2 and (token[:1] == token[-1:] == b'"'):
        return token[1:-1]
    elif token.isdigit() and (token[:1] != b"0" or len(token) == 1):
        # this prevents converting items like 0123 to 123
        return int(token)
    else:
        return token


def parse_tuple(src):
    """Read tokens from *src* up to the closing b")" and return a tuple.

    Raises ProtocolError if *src* is exhausted before the tuple is closed.
    """
    out = []
    for token in src:
        if token == b")":
            return tuple(out)
        out.append(atom(src, token))
    # no terminator
    raise ProtocolError('Tuple incomplete before "(%s"' % _fmt_tuple(out))


def _fmt_tuple(t):
    """Return the items of *t* as a space-separated string."""
    return " ".join(str(item) for item in t)