1# Copyright (C) 2001-2010 Python Software Foundation
2# Author: Barry Warsaw
3# Contact: email-sig@python.org
4
5"""Miscellaneous utilities."""
6
# Names exported by a star-import of this module.  Helpers defined below
# that are not listed here (e.g. fix_eols, _bdecode) stay importable by
# name but are not part of the star-import surface.
__all__ = [
    'collapse_rfc2231_value',
    'decode_params',
    'decode_rfc2231',
    'encode_rfc2231',
    'formataddr',
    'formatdate',
    'getaddresses',
    'make_msgid',
    'mktime_tz',
    'parseaddr',
    'parsedate',
    'parsedate_tz',
    'unquote',
    ]
22
23import os
24import re
25import time
26import base64
27import random
28import socket
29import urllib
30import warnings
31
32from email._parseaddr import quote
33from email._parseaddr import AddressList as _AddressList
34from email._parseaddr import mktime_tz
35
# We need workarounds for bugs in these methods in older Pythons (see below)
37from email._parseaddr import parsedate as _parsedate
38from email._parseaddr import parsedate_tz as _parsedate_tz
39
40from quopri import decodestring as _qdecode
41
42# Intrapackage imports
43from email.encoders import _bencode, _qencode
44
# Small string constants shared by the functions in this module.
COMMASPACE = ', '       # separator getaddresses() uses to join field values
EMPTYSTRING = ''        # joiner decode_params() uses for continuation segments
UEMPTYSTRING = u''
CRLF = '\r\n'           # line ending fix_eols() normalizes to
TICK = "'"              # delimiter decode_rfc2231() splits on

# Any one of the characters that makes formataddr() wrap the real name in
# double quotes.
specialsre = re.compile(r'[][\\()<>@,:;".]')
# Any one of the characters that formataddr() prefixes with a backslash.
escapesre = re.compile(r'[][\\()"]')
53
54
55
56# Helpers
57
58def _identity(s):
59    return s
60
61
62def _bdecode(s):
63    """Decodes a base64 string.
64
65    This function is equivalent to base64.decodestring and it's retained only
66    for backward compatibility. It used to remove the last \\n of the decoded
67    string, if it had any (see issue 7143).
68    """
69    if not s:
70        return s
71    return base64.decodestring(s)
72
73
74
def fix_eols(s):
    """Return s with every bare \\n and every bare \\r turned into \\r\\n."""
    # First pass: newlines that are not already preceded by a carriage
    # return.  Second pass: carriage returns not followed by a newline.
    without_bare_lf = re.sub(r'(?<!\r)\n', CRLF, s)
    return re.sub(r'\r(?!\n)', CRLF, without_bare_lf)
82
83
84
def formataddr(pair):
    """Build an address header value from a (realname, email_address) pair.

    This is the inverse of parseaddr(): the returned string is suitable
    for an RFC 2822 From, To or Cc header.

    When the real name is false the email address is returned unmodified.
    Otherwise the name is backslash-escaped, wrapped in double quotes if
    it contains any character matched by specialsre, and followed by the
    address in angle brackets.
    """
    realname, email_address = pair
    if not realname:
        return email_address
    # The quoting decision is made on the name as given, before escaping.
    wrapper = '"' if specialsre.search(realname) else ''
    escaped = escapesre.sub(r'\\\g<0>', realname)
    return '%s%s%s <%s>' % (wrapper, escaped, wrapper, email_address)
101
102
103
def getaddresses(fieldvalues):
    """Return a list of (REALNAME, EMAIL) for each fieldvalue.

    The field values are joined with ', ' and parsed as one address list.
    """
    joined = COMMASPACE.join(fieldvalues)
    return _AddressList(joined).addresslist
109
110
111
# Matches one token of the form =?charset?encoding?atom?= (this looks like
# the RFC 2047 encoded-word syntax).  Named groups: 'charset', 'encoding'
# (a single q or b, matched case-insensitively) and 'atom'.  Nothing in the
# visible part of this module uses it; presumably it is kept for importers.
ecre = re.compile(r'''
  =\?                   # literal =?
  (?P<charset>[^?]*?)   # non-greedy up to the next ? is the charset
  \?                    # literal ?
  (?P<encoding>[qb])    # either a "q" or a "b", case insensitive
  \?                    # literal ?
  (?P<atom>.*?)         # non-greedy up to the next ?= is the atom
  \?=                   # literal ?=
  ''', re.VERBOSE | re.IGNORECASE)
121
122
123
def formatdate(timeval=None, localtime=False, usegmt=False):
    """Return a date string in the format RFC 2822 specifies, e.g.:

    Fri, 09 Nov 2001 01:08:47 -0000

    timeval, when given, is a floating point time value as accepted by
    gmtime() and localtime(); when omitted the current time is used.

    localtime is a flag: when true, timeval is interpreted in the local
    timezone and the result carries the local numeric offset, taking
    daylight savings time into account.  When false the date is in UTC.

    usegmt selects the zone text for the UTC case: true writes the ascii
    string "GMT" (as HTTP needs), false writes "-0000".  It is ignored
    when localtime is true.
    """
    # strftime() is deliberately avoided: it honors the locale, while RFC
    # 2822 requires the English day and month abbreviations.
    if timeval is None:
        timeval = time.time()
    if not localtime:
        now = time.gmtime(timeval)
        zone = 'GMT' if usegmt else '-0000'
    else:
        now = time.localtime(timeval)
        # Pick the DST offset only if the local zone has DST at all and
        # the DST flag of this particular time is set.
        dst_active = time.daylight and now[-1]
        offset = time.altzone if dst_active else time.timezone
        hours, leftover = divmod(abs(offset), 3600)
        # offset counts seconds west of UTC, whereas the printed zone is
        # expressed east of UTC, hence the inverted sign.
        sign = '-' if offset > 0 else '+'
        zone = '%s%02d%02d' % (sign, hours, leftover // 60)
    day_name = ('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun')[now[6]]
    month_name = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                  'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')[now[1] - 1]
    return '%s, %02d %s %04d %02d:%02d:%02d %s' % (
        day_name, now[2], month_name,
        now[0], now[3], now[4], now[5],
        zone)
174
175
176
def make_msgid(idstring=None):
    """Return a string suitable for an RFC 2822 compliant Message-ID, e.g:

    <142480216486.20800.16526388040877946887@nightshade.la.mastaler.com>

    The id is built from the current time in hundredths of a second, the
    process id and 64 random bits, followed by '@' and the fully
    qualified host name.

    idstring, when given, is appended (after a dot) to the part before
    the '@' to strengthen the uniqueness of the message id.
    """
    timeval = int(time.time()*100)
    pid = os.getpid()
    randint = random.getrandbits(64)
    suffix = '' if idstring is None else '.' + idstring
    idhost = socket.getfqdn()
    return '<%d.%d.%d%s@%s>' % (timeval, pid, randint, suffix, idhost)
195
196
197
# These functions are in the standalone mimelib version only because they've
# subsequently been fixed in the latest Python versions.  We use this to work
# around broken older Pythons.
def parsedate(data):
    """Parse a date string, tolerating a missing value.

    Return None when data is false (empty string, None, ...); otherwise
    return whatever email._parseaddr.parsedate produces for data.
    """
    return _parsedate(data) if data else None
205
206
def parsedate_tz(data):
    """Parse a date string with timezone, tolerating a missing value.

    Return None when data is false (empty string, None, ...); otherwise
    return whatever email._parseaddr.parsedate_tz produces for data.
    """
    return _parsedate_tz(data) if data else None
211
212
def parseaddr(addr):
    """
    Split addr into its realname and email address parts.

    The result is the first (realname, email_address) entry found in
    addr.  If parsing yields no address at all, the 2-tuple ('', '') is
    returned instead.
    """
    parsed = _AddressList(addr).addresslist
    return parsed[0] if parsed else ('', '')
224
225
226# rfc822.unquote() doesn't properly de-backslash-ify in Python pre-2.3.
def unquote(str):
    """Remove quotes from a string.

    A string enclosed in double quotes loses them and has the backslash
    escapes \\\\ and \\" undone.  A string enclosed in angle brackets
    just loses the brackets.  Anything else, including every string
    shorter than two characters, is returned unchanged.
    """
    if len(str) <= 1:
        return str
    inner = str[1:-1]
    if str.startswith('"') and str.endswith('"'):
        # Un-double the backslashes first, then un-escape the quotes.
        return inner.replace('\\\\', '\\').replace('\\"', '"')
    if str.startswith('<') and str.endswith('>'):
        return inner
    return str
235
236
237
238# RFC2231-related functions - parameter encoding and decoding
def decode_rfc2231(s):
    """Decode string according to RFC 2231.

    s is split on its first two single quotes.  When both are present
    the three resulting pieces are returned as a list.  With fewer than
    two single quotes the result is the tuple (None, None, s).
    """
    pieces = s.split(TICK, 2)
    if len(pieces) > 2:
        return pieces
    return None, None, s
245
246
def encode_rfc2231(s, charset=None, language=None):
    """Encode string according to RFC 2231.

    s is always %-quoted, with no character treated as safe.

    If neither charset nor language is given, the quoted string is
    returned on its own.  Otherwise the result has the form

        charset'language'quoted-s

    where a charset or language that was not given is written as the
    empty string.
    """
    s = urllib.quote(s, safe='')
    if charset is None and language is None:
        return s
    # A missing half of the prefix must be emitted empty: formatting None
    # with %s would put the literal text "None" into the encoded value.
    if charset is None:
        charset = ''
    if language is None:
        language = ''
    return "%s'%s'%s" % (charset, language, s)
261
262
# Matches a parameter name of the form NAME*, NAME*N or NAME*N* (N being a
# decimal number).  Group 'name' is the part before the first '*'; group
# 'num' is the number, or None when the name is just NAME*.
rfc2231_continuation = re.compile(r'^(?P<name>\w+)\*((?P<num>[0-9]+)\*?)?$')
264
def decode_params(params):
    """Decode parameters list according to RFC 2231.

    params is a sequence of 2-tuples containing (param name, string value).
    It is not modified.

    The first pair is copied to the result as it is.  Every later pair
    has its value unquoted; pairs whose name does not carry an RFC 2231
    marker (NAME*, NAME*N or NAME*N*) are re-quoted and appended in order.
    Marked pairs are grouped by base name, sorted, and joined into one
    value per name, which is appended after all ordinary parameters.  If
    any segment of a group had a name ending in '*', the segments so
    marked are %-decoded and the joined value is split with
    decode_rfc2231(), so the entry becomes
    (name, (charset, language, '"value"')); otherwise it is
    (name, '"value"').

    Returns the new list of parameters.  Raises IndexError if params is
    empty.
    """
    # The first parameter is passed through untouched.
    name, value = params[0]
    new_params = [(name, value)]
    # Map parameter's name to a list of continuations.  The values are a
    # 3-tuple of the continuation number, the string value, and a flag
    # specifying whether a particular segment is %-encoded.
    rfc2231_params = {}
    # Walk a slice rather than repeatedly popping from the front of a
    # copy: same order, but linear, and any sequence type is accepted.
    for name, value in params[1:]:
        encoded = name.endswith('*')
        value = unquote(value)
        mo = rfc2231_continuation.match(name)
        if mo:
            name, num = mo.group('name', 'num')
            if num is not None:
                num = int(num)
            rfc2231_params.setdefault(name, []).append((num, value, encoded))
        else:
            new_params.append((name, '"%s"' % quote(value)))
    for name, continuations in rfc2231_params.items():
        segments = []
        extended = False
        # Sort by number
        continuations.sort()
        # And now append all values in numerical order, converting
        # %-encodings for the encoded segments.  If any of the
        # continuation names ends in a *, then the entire string, after
        # decoding segments and concatenating, must have the charset and
        # language specifiers at the beginning of the string.
        for num, s, encoded in continuations:
            if encoded:
                s = urllib.unquote(s)
                extended = True
            segments.append(s)
        value = quote(EMPTYSTRING.join(segments))
        if extended:
            charset, language, value = decode_rfc2231(value)
            new_params.append((name, (charset, language, '"%s"' % value)))
        else:
            new_params.append((name, '"%s"' % value))
    return new_params
317
def collapse_rfc2231_value(value, errors='replace',
                           fallback_charset='us-ascii'):
    """Collapse a parameter value into a single unquoted string.

    value is either a plain string or a tuple such as the
    (charset, language, text) entries that decode_params() produces.

    A non-tuple value is simply unquoted and returned.  For a tuple, the
    third item is unquoted and decoded to unicode with the charset named
    by the first item ('us-ascii' when that item is false), using errors
    as the decoding error handler.  If Python does not know the charset,
    fallback_charset is used for the decoding instead.
    """
    if not isinstance(value, tuple):
        return unquote(value)
    rawval = unquote(value[2])
    charset = value[0] or 'us-ascii'
    try:
        return unicode(rawval, charset, errors)
    except LookupError:
        # XXX charset is unknown to Python.
        return unicode(rawval, fallback_charset, errors)
330