1# -*- coding: utf-8 -*-
2#
3# Copyright (C) 2008-2009 Christopher Lenz
4# All rights reserved.
5#
6# This software is licensed as described in the file COPYING, which
7# you should have received as part of this distribution.
8
9"""Support for streamed reading and writing of multipart MIME content."""
10
11from base64 import b64encode
12from cgi import parse_header
13from email import header
14
15try:
16    from hashlib import md5
17except ImportError:
18    from md5 import new as md5
19
20import uuid
21
22from couchdb import util
23
# Names exported by a star-import of this module.
__all__ = [
    'read_multipart',
    'write_multipart',
]
# Markup used by the docstrings in this module.
__docformat__ = 'restructuredtext en'


# Line terminator (carriage return + line feed) as a byte string.
CRLF = b'\r\n'
29
30
def read_multipart(fileobj, boundary=None):
    """Simple streaming MIME multipart parser.

    This function takes a file-like object reading a MIME envelope, and yields
    a ``(headers, is_multipart, payload)`` tuple for every part found, where
    ``headers`` is a dictionary containing the MIME headers of that part (with
    names lower-cased), ``is_multipart`` is a boolean indicating whether the
    part is itself multipart, and ``payload`` is either a byte string (if
    ``is_multipart`` is false), or an iterator over the nested parts.

    Note that the iterator produced for nested multipart payloads MUST be fully
    consumed, even if you wish to skip over the content.

    Further details of the parsing:

    * every yielded ``headers`` dictionary is an independent copy, so it stays
      valid after the iterator has been advanced;
    * lines that precede a boundary without belonging to a part (a preamble,
      or the epilogue of a nested envelope) are discarded;
    * a part without a ``Content-Type`` header is treated as ``text/plain``;
    * the closing delimiter is recognized with or without a trailing newline;
    * if a part has a ``Content-MD5`` header, the payload is checked against
      it.

    :param fileobj: a file-like object whose iteration yields lines as byte
                    strings
    :param boundary: the part boundary string, will generally be determined
                     automatically from the headers of the outermost multipart
                     envelope
    :return: an iterator over the parts
    :raise ValueError: if the payload of a part does not match the digest
                       given in its ``Content-MD5`` header
    :since: 0.5
    """
    headers = {}
    buf = []
    # Without an explicit boundary we are reading the outermost envelope and
    # therefore start inside its header block.
    outer = in_headers = boundary is None

    # Delimiters are kept without line terminator; every line is compared
    # after its own terminator has been stripped.
    if boundary:
        next_boundary = ('--' + boundary).encode('ascii')
        last_boundary = next_boundary + b'--'
    else:
        next_boundary = last_boundary = None

    def _current_part():
        # The line break in front of a delimiter belongs to the delimiter,
        # not to the payload.
        payload = b''.join(buf)
        if payload.endswith(b'\r\n'):
            payload = payload[:-2]
        elif payload.endswith(b'\n'):
            payload = payload[:-1]
        # Header names are stored as text, so the lookup key must be text too.
        content_md5 = headers.get('content-md5')
        if content_md5:
            if isinstance(content_md5, bytes):
                content_md5 = content_md5.decode('ascii')
            digest = b64encode(md5(payload).digest()).decode('ascii')
            if content_md5 != digest:
                raise ValueError('data integrity check failed')
        # Hand out a copy: the working dictionary is reused for the next part.
        return dict(headers), False, payload

    for line in fileobj:
        stripped = line.rstrip(b'\r\n')

        if in_headers:
            if stripped:
                name, value = [item.strip()
                               for item in stripped.split(b':', 1)]
                name = name.lower().decode('ascii')
                value, charset = header.decode_header(
                    value.decode('utf-8')
                )[0]
                if charset is None:
                    headers[name] = value
                else:
                    headers[name] = value.decode(charset)
            else:
                # A blank line terminates the header block.
                in_headers = False
                mimetype, params = parse_header(
                    headers.get('content-type', 'text/plain')
                )
                if mimetype.startswith('multipart/'):
                    sub_boundary = params['boundary']
                    sub_parts = read_multipart(fileobj, boundary=sub_boundary)
                    if boundary is not None:
                        # Nested envelope: the consumer iterates over it.
                        yield dict(headers), True, sub_parts
                        headers.clear()
                        del buf[:]
                    else:
                        # Outermost envelope: its parts are our parts.
                        for part in sub_parts:
                            yield part
                        return

        elif stripped == next_boundary:
            # We've reached the start of a new part, as indicated by the
            # boundary
            if headers:
                if not outer:
                    yield _current_part()
                else:
                    outer = False
                headers.clear()
            # Always drop buffered lines, so that a preamble or epilogue does
            # not end up in the payload of the following part.
            del buf[:]
            in_headers = True

        elif stripped == last_boundary:
            # We're done with this multipart envelope
            break

        else:
            buf.append(line)

    if not outer and headers:
        yield _current_part()
118
119
class MultipartWriter(object):
    """Streaming writer for one multipart MIME envelope.

    Creating an instance immediately writes the envelope headers (including
    the generated ``Content-Type: multipart/...`` header) to the file object.
    Parts are then appended with `add()`, nested envelopes are started with
    `open()`, and the envelope is terminated with `close()`.

    Instances can be used as context managers; `close()` is called when the
    ``with`` block is left.
    """

    def __init__(self, fileobj, headers=None, subtype='mixed', boundary=None):
        """Write the headers of a new multipart envelope.

        :param fileobj: a writable file-like object accepting byte strings
        :param headers: optional dictionary of additional headers for the
                        envelope; the dictionary is not modified
        :param subtype: the subtype of the multipart MIME type
        :param boundary: the boundary string separating the parts; a random
                         one is generated if omitted
        """
        self.fileobj = fileobj
        if boundary is None:
            boundary = '==' + uuid.uuid4().hex + '=='
        self.boundary = boundary
        # Work on a copy so that the caller's dictionary is left untouched.
        headers = dict(headers) if headers else {}
        headers['Content-Type'] = 'multipart/%s; boundary="%s"' % (
            subtype, self.boundary
        )
        self._write_headers(headers)

    def open(self, headers=None, subtype='mixed', boundary=None):
        """Start a nested multipart envelope as the next part.

        :param headers: optional dictionary of headers for the nested envelope
        :param subtype: the multipart subtype of the nested envelope
        :param boundary: the boundary of the nested envelope; generated if
                         omitted
        :return: the `MultipartWriter` for the nested envelope, which must be
                 closed before this envelope is written to again
        """
        self._write_boundary()
        return MultipartWriter(self.fileobj, headers=headers, subtype=subtype,
                               boundary=boundary)

    def add(self, mimetype, content, headers=None):
        """Append a part to the envelope.

        Text content is encoded with the charset named in `mimetype`, or with
        UTF-8 if there is none (in which case ``;charset=utf-8`` is appended
        to the content type). For non-empty content the ``Content-Length``
        and ``Content-MD5`` headers are generated.

        :param mimetype: the content type of the part
        :param content: the payload, as byte string or text
        :param headers: optional dictionary of additional headers for the
                        part; the dictionary is not modified
        """
        self._write_boundary()
        # Work on a copy: reusing one dictionary for several parts must not
        # carry the generated headers of one part over to the next.
        headers = dict(headers) if headers else {}

        params = parse_header(mimetype)[1]
        if isinstance(content, util.utype):
            if 'charset' in params:
                content = content.encode(params['charset'])
            else:
                content = content.encode('utf-8')
                mimetype = mimetype + ';charset=utf-8'

        headers['Content-Type'] = mimetype
        if content:
            headers['Content-Length'] = str(len(content))
            digest = b64encode(md5(content).digest()).decode('ascii')
            headers['Content-MD5'] = digest
        self._write_headers(headers)
        if content:
            # XXX: throw an exception if a boundary appears in the content??
            self.fileobj.write(content)
            self.fileobj.write(CRLF)

    def close(self):
        """Terminate the envelope by writing the closing delimiter."""
        self._write_boundary(final=True)

    def _write_boundary(self, final=False):
        """Write a delimiter line; the closing one if `final` is true."""
        # One encoding for all delimiters, so that opening and closing
        # delimiters of an envelope are always consistent.
        delimiter = b'--' + self.boundary.encode('utf-8')
        if final:
            delimiter += b'--'
        self.fileobj.write(delimiter + CRLF)

    def _write_headers(self, headers):
        """Write a header block followed by the terminating blank line.

        Headers are written sorted by name. Values containing non-ASCII
        characters are encoded via ``email.header.make_header``.

        :param headers: dictionary mapping header names to text values; may
                        be empty or ``None``
        """
        if headers:
            for name in sorted(headers):
                value = headers[name]
                # The two encodings differ only for non-ASCII values.
                if value.encode('ascii', 'ignore') != value.encode('utf-8'):
                    value = header.make_header([(value, 'utf-8')]).encode()
                self.fileobj.write(name.encode('utf-8'))
                self.fileobj.write(b': ')
                self.fileobj.write(value.encode('utf-8'))
                self.fileobj.write(CRLF)
        self.fileobj.write(CRLF)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # The envelope is closed even when the block raised.
        self.close()
190
191
def write_multipart(fileobj, subtype='mixed', boundary=None):
    r"""Create a streaming writer for multipart MIME content.

    The returned `MultipartWriter` writes the envelope headers to `fileobj`
    right away. A flat envelope is produced by calling
    ``add(mimetype, content, [headers])`` once per part, followed by a final
    ``close()``:

    >>> from couchdb.util import StringIO

    >>> buf = StringIO()
    >>> envelope = write_multipart(buf, boundary='==123456789==')
    >>> envelope.add('text/plain', b'Just testing')
    >>> envelope.close()
    >>> print(buf.getvalue().replace(b'\r\n', b'\n').decode('utf-8'))
    Content-Type: multipart/mixed; boundary="==123456789=="
    <BLANKLINE>
    --==123456789==
    Content-Length: 12
    Content-MD5: nHmX4a6el41B06x2uCpglQ==
    Content-Type: text/plain
    <BLANKLINE>
    Just testing
    --==123456789==--
    <BLANKLINE>

    The boundary is passed explicitly here only to keep the output
    predictable. When `boundary` is left out, the writer generates a random
    boundary string.

    Nested structures are written by calling ``open([headers])`` on the
    enclosing envelope; every envelope has to be finished with its own
    ``close()``:

    >>> buf = StringIO()
    >>> envelope = write_multipart(buf, boundary='==123456789==')
    >>> part = envelope.open(boundary='==abcdefghi==')
    >>> part.add('text/plain', u'Just testing')
    >>> part.close()
    >>> envelope.close()
    >>> print(buf.getvalue().replace(b'\r\n', b'\n').decode('utf-8'))
    Content-Type: multipart/mixed; boundary="==123456789=="
    <BLANKLINE>
    --==123456789==
    Content-Type: multipart/mixed; boundary="==abcdefghi=="
    <BLANKLINE>
    --==abcdefghi==
    Content-Length: 12
    Content-MD5: nHmX4a6el41B06x2uCpglQ==
    Content-Type: text/plain;charset=utf-8
    <BLANKLINE>
    Just testing
    --==abcdefghi==--
    --==123456789==--
    <BLANKLINE>

    :param fileobj: a writable file-like object receiving the output
    :param subtype: the subtype of the multipart MIME type (e.g. "mixed")
    :param boundary: the boundary string that separates the parts
    :return: the `MultipartWriter` for the outermost envelope
    :since: 0.6
    """
    writer = MultipartWriter(fileobj, boundary=boundary, subtype=subtype)
    return writer
254