1# -*- coding: utf-8 -*-
3# Copyright (C) 2008-2009 Christopher Lenz
4# All rights reserved.
6# This software is licensed as described in the file COPYING, which
7# you should have received as part of this distribution.
9"""Support for streamed reading and writing of multipart MIME content."""
11from base64 import b64encode
12from cgi import parse_header
13from email import header
16    from hashlib import md5
17except ImportError:
18    from md5 import new as md5
20import uuid
22from couchdb import util
# Names exported by ``from ... import *``.
__all__ = ['read_multipart', 'write_multipart']
# Markup format used by the docstrings in this module.
__docformat__ = 'restructuredtext en'

# Line terminator (carriage return + line feed) as a byte string.
CRLF = b'\r\n'
def read_multipart(fileobj, boundary=None):
    """Simple streaming MIME multipart parser.

    This function takes a file-like object reading a MIME envelope, and yields
    a ``(headers, is_multipart, payload)`` tuple for every part found, where
    ``headers`` is a dictionary containing the MIME headers of that part (with
    names lower-cased), ``is_multipart`` is a boolean indicating whether the
    part is itself multipart, and ``payload`` is either a byte string (if
    ``is_multipart`` is false), or an iterator over the nested parts.

    Every yielded ``headers`` dictionary is an independent copy, so it stays
    valid after the iterator has moved on to later parts.

    Lines that precede the first delimiter of an envelope, or that follow a
    nested envelope, do not belong to any part and are skipped. A part without
    a ``Content-Type`` header is treated as ``text/plain``.

    Note that the iterator produced for nested multipart payloads MUST be fully
    consumed, even if you wish to skip over the content.

    :param fileobj: a file-like object whose iteration yields byte-string
                    lines
    :param boundary: the part boundary string, will generally be determined
                     automatically from the headers of the outermost multipart
                     envelope
    :return: an iterator over the parts
    :raise ValueError: if a part has a ``Content-MD5`` header that does not
                       match the MD5 digest of its payload
    :since: 0.5
    """
    headers = {}
    buf = []
    # Without a boundary we are reading the outermost envelope, which starts
    # directly with its header section.
    in_headers = boundary is None
    # True once a delimiter line has opened a part of *this* envelope.
    in_part = False

    # Delimiters are stored without a line ending; input lines are compared
    # after their own line ending has been removed, so ``\r\n``, ``\n`` and a
    # missing newline at end-of-input are all recognised.
    if boundary:
        next_boundary = ('--' + boundary).encode('ascii')
        last_boundary = ('--' + boundary + '--').encode('ascii')
    else:
        next_boundary = last_boundary = None

    def _strip_eol(data):
        # Remove exactly one trailing line terminator, if there is one.
        if data.endswith(b'\r\n'):
            return data[:-2]
        if data.endswith(b'\n'):
            return data[:-1]
        return data

    def _current_part():
        # The line break in front of a delimiter belongs to the delimiter,
        # not to the payload.
        payload = _strip_eol(b''.join(buf))
        content_md5 = headers.get('content-md5')
        if content_md5:
            # Header values are text, the computed digest is bytes.
            if not isinstance(content_md5, bytes):
                content_md5 = content_md5.encode('ascii')
            digest = b64encode(md5(payload).digest())
            if content_md5 != digest:
                raise ValueError('data integrity check failed')
        # Hand out a copy: ``headers`` is reused for the following part.
        return dict(headers), False, payload

    for line in fileobj:
        if in_headers:
            line = _strip_eol(line)
            if line:
                name, value = [item.strip() for item in line.split(b':', 1)]
                name = name.lower().decode('ascii')
                value, charset = header.decode_header(value.decode('utf-8'))[0]
                if charset is None:
                    headers[name] = value
                else:
                    headers[name] = value.decode(charset)
                continue

            # A blank line ends the header section.
            in_headers = False
            content_type = headers.get('content-type', 'text/plain')
            mimetype, params = parse_header(content_type)
            if mimetype.startswith('multipart/'):
                sub_parts = read_multipart(fileobj,
                                           boundary=params['boundary'])
                if boundary is None:
                    # Outermost envelope: its parts are our parts.
                    for part in sub_parts:
                        yield part
                    return
                yield dict(headers), True, sub_parts
                # The nested parser consumed that part's content; anything
                # up to our next delimiter belongs to no part.
                headers.clear()
                del buf[:]
                in_part = False
            continue

        stripped = _strip_eol(line)
        if stripped == next_boundary:
            # We've reached the start of a new part, as indicated by the
            # boundary
            if in_part:
                yield _current_part()
            headers.clear()
            del buf[:]
            in_part = in_headers = True
        elif stripped == last_boundary:
            # We're done with this multipart envelope
            break
        elif in_part:
            buf.append(line)

    if in_part:
        yield _current_part()
class MultipartWriter(object):
    """Streaming writer for one MIME multipart envelope.

    Creating an instance immediately writes the envelope headers, including
    the generated ``Content-Type`` header, to ``fileobj``. Parts are appended
    with `add`, nested envelopes are started with `open`, and the envelope is
    terminated with `close`. Instances can be used as context managers, in
    which case `close` is called when the ``with`` block is left.
    """

    def __init__(self, fileobj, headers=None, subtype='mixed', boundary=None):
        """Write the envelope headers to ``fileobj``.

        :param fileobj: a writable file-like object accepting byte strings
        :param headers: optional dictionary of additional envelope headers;
                        it is not modified
        :param subtype: the subtype of the multipart MIME type
        :param boundary: the boundary string; a random one is generated when
                         omitted
        """
        self.fileobj = fileobj
        if boundary is None:
            boundary = '==' + uuid.uuid4().hex + '=='
        self.boundary = boundary
        # Work on a copy so the caller's dictionary is left untouched.
        headers = dict(headers) if headers else {}
        headers['Content-Type'] = 'multipart/%s; boundary="%s"' % (
            subtype, self.boundary
        )
        self._write_headers(headers)

    def open(self, headers=None, subtype='mixed', boundary=None):
        """Start a nested multipart envelope as the next part.

        :param headers: optional dictionary of headers for the nested envelope
        :param subtype: the multipart subtype of the nested envelope
        :param boundary: the boundary string of the nested envelope
        :return: a new `MultipartWriter` writing to the same file object
        """
        self._write_delimiter()
        return MultipartWriter(self.fileobj, headers=headers, subtype=subtype,
                               boundary=boundary)

    def add(self, mimetype, content, headers=None):
        """Write a single, non-multipart part.

        Text content is encoded using the ``charset`` parameter of
        ``mimetype``; if there is none, UTF-8 is used and
        ``;charset=utf-8`` is appended to the content type. For non-empty
        content, ``Content-Length`` and ``Content-MD5`` headers are added.

        :param mimetype: the content type of the part
        :param content: the payload as byte string or text
        :param headers: optional dictionary of additional part headers; it is
                        not modified
        """
        self._write_delimiter()
        # Work on a copy so the caller's dictionary is left untouched.
        headers = dict(headers) if headers else {}

        params = parse_header(mimetype)[1]
        if isinstance(content, util.utype):
            if 'charset' in params:
                content = content.encode(params['charset'])
            else:
                content = content.encode('utf-8')
                mimetype = mimetype + ';charset=utf-8'

        headers['Content-Type'] = mimetype
        if content:
            headers['Content-Length'] = str(len(content))
            digest = b64encode(md5(content).digest()).decode('ascii')
            headers['Content-MD5'] = digest
        self._write_headers(headers)
        if content:
            # XXX: throw an exception if a boundary appears in the content??
            self.fileobj.write(content)
            self.fileobj.write(CRLF)

    def close(self):
        """Write the closing delimiter of this envelope."""
        self._write_delimiter(closing=True)

    def _write_delimiter(self, closing=False):
        """Write a delimiter line for this envelope's boundary.

        :param closing: whether to write the closing delimiter
                        (``--boundary--``) instead of a part delimiter
        """
        self.fileobj.write(b'--')
        self.fileobj.write(self.boundary.encode('utf-8'))
        if closing:
            self.fileobj.write(b'--')
        self.fileobj.write(CRLF)

    def _write_headers(self, headers):
        """Write ``headers`` sorted by name, followed by an empty line.

        Values containing non-ASCII characters are written as UTF-8 encoded
        words produced by ``email.header.make_header``.
        """
        if headers:
            for name in sorted(headers):
                value = headers[name]
                # The two encodings only differ for non-ASCII values.
                if value.encode('ascii', 'ignore') != value.encode('utf-8'):
                    value = header.make_header([(value, 'utf-8')]).encode()
                self.fileobj.write(name.encode('utf-8'))
                self.fileobj.write(b': ')
                self.fileobj.write(value.encode('utf-8'))
                self.fileobj.write(CRLF)
        self.fileobj.write(CRLF)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()
def write_multipart(fileobj, subtype='mixed', boundary=None):
    r"""Create a streaming writer for MIME multipart content.

    The `MultipartWriter` object returned by this function writes to
    ``fileobj`` as its methods are called. A flat multipart envelope is
    produced by calling ``add(mimetype, content, [headers])`` once for each
    part, followed by a final call to ``close()``.

    >>> from couchdb.util import StringIO

    >>> buf = StringIO()
    >>> envelope = write_multipart(buf, boundary='==123456789==')
    >>> envelope.add('text/plain', b'Just testing')
    >>> envelope.close()
    >>> print(buf.getvalue().replace(b'\r\n', b'\n').decode('utf-8'))
    Content-Type: multipart/mixed; boundary="==123456789=="
    <BLANKLINE>
    --==123456789==
    Content-Length: 12
    Content-MD5: nHmX4a6el41B06x2uCpglQ==
    Content-Type: text/plain
    <BLANKLINE>
    Just testing
    --==123456789==--
    <BLANKLINE>

    The explicit boundary above is given for testing purposes only. When the
    `boundary` parameter is left out, the writer generates a random boundary
    string.

    Nested structures are written by calling ``open([headers])`` on the
    enclosing envelope, and finishing every envelope with ``close()``:

    >>> buf = StringIO()
    >>> envelope = write_multipart(buf, boundary='==123456789==')
    >>> part = envelope.open(boundary='==abcdefghi==')
    >>> part.add('text/plain', u'Just testing')
    >>> part.close()
    >>> envelope.close()
    >>> print(buf.getvalue().replace(b'\r\n', b'\n').decode('utf-8'))
    Content-Type: multipart/mixed; boundary="==123456789=="
    <BLANKLINE>
    --==123456789==
    Content-Type: multipart/mixed; boundary="==abcdefghi=="
    <BLANKLINE>
    --==abcdefghi==
    Content-Length: 12
    Content-MD5: nHmX4a6el41B06x2uCpglQ==
    Content-Type: text/plain;charset=utf-8
    <BLANKLINE>
    Just testing
    --==abcdefghi==--
    --==123456789==--
    <BLANKLINE>

    :param fileobj: a writable file-like object that receives the output
    :param subtype: the subtype of the multipart MIME type (e.g. "mixed")
    :param boundary: the boundary used to separate the different parts
    :return: the `MultipartWriter` for the outermost envelope
    :since: 0.6
    """
    envelope = MultipartWriter(fileobj, subtype=subtype, boundary=boundary)
    return envelope