# -*- coding: utf-8 -*-
#
# Copyright (C) 2008-2009 Christopher Lenz
# All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution.

"""Support for streamed reading and writing of multipart MIME content."""

from base64 import b64encode
from cgi import parse_header
from email import header

try:
    from hashlib import md5
except ImportError:
    from md5 import new as md5

import uuid

from couchdb import util

__all__ = ['read_multipart', 'write_multipart']
__docformat__ = 'restructuredtext en'


CRLF = b'\r\n'


def read_multipart(fileobj, boundary=None):
    """Simple streaming MIME multipart parser.

    This function takes a file-like object reading a MIME envelope, and yields
    a ``(headers, is_multipart, payload)`` tuple for every part found, where
    ``headers`` is a dictionary containing the MIME headers of that part (with
    names lower-cased), ``is_multipart`` is a boolean indicating whether the
    part is itself multipart, and ``payload`` is either a byte string (if
    ``is_multipart`` is false), or an iterator over the nested parts.

    Every yielded ``headers`` dictionary is an independent copy, so it stays
    valid after the iterator has moved on to the next part.

    Note that the iterator produced for nested multipart payloads MUST be fully
    consumed, even if you wish to skip over the content.

    Parts that carry no headers at all are not yielded.

    :param fileobj: a file-like object yielding byte-string lines
    :param boundary: the part boundary string, will generally be determined
                     automatically from the headers of the outermost multipart
                     envelope
    :return: an iterator over the parts
    :raise ValueError: if a part has a ``Content-MD5`` header that does not
                       match the MD5 digest of its payload
    :since: 0.5
    """
    headers = {}
    buf = []
    outer = in_headers = boundary is None

    if boundary:
        next_boundary = ('--' + boundary + '\n').encode('ascii')
        last_boundary = ('--' + boundary + '--\n').encode('ascii')
    else:
        next_boundary = last_boundary = None

    def _current_part():
        # Assemble the buffered lines into the payload of the current part,
        # dropping the single line break that precedes the boundary line.
        payload = b''.join(buf)
        if payload.endswith(b'\r\n'):
            payload = payload[:-2]
        elif payload.endswith(b'\n'):
            payload = payload[:-1]
        # Header names are stored as decoded text, so the lookup key (and the
        # digest it is compared against) must be text as well; a bytes key
        # never matches on Python 3 and would silently skip the check.
        content_md5 = headers.get('content-md5')
        if content_md5:
            digest = b64encode(md5(payload).digest()).decode('ascii')
            if content_md5 != digest:
                raise ValueError('data integrity check failed')
        # Hand out a copy: ``headers`` is cleared and reused for the next part.
        return dict(headers), False, payload

    for line in fileobj:
        if in_headers:
            line = line.replace(CRLF, b'\n')
            if line != b'\n':
                name, value = [item.strip() for item in line.split(b':', 1)]
                name = name.lower().decode('ascii')
                value, charset = header.decode_header(value.decode('utf-8'))[0]
                if charset is None:
                    headers[name] = value
                else:
                    headers[name] = value.decode(charset)
            else:
                # A blank line terminates the header section of the part.
                in_headers = False
                # A part is not required to declare a Content-Type; treat a
                # missing header as "not multipart" instead of failing.
                mimetype, params = parse_header(
                    headers.get('content-type', '')
                )
                if mimetype.startswith('multipart/'):
                    sub_boundary = params['boundary']
                    sub_parts = read_multipart(fileobj, boundary=sub_boundary)
                    if boundary is not None:
                        yield dict(headers), True, sub_parts
                        headers.clear()
                        del buf[:]
                    else:
                        # Outermost envelope: its parts are the result.
                        for part in sub_parts:
                            yield part
                        return

        elif line.replace(CRLF, b'\n') == next_boundary:
            # We've reached the start of a new part, as indicated by the
            # boundary
            if headers:
                if not outer:
                    yield _current_part()
                else:
                    outer = False
            # Start the next part from a clean slate; anything buffered
            # without headers (e.g. a preamble) belongs to no part.
            headers.clear()
            del buf[:]
            in_headers = True

        elif line.replace(CRLF, b'\n') == last_boundary:
            # We're done with this multipart envelope
            break

        else:
            buf.append(line)

    if not outer and headers:
        yield _current_part()


class MultipartWriter(object):
    """Streaming writer for a single MIME multipart envelope.

    Creating an instance immediately writes the envelope headers (including
    the generated ``Content-Type`` header carrying the boundary) to
    ``fileobj``. Parts are then appended with `add()`, nested envelopes are
    started with `open()`, and the envelope is terminated with `close()`.
    Instances can be used as context managers, in which case `close()` is
    called on exit.
    """

    def __init__(self, fileobj, headers=None, subtype='mixed', boundary=None):
        """Write the envelope headers to `fileobj`.

        :param fileobj: a writable file-like object accepting byte strings
        :param headers: optional dictionary of additional envelope headers;
                        it is not modified
        :param subtype: the subtype of the multipart MIME type
        :param boundary: the boundary string; a random one is generated when
                         omitted
        """
        self.fileobj = fileobj
        if boundary is None:
            boundary = '==' + uuid.uuid4().hex + '=='
        self.boundary = boundary
        # Work on a copy so the caller's dictionary is left untouched.
        headers = dict(headers) if headers else {}
        headers['Content-Type'] = 'multipart/%s; boundary="%s"' % (
            subtype, self.boundary
        )
        self._write_headers(headers)

    def open(self, headers=None, subtype='mixed', boundary=None):
        """Start a nested multipart envelope as the next part.

        :return: a new `MultipartWriter` writing to the same file object,
                 which must be closed before this envelope is continued
        """
        self.fileobj.write(b'--')
        self.fileobj.write(self.boundary.encode('utf-8'))
        self.fileobj.write(CRLF)
        return MultipartWriter(self.fileobj, headers=headers, subtype=subtype,
                               boundary=boundary)

    def add(self, mimetype, content, headers=None):
        """Write a single part to the envelope.

        :param mimetype: the value for the ``Content-Type`` header of the part
        :param content: the payload; text is encoded using the ``charset``
                        parameter of `mimetype`, or as UTF-8 (which is then
                        appended to the content type) if there is none
        :param headers: optional dictionary of additional part headers; it is
                        not modified
        """
        self.fileobj.write(b'--')
        self.fileobj.write(self.boundary.encode('utf-8'))
        self.fileobj.write(CRLF)
        # Work on a copy so the caller's dictionary is left untouched.
        headers = dict(headers) if headers else {}

        ctype, params = parse_header(mimetype)
        if isinstance(content, util.utype):
            if 'charset' in params:
                content = content.encode(params['charset'])
            else:
                content = content.encode('utf-8')
                mimetype = mimetype + ';charset=utf-8'

        headers['Content-Type'] = mimetype
        if content:
            headers['Content-Length'] = str(len(content))
            digest = b64encode(md5(content).digest()).decode('ascii')
            headers['Content-MD5'] = digest
        self._write_headers(headers)
        if content:
            # XXX: throw an exception if a boundary appears in the content??
            self.fileobj.write(content)
            self.fileobj.write(CRLF)

    def close(self):
        """Write the closing boundary that terminates this envelope."""
        self.fileobj.write(b'--')
        self.fileobj.write(self.boundary.encode('ascii'))
        self.fileobj.write(b'--')
        self.fileobj.write(CRLF)

    def _write_headers(self, headers):
        """Write `headers` sorted by name, followed by a blank line."""
        if headers:
            for name in sorted(headers):
                value = headers[name]
                # Values containing non-ASCII characters are written as
                # encoded words so that the header line itself stays ASCII.
                if value.encode('ascii', 'ignore') != value.encode('utf-8'):
                    value = header.make_header([(value, 'utf-8')]).encode()
                self.fileobj.write(name.encode('utf-8'))
                self.fileobj.write(b': ')
                self.fileobj.write(value.encode('utf-8'))
                self.fileobj.write(CRLF)
        self.fileobj.write(CRLF)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()


def write_multipart(fileobj, subtype='mixed', boundary=None):
    r"""Simple streaming MIME multipart writer.

    This function returns a `MultipartWriter` object that has a few methods to
    control the nested MIME parts. For example, to write a flat multipart
    envelope you call the ``add(mimetype, content, [headers])`` method for
    every part, and finally call the ``close()`` method.

    >>> from couchdb.util import StringIO

    >>> buf = StringIO()
    >>> envelope = write_multipart(buf, boundary='==123456789==')
    >>> envelope.add('text/plain', b'Just testing')
    >>> envelope.close()
    >>> print(buf.getvalue().replace(b'\r\n', b'\n').decode('utf-8'))
    Content-Type: multipart/mixed; boundary="==123456789=="
    <BLANKLINE>
    --==123456789==
    Content-Length: 12
    Content-MD5: nHmX4a6el41B06x2uCpglQ==
    Content-Type: text/plain
    <BLANKLINE>
    Just testing
    --==123456789==--
    <BLANKLINE>

    Note that an explicit boundary is only specified for testing purposes. If
    the `boundary` parameter is omitted, the multipart writer will generate a
    random string for the boundary.

    To write nested structures, call the ``open([headers])`` method on the
    respective envelope, and finish each envelope using the ``close()`` method:

    >>> buf = StringIO()
    >>> envelope = write_multipart(buf, boundary='==123456789==')
    >>> part = envelope.open(boundary='==abcdefghi==')
    >>> part.add('text/plain', u'Just testing')
    >>> part.close()
    >>> envelope.close()
    >>> print(buf.getvalue().replace(b'\r\n', b'\n').decode('utf-8')) # doctest: +ELLIPSIS
    Content-Type: multipart/mixed; boundary="==123456789=="
    <BLANKLINE>
    --==123456789==
    Content-Type: multipart/mixed; boundary="==abcdefghi=="
    <BLANKLINE>
    --==abcdefghi==
    Content-Length: 12
    Content-MD5: nHmX4a6el41B06x2uCpglQ==
    Content-Type: text/plain;charset=utf-8
    <BLANKLINE>
    Just testing
    --==abcdefghi==--
    --==123456789==--
    <BLANKLINE>

    :param fileobj: a writable file-like object that the output should get
                    written to
    :param subtype: the subtype of the multipart MIME type (e.g. "mixed")
    :param boundary: the boundary to use to separate the different parts
    :since: 0.6
    """
    return MultipartWriter(fileobj, subtype=subtype, boundary=boundary)