#!/usr/bin/env python

"""
Models declaration for application ``django_mailbox``.
"""
import gzip
from email.encoders import encode_base64
from email.message import Message as EmailMessage
from email.utils import formatdate, getaddresses, parseaddr
from urllib.parse import parse_qs, unquote, urlparse
from quopri import encode as encode_quopri
from io import BytesIO
import base64
import email
import logging
import mimetypes
import os.path
import sys
import uuid
from tempfile import NamedTemporaryFile

import django
from django.conf import settings as django_settings
from django.core.files.base import ContentFile, File
from django.core.mail.message import make_msgid
from django.db import models
from django.utils.translation import gettext_lazy as _
from django.utils.timezone import now

from django_mailbox import utils
from django_mailbox.signals import message_received
from django_mailbox.transports import Pop3Transport, ImapTransport, \
    MaildirTransport, MboxTransport, BabylTransport, MHTransport, \
    MMDFTransport, GmailImapTransport

logger = logging.getLogger(__name__)


def _read_and_close(field_file):
    """Return the full content of a stored file, always closing it afterwards.

    ``open()`` is called first so that reading starts at the beginning of
    the file even if the handle was used before.
    """
    field_file.open('rb')
    try:
        return field_file.read()
    finally:
        field_file.close()


class MailboxQuerySet(models.QuerySet):
    """QuerySet that can poll every mailbox it contains."""

    def get_new_mail(self):
        """Fetch new mail for each mailbox in this queryset.

        The per-mailbox generators are fully consumed; only the total
        number of received messages is logged (nothing is returned).
        """
        count = 0
        for mailbox in self.all():
            logger.debug('Receiving mail for %s', mailbox)
            count += sum(1 for _message in mailbox.get_new_mail())
        logger.debug(
            'Received %d %s.', count, 'mails' if count != 1 else 'mail'
        )


class MailboxManager(models.Manager):
    """Manager returning :class:`MailboxQuerySet` instances."""

    def get_queryset(self):
        return MailboxQuerySet(self.model, using=self._db)


class ActiveMailboxManager(MailboxManager):
    """Manager restricted to mailboxes whose ``active`` flag is set."""

    def get_queryset(self):
        return super().get_queryset().filter(
            active=True,
        )


class Mailbox(models.Model):
    """A source (and optional sender identity) of e-mail messages.

    The connection details are encoded in ``uri``; the properties below
    expose the individual components of that URI.
    """

    name = models.CharField(
        _('Name'),
        max_length=255,
    )

    uri = models.CharField(
        _('URI'),
        max_length=255,
        help_text=(_(
            "Example: imap+ssl://myusername:mypassword@someserver <br />"
            "<br />"
            "Internet transports include 'imap' and 'pop3'; "
            "common local file transports include 'maildir', 'mbox', "
            "and less commonly 'babyl', 'mh', and 'mmdf'. <br />"
            "<br />"
            "Be sure to urlencode your username and password should they "
            "contain illegal characters (like @, :, etc)."
        )),
        blank=True,
        null=True,
        default=None,
    )

    # NOTE(review): the help text below contains "will'" directly followed
    # by "be" (stray quote, missing space). Left untouched because changing
    # it alters the translation id and the field's migration state.
    from_email = models.CharField(
        _('From email'),
        max_length=255,
        help_text=(_(
            "Example: MailBot <mailbot@yourdomain.com><br />"
            "'From' header to set for outgoing email.<br />"
            "<br />"
            "If you do not use this e-mail inbox for outgoing mail, this "
            "setting is unnecessary.<br />"
            "If you send e-mail without setting this, your 'From' header will'"
            "be set to match the setting `DEFAULT_FROM_EMAIL`."
        )),
        blank=True,
        null=True,
        default=None,
    )

    active = models.BooleanField(
        _('Active'),
        help_text=(_(
            "Check this e-mail inbox for new e-mail messages during polling "
            "cycles. This checkbox does not have an effect upon whether "
            "mail is collected here when this mailbox receives mail from a "
            "pipe, and does not affect whether e-mail messages can be "
            "dispatched from this mailbox. "
        )),
        blank=True,
        default=True,
    )

    # NOTE(review): the first two help-text fragments are concatenated
    # without a separating space ("messages.It is blank"). Left untouched
    # for the same reason as ``from_email`` above.
    last_polling = models.DateTimeField(
        _("Last polling"),
        help_text=(_("The time of last successful polling for messages."
                     "It is blank for new mailboxes and is not set for "
                     "mailboxes that only receive messages via a pipe.")),
        blank=True,
        null=True
    )

    objects = MailboxManager()
    active_mailboxes = ActiveMailboxManager()

    @property
    def _protocol_info(self):
        """The parsed form of ``uri`` (re-parsed on every access)."""
        return urlparse(self.uri)

    @property
    def _query_string(self):
        """The query-string parameters of ``uri`` as a dict of lists."""
        return parse_qs(self._protocol_info.query)

    @property
    def _domain(self):
        """The hostname component of ``uri`` (``None`` if absent)."""
        return self._protocol_info.hostname

    @property
    def port(self):
        """Returns the port to use for fetching messages."""
        return self._protocol_info.port

    @property
    def username(self):
        """Returns the username to use for fetching messages.

        The value is URL-decoded. ``None`` is returned when the URI does
        not contain a username (e.g. local file transports).
        """
        username = self._protocol_info.username
        if username is None:
            return None
        return unquote(username)

    @property
    def password(self):
        """Returns the password to use for fetching messages.

        The value is URL-decoded. ``None`` is returned when the URI does
        not contain a password (e.g. local file transports).
        """
        password = self._protocol_info.password
        if password is None:
            return None
        return unquote(password)

    @property
    def location(self):
        """Returns the location of messages.

        This is the URI's hostname when it has one; otherwise it is the
        URI's path (the case for local file transports such as
        ``maildir:///some/path``). The two are never combined.
        """
        return self._domain or self._protocol_info.path

    @property
    def type(self):
        """Returns the 'transport' name for this mailbox."""
        scheme = self._protocol_info.scheme.lower()
        # Modifiers such as "+ssl" / "+tls" follow the transport name.
        return scheme.split('+')[0]

    @property
    def use_ssl(self):
        """Returns whether or not this mailbox's connection uses SSL."""
        return '+ssl' in self._protocol_info.scheme.lower()

    @property
    def use_tls(self):
        """Returns whether or not this mailbox's connection uses STARTTLS."""
        return '+tls' in self._protocol_info.scheme.lower()

    @property
    def archive(self):
        """Returns (if specified) the folder to archive messages to."""
        archive_folder = self._query_string.get('archive', None)
        if not archive_folder:
            return None
        return archive_folder[0]

    @property
    def folder(self):
        """Returns (if specified) the folder to fetch mail from."""
        folder = self._query_string.get('folder', None)
        if not folder:
            return None
        return folder[0]

    def get_connection(self):
        """Returns the transport instance for this mailbox.

        These will always be instances of
        `django_mailbox.transports.base.EmailTransport`.

        Returns ``None`` when no URI is configured. Network transports
        (imap, gmail, pop3) are connected before being returned.

        Raises:
            ValueError: if the URI scheme names an unsupported transport.
        """
        if not self.uri:
            return None

        transport_type = self.type

        if transport_type == 'imap':
            conn = ImapTransport(
                self.location,
                port=self.port or None,
                ssl=self.use_ssl,
                tls=self.use_tls,
                archive=self.archive,
                folder=self.folder
            )
            conn.connect(self.username, self.password)
            return conn

        if transport_type == 'gmail':
            conn = GmailImapTransport(
                self.location,
                port=self.port or None,
                ssl=True,
                archive=self.archive
            )
            conn.connect(self.username, self.password)
            return conn

        if transport_type == 'pop3':
            conn = Pop3Transport(
                self.location,
                port=self.port or None,
                ssl=self.use_ssl
            )
            conn.connect(self.username, self.password)
            return conn

        # Local file transports only need a location. The mapping is built
        # here (not at import time) so the module-level names are resolved
        # at call time.
        local_transports = {
            'maildir': MaildirTransport,
            'mbox': MboxTransport,
            'babyl': BabylTransport,
            'mh': MHTransport,
            'mmdf': MMDFTransport,
        }
        transport_class = local_transports.get(transport_type)
        if transport_class is None:
            # Previously this fell through to ``return conn`` with ``conn``
            # unbound, raising an unhelpful UnboundLocalError.
            raise ValueError(
                'Unsupported mailbox transport type %r' % (transport_type,)
            )
        return transport_class(self.location)

    def process_incoming_message(self, message):
        """Process a message incoming to this mailbox.

        Stores the message, marks it as incoming and sends the
        ``message_received`` signal. Returns the stored record, or
        ``None`` if the message could not be processed.
        """
        msg = self._process_message(message)
        if msg is None:
            return None
        msg.outgoing = False
        msg.save()

        message_received.send(sender=self, message=msg)

        return msg

    def record_outgoing_message(self, message):
        """Record an outgoing message associated with this mailbox.

        Returns the stored record, or ``None`` if the message could not
        be processed.
        """
        msg = self._process_message(message)
        if msg is None:
            return None
        msg.outgoing = True
        msg.save()
        return msg

    def _get_dehydrated_message(self, msg, record):
        """Return a copy of ``msg`` with attachments moved out of the body.

        Multipart messages are processed recursively. For each leaf part:

        * parts with a disallowed content type (when stripping is enabled)
          are replaced by an empty part carrying an explanatory header;
        * parts that are not stored as text, or that are flagged as
          attachments, are saved as ``MessageAttachment`` rows linked to
          ``record`` and replaced by a placeholder referencing them;
        * remaining text parts are returned as-is, after making sure their
          payload can be decoded.
        """
        settings = utils.get_settings()

        if msg.is_multipart():
            new = EmailMessage()
            for header, value in msg.items():
                new[header] = value
            for part in msg.get_payload():
                new.attach(
                    self._get_dehydrated_message(part, record)
                )
            return new

        if (
            settings['strip_unallowed_mimetypes']
            and msg.get_content_type() not in settings['allowed_mimetypes']
        ):
            new = EmailMessage()
            for header, value in msg.items():
                new[header] = value
            # Delete header, otherwise when attempting to deserialize the
            # payload, it will be expecting a body for this.
            del new['Content-Transfer-Encoding']
            new[settings['altered_message_header']] = (
                'Stripped; Content type %s not allowed' % (
                    msg.get_content_type()
                )
            )
            new.set_payload('')
            return new

        if (
            msg.get_content_type() not in settings['text_stored_mimetypes']
            or 'attachment' in msg.get('Content-Disposition', '')
        ):
            return self._dehydrate_attachment(msg, record, settings)

        self._ensure_decodable_payload(msg)
        return msg

    def _dehydrate_attachment(self, msg, record, settings):
        """Store ``msg`` as an attachment of ``record``; return a placeholder."""
        filename = None
        raw_filename = msg.get_filename()
        if raw_filename:
            filename = utils.convert_header_to_unicode(raw_filename)
        if filename:
            extension = os.path.splitext(filename)[1]
        else:
            extension = mimetypes.guess_extension(msg.get_content_type())
        if not extension:
            extension = '.bin'

        attachment = MessageAttachment()
        # The stored name is random; the original filename stays available
        # through the headers copied below.
        attachment.document.save(
            uuid.uuid4().hex + extension,
            ContentFile(msg.get_payload(decode=True) or b'')
        )
        attachment.message = record
        for key, value in msg.items():
            attachment[key] = value
        attachment.save()

        placeholder = EmailMessage()
        placeholder[
            settings['attachment_interpolation_header']
        ] = str(attachment.pk)
        return placeholder

    @staticmethod
    def _ensure_decodable_payload(msg):
        """Replace the payload of ``msg`` in place if it cannot be decoded.

        When the declared charset is unknown or the payload is not valid
        in that charset, the payload is re-set to its ASCII interpretation
        with undecodable bytes dropped.
        """
        content_charset = msg.get_content_charset() or 'ascii'
        payload = msg.get_payload(decode=True)
        try:
            # Make sure that the payload can be properly decoded in the
            # defined charset, if it can't, let's mash some things
            # inside the payload :-\
            payload.decode(content_charset)
        except LookupError:
            logger.warning(
                "Unknown encoding %s; interpreting as ASCII!",
                content_charset
            )
        except ValueError:
            logger.warning(
                "Decoding error encountered; interpreting %s as ASCII!",
                content_charset
            )
        else:
            return
        msg.set_payload(payload.decode('ascii', 'ignore'))

    def _process_message(self, message):
        """Create and save a ``Message`` record from an e-mail object.

        Returns the saved record, or ``None`` if the dehydrated message
        could not be serialized.
        """
        msg = Message()
        msg._email_object = message
        settings = utils.get_settings()

        if settings['store_original_message']:
            self._process_save_original_message(message, msg)
        msg.mailbox = self
        if 'subject' in message:
            msg.subject = (
                utils.convert_header_to_unicode(message['subject'])[0:255]
            )
        if 'message-id' in message:
            msg.message_id = message['message-id'][0:255].strip()
        if 'from' in message:
            msg.from_header = utils.convert_header_to_unicode(message['from'])
        if 'to' in message:
            msg.to_header = utils.convert_header_to_unicode(message['to'])
        elif 'Delivered-To' in message:
            msg.to_header = utils.convert_header_to_unicode(
                message['Delivered-To']
            )
        # The record must exist before attachments can reference it.
        msg.save()
        message = self._get_dehydrated_message(message, msg)
        try:
            body = message.as_string()
        except KeyError as exc:
            # email.message.replace_header may raise 'KeyError' if the header
            # 'content-transfer-encoding' is missing
            # NOTE(review): the record saved above (and any attachments
            # already stored) stays in the database on this path.
            logger.warning("Failed to parse message: %s", exc,)
            return None
        msg.set_body(body)
        if message['in-reply-to']:
            try:
                msg.in_reply_to = Message.objects.filter(
                    message_id=message['in-reply-to'].strip()
                )[0]
            except IndexError:
                # No stored message matches; leave ``in_reply_to`` unset.
                pass
        msg.save()
        return msg

    def _process_save_original_message(self, message, msg):
        """Attach the raw message to ``msg.eml`` (gzip-compressed if enabled).

        The model instance itself is not saved here (``save=False``).
        """
        settings = utils.get_settings()
        if settings['compress_original_message']:
            with NamedTemporaryFile(suffix=".eml.gz") as fp_tmp:
                with gzip.GzipFile(fileobj=fp_tmp, mode="w") as fp:
                    fp.write(message.as_string().encode('utf-8'))
                msg.eml.save(
                    "{}.eml.gz".format(uuid.uuid4()),
                    File(fp_tmp),
                    save=False
                )

        else:
            msg.eml.save(
                '%s.eml' % uuid.uuid4(),
                ContentFile(message.as_string()),
                save=False
            )

    def get_new_mail(self, condition=None):
        """Connect to this transport and fetch new messages.

        This is a generator yielding each successfully stored message.
        ``last_polling`` is updated only once the transport's messages
        have all been consumed. Nothing is yielded (and ``last_polling``
        is left alone) when the mailbox has no connection.
        """
        connection = self.get_connection()
        if not connection:
            return
        for message in connection.get_message(condition):
            msg = self.process_incoming_message(message)
            if msg is not None:
                yield msg
        self.last_polling = now()
        self.save(update_fields=['last_polling'])

    def __str__(self):
        return self.name

    class Meta:
        verbose_name = _('Mailbox')
        verbose_name_plural = _('Mailboxes')


class IncomingMessageManager(models.Manager):
    """Manager restricted to messages with ``outgoing=False``."""

    def get_queryset(self):
        return super().get_queryset().filter(
            outgoing=False,
        )


class OutgoingMessageManager(models.Manager):
    """Manager restricted to messages with ``outgoing=True``."""

    def get_queryset(self):
        return super().get_queryset().filter(
            outgoing=True,
        )


class UnreadMessageManager(models.Manager):
    """Manager restricted to messages whose ``read`` timestamp is unset."""

    def get_queryset(self):
        return super().get_queryset().filter(
            read=None
        )


class Message(models.Model):
    """A stored e-mail message, incoming or outgoing."""

    mailbox = models.ForeignKey(
        Mailbox,
        related_name='messages',
        verbose_name=_('Mailbox'),
        on_delete=models.CASCADE
    )

    subject = models.CharField(
        _('Subject'),
        max_length=255
    )

    message_id = models.CharField(
        _('Message ID'),
        max_length=255
    )

    # NOTE(review): CASCADE means deleting a message also deletes every
    # stored reply to it -- confirm this is intended.
    in_reply_to = models.ForeignKey(
        'django_mailbox.Message',
        related_name='replies',
        blank=True,
        null=True,
        verbose_name=_('In reply to'),
        on_delete=models.CASCADE
    )

    from_header = models.CharField(
        _('From header'),
        max_length=255,
    )

    to_header = models.TextField(
        _('To header'),
    )

    outgoing = models.BooleanField(
        _('Outgoing'),
        default=False,
        blank=True,
    )

    body = models.TextField(
        _('Body'),
    )

    encoded = models.BooleanField(
        _('Encoded'),
        default=False,
        help_text=_('True if the e-mail body is Base64 encoded'),
    )

    processed = models.DateTimeField(
        _('Processed'),
        auto_now_add=True
    )

    read = models.DateTimeField(
        _('Read'),
        default=None,
        blank=True,
        null=True,
    )

    eml = models.FileField(
        _('Raw message contents'),
        null=True,
        upload_to="messages",
        help_text=_('Original full content of message')
    )
    objects = models.Manager()
    unread_messages = UnreadMessageManager()
    incoming_messages = IncomingMessageManager()
    outgoing_messages = OutgoingMessageManager()

    @property
    def address(self):
        """Property allowing one to get the relevant address(es).

        In earlier versions of this library, the model had an `address` field
        storing the e-mail address from which a message was received. During
        later refactorings, it became clear that perhaps storing sent messages
        would also be useful, so the address field was replaced with two
        separate fields.

        Returns the recipient addresses followed by the sender address.
        """
        return self.to_addresses + self.from_address

    @property
    def from_address(self):
        """Returns the address (as a list) from which this message was received

        .. note::

           This was once (and probably should be) a string rather than a list,
           but in a pull request received long, long ago it was changed;
           presumably to make the interface identical to that of
           `to_addresses`.

        """
        if self.from_header:
            return [parseaddr(self.from_header)[1].lower()]
        else:
            return []

    @property
    def to_addresses(self):
        """Returns a list of addresses to which this message was sent.

        The header is parsed with ``email.utils.getaddresses`` so that
        commas inside quoted display names (``"Doe, John" <j@x.com>``) do
        not split an address. Entries without an address are skipped and
        addresses are lower-cased.
        """
        return [
            address.lower()
            for _display_name, address in getaddresses([self.to_header])
            if address
        ]

    def reply(self, message):
        """Sends a message as a reply to this message instance.

        Although Django's e-mail processing will set both Message-ID
        and Date upon generating the e-mail message, we will not be able
        to retrieve that information through normal channels, so we must
        pre-set it.

        Returns the record created for the sent message.
        """
        if not message.from_email:
            if self.mailbox.from_email:
                message.from_email = self.mailbox.from_email
            else:
                message.from_email = django_settings.DEFAULT_FROM_EMAIL
        message.extra_headers['Message-ID'] = make_msgid()
        message.extra_headers['Date'] = formatdate()
        message.extra_headers['In-Reply-To'] = self.message_id.strip()
        message.send()
        return self.mailbox.record_outgoing_message(
            email.message_from_string(
                message.message().as_string()
            )
        )

    @property
    def text(self):
        """
        Returns the message body matching content type 'text/plain'.
        """
        # "=\n" is presumably a leftover quoted-printable soft line break.
        return utils.get_body_from_message(
            self.get_email_object(), 'text', 'plain'
        ).replace('=\n', '').strip()

    @property
    def html(self):
        """
        Returns the message body matching content type 'text/html'.

        All newlines are removed from the returned markup.
        """
        return utils.get_body_from_message(
            self.get_email_object(), 'text', 'html'
        ).replace('\n', '').strip()

    def _rehydrate(self, msg):
        """Return a copy of ``msg`` with attachment placeholders expanded.

        Placeholders are parts carrying the attachment interpolation
        header; each is replaced by the stored attachment's headers and
        re-encoded content. If the referenced attachment no longer exists,
        an empty part with an explanatory header is produced instead.
        """
        new = EmailMessage()
        settings = utils.get_settings()
        interpolation_header = settings['attachment_interpolation_header']

        if msg.is_multipart():
            for header, value in msg.items():
                new[header] = value
            for part in msg.get_payload():
                new.attach(
                    self._rehydrate(part)
                )
            return new

        if interpolation_header not in msg.keys():
            # Ordinary part: copy headers and payload verbatim.
            for header, value in msg.items():
                new[header] = value
            new.set_payload(
                msg.get_payload()
            )
            return new

        try:
            attachment = MessageAttachment.objects.get(
                pk=msg[interpolation_header]
            )
        except MessageAttachment.DoesNotExist:
            new[settings['altered_message_header']] = (
                'Missing; Attachment %s not found' % (
                    msg[interpolation_header]
                )
            )
            new.set_payload('')
            return new

        for header, value in attachment.items():
            new[header] = value
        encoding = new['Content-Transfer-Encoding']
        # Read through the helper so the stored file handle is closed.
        content = _read_and_close(attachment.document)
        if encoding and encoding.lower() == 'quoted-printable':
            # Cannot use `email.encoders.encode_quopri due to
            # bug 14360: http://bugs.python.org/issue14360
            output = BytesIO()
            encode_quopri(
                BytesIO(content),
                output,
                quotetabs=True,
                header=False,
            )
            new.set_payload(
                output.getvalue().decode().replace(' ', '=20')
            )
            del new['Content-Transfer-Encoding']
            new['Content-Transfer-Encoding'] = 'quoted-printable'
        else:
            new.set_payload(content)
            # encode_base64 adds its own Content-Transfer-Encoding header.
            del new['Content-Transfer-Encoding']
            encode_base64(new)
        return new

    def get_body(self):
        """Returns the `body` field of this record.

        This will automatically base64-decode the message contents
        if they are encoded as such.

        The result is always ``bytes``.
        """
        if self.encoded:
            return base64.b64decode(self.body.encode('ascii'))
        return self.body.encode('utf-8')

    def set_body(self, body):
        """Set the `body` field of this record.

        This will automatically base64-encode the message contents to
        circumvent a limitation in earlier versions of Django in which
        no fields existed for storing arbitrary bytes.

        """
        self.encoded = True
        self.body = base64.b64encode(body.encode('utf-8')).decode('ascii')

    def get_email_object(self):
        """Returns an `email.message.Message` instance representing the
        contents of this message and all attachments.

        The object is built once and cached on the instance. Note that
        within this module ``EmailMessage`` is an alias of the legacy
        `email.message.Message` class.

        .. note::

           Depending upon the storage methods in use (specifically --
           whether ``DJANGO_MAILBOX_STORE_ORIGINAL_MESSAGE`` is set
           to ``True``), the content is read either from the original
           message stored on disk (gunzipped if its name ends in ``.gz``)
           or from the ``body`` field. In both cases the result is then
           "rehydrated" using stored attachments.

        See https://docs.python.org/3/library/email.compat32-message.html
        for the methods available on the returned object.
        """
        if not hasattr(self, '_email_object'):  # Cache fill
            if self.eml:
                # Open explicitly and always close, for both storage forms.
                self.eml.open()
                try:
                    if self.eml.name.endswith('.gz'):
                        with gzip.GzipFile(
                            fileobj=self.eml, mode='rb'
                        ) as compressed:
                            body = compressed.read()
                    else:
                        body = self.eml.file.read()
                finally:
                    self.eml.close()
            else:
                body = self.get_body()
            flat = email.message_from_bytes(body)
            self._email_object = self._rehydrate(flat)
        return self._email_object

    def delete(self, *args, **kwargs):
        """Delete this message and all stored attachments."""
        for attachment in self.attachments.all():
            # This attachment is attached only to this message.
            attachment.delete()
        return super().delete(*args, **kwargs)

    def __str__(self):
        return self.subject

    class Meta:
        verbose_name = _('E-mail message')
        verbose_name_plural = _('E-mail messages')


class MessageAttachment(models.Model):
    """A file extracted from a message, with the MIME headers it came with.

    Headers are stored serialized in ``headers`` and can be read and
    written with mapping syntax (``attachment['Content-Type']``).
    """

    message = models.ForeignKey(
        Message,
        related_name='attachments',
        null=True,
        blank=True,
        verbose_name=_('Message'),
        on_delete=models.CASCADE
    )

    headers = models.TextField(
        _('Headers'),
        null=True,
        blank=True,
    )

    document = models.FileField(
        _('Document'),
        upload_to=utils.get_attachment_save_path,
    )

    def delete(self, *args, **kwargs):
        """Deletes the attachment, including its stored document."""
        self.document.delete()
        return super().delete(*args, **kwargs)

    def _get_rehydrated_headers(self):
        """Parse ``headers`` into a message object (empty if unset)."""
        headers = self.headers
        if headers is None:
            return EmailMessage()
        return email.message_from_string(headers)

    def _set_dehydrated_headers(self, email_object):
        """Serialize ``email_object`` into the ``headers`` field."""
        self.headers = email_object.as_string()

    def __delitem__(self, name):
        rehydrated = self._get_rehydrated_headers()
        del rehydrated[name]
        self._set_dehydrated_headers(rehydrated)

    def __setitem__(self, name, value):
        rehydrated = self._get_rehydrated_headers()
        rehydrated[name] = value
        self._set_dehydrated_headers(rehydrated)

    def get_filename(self):
        """Returns the original filename of this attachment.

        ``None`` is returned when the stored headers carry no filename.
        """
        file_name = self._get_rehydrated_headers().get_filename()
        if isinstance(file_name, str):
            result = utils.convert_header_to_unicode(file_name)
            if result is None:
                return file_name
            return result
        else:
            return None

    def items(self):
        """Return the stored headers as ``(name, value)`` pairs."""
        return self._get_rehydrated_headers().items()

    def __getitem__(self, name):
        value = self._get_rehydrated_headers()[name]
        if value is None:
            # The underlying message object returns None for missing
            # headers; mapping access is expected to raise instead.
            raise KeyError('Header %s does not exist' % name)
        return value

    def __str__(self):
        return self.document.url

    class Meta:
        verbose_name = _('Message attachment')
        verbose_name_plural = _('Message attachments')