1# coding: utf-8 2# 3# Postfix queue control python tool (pymailq) 4# 5# Copyright (C) 2014 Denis Pompilio (jawa) <denis.pompilio@gmail.com> 6# 7# This file is part of pymailq 8# 9# This program is free software; you can redistribute it and/or 10# modify it under the terms of the GNU General Public License 11# as published by the Free Software Foundation; either version 2 12# of the License, or (at your option) any later version. 13# 14# This program is distributed in the hope that it will be useful, 15# but WITHOUT ANY WARRANTY; without even the implied warranty of 16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17# GNU General Public License for more details. 18# 19# You should have received a copy of the GNU General Public License 20# along with this program; if not, see <http://www.gnu.org/licenses/>. 21 22from __future__ import unicode_literals 23 24import sys 25import os 26import gc 27import re 28import subprocess 29import email 30from email import header 31from collections import Counter 32from datetime import datetime, timedelta 33from pymailq import CONFIG, debug 34 35 36class MailHeaders(object): 37 """ 38 Simple object to store mail headers. 39 40 Object's attributes are dynamically created when parent :class:`~store.Mail` 41 object's method :meth:`~store.Mail.parse` is called. Those attributes are 42 retrieved with help of :func:`~email.message_from_string` method provided 43 by the :mod:`email` module. 44 45 Standard RFC *822-style* mail headers becomes attributes including but not 46 limited to: 47 48 - :mailheader:`Received` 49 - :mailheader:`From` 50 - :mailheader:`To` 51 - :mailheader:`Cc` 52 - :mailheader:`Bcc` 53 - :mailheader:`Sender` 54 - :mailheader:`Reply-To` 55 - :mailheader:`Subject` 56 57 Case is kept while creating attribute and access will be made with 58 :attr:`Mail.From` or :attr:`Mail.Received` for example. All those 59 attributes will return *list* of values. 60 61 .. seealso:: 62 63 Python modules: 64 :mod:`email` -- An email and MIME handling package 65 66 :class:`email.message.Message` -- Representing an email message 67 68 :rfc:`822` -- Standard for ARPA Internet Text Messages 69 """ 70 71 72class Mail(object): 73 """ 74 Simple object to manipulate email messages. 75 76 This class provides the necessary methods to load and inspect mails 77 content. This object functionnalities are mainly based on :mod:`email` 78 module's provided class and methods. However, 79 :class:`email.message.Message` instance's stored informations are 80 extracted to extend :class:`~store.Mail` instances attributes. 81 82 Initialization of :class:`~store.Mail` instances are made the following 83 way: 84 85 :param str mail_id: Mail's queue ID string 86 :param int size: Mail size in Bytes (Default: ``0``) 87 :param datetime.datetime date: Acceptance date and time in mails queue. 88 (Default: :data:`None`) 89 :param str sender: Mail sender string as seen in mails queue. 90 (Default: empty :func:`str`) 91 92 The :class:`~pymailq.Mail` class defines the following attributes: 93 94 .. attribute:: qid 95 96 Mail Postfix queue ID string, validated by 97 :meth:`~store.PostqueueStore._is_mail_id` method. 98 99 .. attribute:: size 100 101 Mail size in bytes. Expected type is :func:`int`. 102 103 .. attribute:: parsed 104 105 :func:`bool` value to track if mail's content has been loaded from 106 corresponding spool file. 107 108 .. attribute:: parse_error 109 110 Last encountered parse error message :func:`str`. 111 112 .. attribute:: date 113 114 :class:`~datetime.datetime` object of acceptance date and time in 115 mails queue. 116 117 .. attribute:: status 118 119 Mail's queue status :func:`str`. 120 121 .. attribute:: sender 122 123 Mail's sender :func:`str` as seen in mails queue. 124 125 .. attribute:: recipients 126 127 Recipients :func:`list` as seen in mails queue. 128 129 .. attribute:: errors 130 131 Mail deliver errors :func:`list` as seen in mails queue. 132 133 .. attribute:: head 134 135 Mail's headers :class:`~store.MailHeaders` structure. 136 137 .. attribute:: postcat_cmd 138 139 This property use Postfix mails content parsing command defined in 140 :attr:`pymailq.CONFIG` attribute under the key 'cat_message'. 141 Command and arguments list is build on call with the configuration 142 data. 143 144 .. seealso:: 145 146 :ref:`pymailq-configuration` 147 """ 148 149 def __init__(self, mail_id, size=0, date=None, sender=""): 150 """Init method""" 151 self.parsed = False 152 self.parse_error = "" 153 self.qid = mail_id 154 self.date = date 155 self.status = "" 156 self.size = int(size) 157 self.sender = sender 158 self.recipients = [] 159 self.errors = [] 160 self.head = MailHeaders() 161 162 # Getting optionnal status from postqueue mail_id 163 postqueue_status = {'*': "active", '!': "hold"} 164 if mail_id[-1] in postqueue_status: 165 self.qid = mail_id[:-1] 166 self.status = postqueue_status.get(mail_id[-1], "deferred") 167 168 @property 169 def postcat_cmd(self): 170 """ 171 Get the cat_message command from configuration 172 :return: Command as :class:`list` 173 """ 174 postcat_cmd = CONFIG['commands']['cat_message'] + [self.qid] 175 if CONFIG['commands']['use_sudo']: 176 postcat_cmd.insert(0, 'sudo') 177 return postcat_cmd 178 179 def show(self): 180 """ 181 Return mail detailled representation for printing 182 183 :return: Representation as :class:`str` 184 """ 185 output = "=== Mail %s ===\n" % (self.qid,) 186 for attr in sorted(dir(self.head)): 187 if attr.startswith("_"): 188 continue 189 190 value = getattr(self.head, attr) 191 if not isinstance(value, str): 192 value = ", ".join(value) 193 194 if attr == "Subject": 195 print(attr, value) 196 value, enc = header.decode_header(value)[0] 197 print(enc, attr, value) 198 if sys.version_info[0] == 2: 199 value = value.decode(enc) if enc else unicode(value) 200 201 output += "%s: %s\n" % (attr, value) 202 return output 203 204 @debug 205 def parse(self): 206 """ 207 Parse message content. 208 209 This method use Postfix mails content parsing command defined in 210 :attr:`~Mail.postcat_cmd` attribute. 211 This command is runned using :class:`subprocess.Popen` instance. 212 213 Parsed headers become attributes and are retrieved with help of 214 :func:`~email.message_from_string` function provided by the 215 :mod:`email` module. 216 217 .. seealso:: 218 219 Postfix manual: 220 `postcat`_ -- Show Postfix queue file contents 221 222 """ 223 # Reset parsing error message 224 self.parse_error = "" 225 226 child = subprocess.Popen(self.postcat_cmd, 227 stdout=subprocess.PIPE, 228 stderr=subprocess.PIPE) 229 stdout, stderr = child.communicate() 230 231 if not len(stdout): 232 # Ignore first 3 line on stderr which are: 233 # postcat: name_mask: all 234 # postcat: inet_addr_local: configured 3 IPv4 addresses 235 # postcat: inet_addr_local: configured 3 IPv6 addresses 236 self.parse_error = "\n".join(stderr.decode().split('\n')[3:]) 237 return 238 239 raw_content = "" 240 for line in stdout.decode('utf-8', errors='replace').split('\n'): 241 if self.size == 0 and line.startswith("message_size: "): 242 self.size = int(line[14:].strip().split()[0]) 243 elif self.date is None and line.startswith("create_time: "): 244 self.date = datetime.strptime(line[13:].strip(), 245 "%a %b %d %H:%M:%S %Y") 246 elif not len(self.sender) and line.startswith("sender: "): 247 self.sender = line[8:].strip() 248 elif line.startswith("regular_text: "): 249 raw_content += "%s\n" % (line[14:],) 250 251 # For python2.7 compatibility, encode unicode to str 252 if not isinstance(raw_content, str): 253 raw_content = raw_content.encode('utf-8') 254 255 message = email.message_from_string(raw_content) 256 257 for header in set(message.keys()): 258 value = message.get_all(header) 259 setattr(self.head, header, value) 260 261 self.parsed = True 262 263 @debug 264 def dump(self): 265 """ 266 Dump mail's gathered informations to a :class:`dict` object. 267 268 Mails informations are splitted in two parts in dictionnary. 269 ``postqueue`` key regroups every informations directly gathered from 270 Postfix queue, while ``headers`` regroups :class:`~store.MailHeaders` 271 attributes converted from mail content with the 272 :meth:`~store.Mail.parse` method. 273 274 If mail has not been parsed with the :meth:`~store.Mail.parse` method, 275 informations under the ``headers`` key will be empty. 276 277 :return: Mail gathered informations 278 :rtype: :class:`dict` 279 """ 280 datas = {'postqueue': {}, 281 'headers': {}} 282 283 for attr in self.__dict__: 284 if attr[0] != "_" and attr != 'head': 285 datas['postqueue'].update({attr: getattr(self, attr)}) 286 287 for header in self.head.__dict__: 288 if header[0] != "_": 289 datas['headers'].update({header: getattr(self.head, header)}) 290 291 return datas 292 293 294class PostqueueStore(object): 295 """ 296 Postfix mails queue informations storage. 297 298 The :class:`~store.PostqueueStore` provides methods to load Postfix 299 queued mails informations into Python structures. Thoses structures are 300 based on :class:`~store.Mail` and :class:`~store.MailHeaders` classes 301 which can be processed by a :class:`~selector.MailSelector` instance. 302 303 The :class:`~store.PostqueueStore` class defines the following attributes: 304 305 .. attribute:: mails 306 307 Loaded :class:`MailClass` objects :func:`list`. 308 309 .. attribute:: loaded_at 310 311 :class:`datetime.datetime` instance to store load date and time 312 informations, useful for datas deprecation tracking. Updated on 313 :meth:`~store.PostqueueStore.load` call with 314 :meth:`datetime.datetime.now` method. 315 316 .. attribute:: postqueue_cmd 317 318 :obj:`list` object to store Postfix command and arguments to view 319 the mails queue content. This property use Postfix mails content 320 parsing command defined in :attr:`pymailq.CONFIG` attribute under 321 the key 'list_queue'. Command and arguments list is build on call 322 with the configuration data. 323 324 .. attribute:: spool_path 325 326 Postfix spool path string. 327 Default is ``"/var/spool/postfix"``. 328 329 .. attribute:: postqueue_mailstatus 330 331 Postfix known queued mail status list. 332 Default is ``['active', 'deferred', 'hold']``. 333 334 .. attribute:: mail_id_re 335 336 Python compiled regular expression object (:class:`re.RegexObject`) 337 provided by :func:`re.compile` method to match postfix IDs. 338 Recognized IDs are hexadecimals, may be 10 to 12 chars length and 339 followed with ``*`` or ``!``. 340 Default used regular expression is: ``r"^[A-F0-9]{10,12}[*!]?$"``. 341 342 .. attribute:: mail_addr_re 343 344 Python compiled regular expression object (:class:`re.RegexObject`) 345 provided by :func:`re.compile` method to match email addresses. 346 Default used regular expression is: 347 ``r"^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]+$"`` 348 349 .. attribute:: MailClass 350 351 The class used to manipulate/parse mails individually. 352 Default is :class:`~store.Mail`. 353 354 .. seealso:: 355 356 Python modules: 357 :mod:`datetime` -- Basic date and time types 358 359 :mod:`re` -- Regular expression operations 360 361 Postfix manual: 362 `postqueue`_ -- Postfix queue control 363 364 :rfc:`3696` -- Checking and Transformation of Names 365 """ 366 postqueue_cmd = None 367 spool_path = None 368 postqueue_mailstatus = ['active', 'deferred', 'hold'] 369 mail_id_re = re.compile(r"^[A-F0-9]{10,12}[*!]?$") 370 mail_addr_re = re.compile(r"^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]+$") 371 MailClass = Mail 372 373 def __init__(self): 374 """Init method""" 375 self.spool_path = CONFIG['core']['postfix_spool'] 376 self.postqueue_cmd = CONFIG['commands']['list_queue'] 377 if CONFIG['commands']['use_sudo']: 378 self.postqueue_cmd.insert(0, 'sudo') 379 380 self.loaded_at = None 381 self.mails = [] 382 383 @debug 384 def _get_postqueue_output(self): 385 """ 386 Get Postfix postqueue command output. 387 388 This method used the postfix command defined in 389 :attr:`~PostqueueStore.postqueue_cmd` attribute to view the mails queue 390 content. 391 392 Command defined in :attr:`~PostqueueStore.postqueue_cmd` attribute is 393 runned using a :class:`subprocess.Popen` instance. 394 395 :return: Command's output lines. 396 :rtype: :func:`list` 397 398 .. seealso:: 399 400 Python module: 401 :mod:`subprocess` -- Subprocess management 402 """ 403 child = subprocess.Popen(self.postqueue_cmd, 404 stdout=subprocess.PIPE) 405 stdout = child.communicate()[0] 406 407 # return lines list without the headers and footers 408 return [line.strip() for line in stdout.decode().split('\n')][1:-2] 409 410 def _is_mail_id(self, mail_id): 411 """ 412 Check mail_id for a valid postfix queued mail ID. 413 414 Validation is made using a :class:`re.RegexObject` stored in 415 the :attr:`~PostqueueStore.mail_id_re` attribute of the 416 :class:`~store.PostqueueStore` instance. 417 418 :param str mail_id: Mail Postfix queue ID string 419 :return: True or false 420 :rtype: :func:`bool` 421 """ 422 423 if self.mail_id_re.match(mail_id) is None: 424 return False 425 return True 426 427 @debug 428 def _load_from_postqueue(self, filename=None): 429 """ 430 Load content from postfix queue using postqueue command output. 431 432 Output lines from :attr:`~store.PostqueueStore._get_postqueue_output` 433 are parsed to build :class:`~store.Mail` objects. Sample Postfix queue 434 control tool (`postqueue`_) output:: 435 436 C0004979687 4769 Tue Apr 29 06:35:05 sender@domain.com 437 (error message from mx.remote1.org with parenthesis) 438 first.rcpt@remote1.org 439 (error message from mx.remote2.org with parenthesis) 440 second.rcpt@remote2.org 441 third.rcpt@remote2.org 442 443 Parsing rules are pretty simple: 444 445 - Line starts with a valid :attr:`Mail.qid`: create new 446 :class:`~store.Mail` object with :attr:`~Mail.qid`, 447 :attr:`~Mail.size`, :attr:`~Mail.date` and :attr:`~Mail.sender` 448 informations from line. 449 450 +-------------+------+---------------------------+-----------------+ 451 | Queue ID | Size | Reception date and time | Sender | 452 +-------------+------+-----+-----+----+----------+-----------------+ 453 | C0004979687 | 4769 | Tue | Apr | 29 | 06:35:05 | user@domain.com | 454 +-------------+------+-----+-----+----+----------+-----------------+ 455 456 - Line starts with a parenthesis: store error messages to last created 457 :class:`~store.Mail` object's :attr:`~Mail.errors` attribute. 458 459 - Any other matches: add new recipient to the :attr:`~Mail.recipients` 460 attribute of the last created :class:`~store.Mail` object. 461 462 Optionnal argument ``filename`` can be set with a file containing 463 output of the `postqueue`_ command. In this case, output lines of 464 `postqueue`_ command are directly read from ``filename`` and parsed, 465 the `postqueue`_ command is never used. 466 """ 467 if filename is None: 468 postqueue_output = self._get_postqueue_output() 469 else: 470 postqueue_output = open(filename).readlines() 471 472 mail = None 473 for line in postqueue_output: 474 line = line.strip() 475 476 # Headers and footers start with dash (-) 477 if line.startswith('-'): 478 continue 479 # Mails are blank line separated 480 if not len(line): 481 continue 482 483 fields = line.split() 484 if "(" == fields[0][0]: 485 # Store error message without parenthesis: [1:-1] 486 # gathered errors must be associated with specific recipients 487 # TODO: change recipients or errors structures to link these 488 # objects together. 489 mail.errors.append(" ".join(fields)[1:-1]) 490 else: 491 if self._is_mail_id(fields[0]): 492 # postfix does not precise year in mails timestamps so 493 # we consider mails have been sent this year. 494 # If gathered date is in the future: 495 # mail has been received last year (or NTP problem). 496 now = datetime.now() 497 datestr = "{0} {1}".format(" ".join(fields[2:-1]), now.year) 498 date = datetime.strptime(datestr, "%a %b %d %H:%M:%S %Y") 499 if date > now: 500 date = date - timedelta(days=365) 501 502 mail = self.MailClass(fields[0], size=fields[1], 503 date=date, 504 sender=fields[-1]) 505 self.mails.append(mail) 506 else: 507 # Email address validity check can be tricky. RFC3696 talks 508 # about. Fow now, we use a simple regular expression to 509 # match most of email addresses. 510 rcpt_email_addr = " ".join(fields) 511 if self.mail_addr_re.match(rcpt_email_addr): 512 mail.recipients.append(rcpt_email_addr) 513 514 @debug 515 def _load_from_spool(self): 516 """ 517 Load content from postfix queue using files from spool. 518 519 Mails are loaded using the command defined in 520 :attr:`~PostqueueStore.postqueue_cmd` attribute. Some informations may 521 be missing using the :meth:`~store.PostqueueStore._load_from_spool` 522 method, including at least :attr:`Mail.status` field. 523 524 Loaded mails are stored as :class:`~store.Mail` objects in 525 :attr:`~PostqueueStore.mails` attribute. 526 527 .. warning:: 528 529 Be aware that parsing mails on disk is slow and can lead to 530 high load usage on system with large mails queue. 531 """ 532 for status in self.postqueue_mailstatus: 533 for fs_data in os.walk("%s/%s" % (self.spool_path, status)): 534 for mail_id in fs_data[2]: 535 mail = self.MailClass(mail_id) 536 mail.status = status 537 538 mail.parse() 539 540 self.mails.append(mail) 541 542 @debug 543 def _load_from_file(self, filename): 544 """Unimplemented method""" 545 546 @debug 547 def load(self, method="postqueue", filename=None): 548 """ 549 Load content from postfix mails queue. 550 551 Mails are loaded using postqueue command line tool or reading directly 552 from spool. The optionnal argument, if present, is a method string and 553 specifies the method used to gather mails informations. By default, 554 method is set to ``"postqueue"`` and the standard Postfix queue 555 control tool: `postqueue`_ is used. 556 557 :param str method: Method used to load mails from Postfix queue 558 :param str filename: File to load mails from 559 560 Provided method :func:`str` name is directly used with :func:`getattr` 561 to find a *self._load_from_<method>* method. 562 """ 563 # releasing memory 564 del self.mails 565 gc.collect() 566 567 self.mails = [] 568 if filename is None: 569 getattr(self, "_load_from_{0}".format(method))() 570 else: 571 getattr(self, "_load_from_{0}".format(method))(filename) 572 self.loaded_at = datetime.now() 573 574 @debug 575 def summary(self): 576 """ 577 Summarize the mails queue content. 578 579 :return: Mail queue summary as :class:`dict` 580 581 Sizes are in bytes. 582 583 Example response:: 584 585 { 586 'total_mails': 500, 587 'total_mails_size': 709750, 588 'average_mail_size': 1419.5, 589 'max_mail_size': 2414, 590 'min_mail_size': 423, 591 'top_errors': [ 592 ('mail transport unavailable', 484), 593 ('Test error message', 16) 594 ], 595 'top_recipient_domains': [ 596 ('test-domain.tld', 500) 597 ], 598 'top_recipients': [ 599 ('user-3@test-domain.tld', 200), 600 ('user-2@test-domain.tld', 200), 601 ('user-1@test-domain.tld', 100) 602 ], 603 'top_sender_domains': [ 604 ('test-domain.tld', 500) 605 ], 606 'top_senders': [ 607 ('sender-1@test-domain.tld', 100), 608 ('sender-2@test-domain.tld', 100), 609 ('sender-7@test-domain.tld', 50), 610 ('sender-4@test-domain.tld', 50), 611 ('sender-5@test-domain.tld', 50) 612 ], 613 'top_status': [ 614 ('deferred', 500), 615 ('active', 0), 616 ('hold', 0) 617 ], 618 'unique_recipient_domains': 1, 619 'unique_recipients': 3, 620 'unique_sender_domains': 1, 621 'unique_senders': 8 622 } 623 """ 624 senders = Counter() 625 sender_domains = Counter() 626 recipients = Counter() 627 recipient_domains = Counter() 628 status = Counter(active=0, hold=0, deferred=0) 629 errors = Counter() 630 total_mails_size = 0 631 average_mail_size = 0 632 max_mail_size = 0 633 min_mail_size = 0 634 mails_by_age = { 635 'last_24h': 0, 636 '1_to_4_days_ago': 0, 637 'older_than_4_days': 0 638 } 639 640 for mail in self.mails: 641 status[mail.status] += 1 642 senders[mail.sender] += 1 643 if '@' in mail.sender: 644 sender_domains[mail.sender.split('@', 1)[1]] += 1 645 for recipient in mail.recipients: 646 recipients[recipient] += 1 647 if '@' in recipient: 648 recipient_domains[recipient.split('@', 1)[1]] += 1 649 for error in mail.errors: 650 errors[error] += 1 651 total_mails_size += mail.size 652 if mail.size > max_mail_size: 653 max_mail_size = mail.size 654 if min_mail_size == 0: 655 min_mail_size = mail.size 656 elif mail.size < min_mail_size: 657 min_mail_size = mail.size 658 659 mail_age = datetime.now() - mail.date 660 if mail_age.days >= 4: 661 mails_by_age['older_than_4_days'] += 1 662 elif mail_age.days == 1: 663 mails_by_age['1_to_4_days_ago'] += 1 664 elif mail_age.days == 0: 665 mails_by_age['last_24h'] += 1 666 667 if len(self.mails): 668 average_mail_size = total_mails_size / len(self.mails) 669 670 summary = { 671 'total_mails': len(self.mails), 672 'mails_by_age': mails_by_age, 673 'total_mails_size': total_mails_size, 674 'average_mail_size': average_mail_size, 675 'max_mail_size': max_mail_size, 676 'min_mail_size': min_mail_size, 677 'top_status': status.most_common()[:5], 678 'unique_senders': len(list(senders)), 679 'unique_sender_domains': len(list(sender_domains)), 680 'unique_recipients': len(list(recipients)), 681 'unique_recipient_domains': len(list(recipient_domains)), 682 'top_senders': senders.most_common()[:5], 683 'top_sender_domains': sender_domains.most_common()[:5], 684 'top_recipients': recipients.most_common()[:5], 685 'top_recipient_domains': recipient_domains.most_common()[:5], 686 'top_errors': errors.most_common()[:5] 687 } 688 return summary 689