1# coding: utf-8
2#
3#    Postfix queue control python tool (pymailq)
4#
5#    Copyright (C) 2014 Denis Pompilio (jawa) <denis.pompilio@gmail.com>
6#
7#    This file is part of pymailq
8#
9#    This program is free software; you can redistribute it and/or
10#    modify it under the terms of the GNU General Public License
11#    as published by the Free Software Foundation; either version 2
12#    of the License, or (at your option) any later version.
13#
14#    This program is distributed in the hope that it will be useful,
15#    but WITHOUT ANY WARRANTY; without even the implied warranty of
16#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17#    GNU General Public License for more details.
18#
19#    You should have received a copy of the GNU General Public License
20#    along with this program; if not, see <http://www.gnu.org/licenses/>.
21
22from __future__ import unicode_literals
23
24import sys
25import os
26import gc
27import re
28import subprocess
29import email
30from email import header
31from collections import Counter
32from datetime import datetime, timedelta
33from pymailq import CONFIG, debug
34
35
class MailHeaders(object):
    """
    Plain attribute container for the headers of a mail.

    Instances start empty. Attributes are created dynamically when the
    :meth:`~store.Mail.parse` method of the owning :class:`~store.Mail`
    object is called: the mail content is handed to
    :func:`email.message_from_string` and every header found in the
    resulting message becomes one attribute of this object.

    Any RFC *822-style* header may show up, including but not limited to:

    - :mailheader:`Received`
    - :mailheader:`From`
    - :mailheader:`To`
    - :mailheader:`Cc`
    - :mailheader:`Bcc`
    - :mailheader:`Sender`
    - :mailheader:`Reply-To`
    - :mailheader:`Subject`

    Header names are used verbatim (case is kept) as attribute names, so
    headers are reached through the ``head`` attribute of the mail, for
    example ``mail.head.From`` or ``mail.head.Received``. Names which are
    not valid Python identifiers, such as :mailheader:`Reply-To`, have to be
    read with :func:`getattr`. Every attribute holds the *list* of values
    found for that header.

    .. seealso::

        Python modules:
            :mod:`email` -- An email and MIME handling package

            :class:`email.message.Message` -- Representing an email message

        :rfc:`822` -- Standard for ARPA Internet Text Messages
    """
70
71
class Mail(object):
    """
    Simple object to manipulate email messages.

    This class provides the necessary methods to load and inspect the
    content of a queued mail. Its features are mainly based on the classes
    and functions provided by the :mod:`email` module. However, the
    information stored in the :class:`email.message.Message` instance is
    extracted to extend the attributes of the :class:`~store.Mail` instance.

    Initialization of :class:`~store.Mail` instances is made the following
    way:

    :param str mail_id: Mail's queue ID string, optionally followed by a
                        status flag (``*`` for active, ``!`` for hold)
    :param int size: Mail size in Bytes (Default: ``0``)
    :param datetime.datetime date:  Acceptance date and time in mails queue.
                                    (Default: :data:`None`)
    :param str sender: Mail sender string as seen in mails queue.
                       (Default: empty :func:`str`)

    The :class:`~store.Mail` class defines the following attributes:

        .. attribute:: qid

            Mail Postfix queue ID string, without the trailing status flag.

        .. attribute:: size

            Mail size in bytes. Expected type is :func:`int`.

        .. attribute:: parsed

            :func:`bool` value to track if mail's content has been loaded
            with the :meth:`~store.Mail.parse` method.

        .. attribute:: parse_error

            Last encountered parse error message :func:`str`.

        .. attribute:: date

            :class:`~datetime.datetime` object of acceptance date and time in
            mails queue.

        .. attribute:: status

            Mail's queue status :func:`str`: ``"active"``, ``"hold"`` or
            ``"deferred"``.

        .. attribute:: sender

            Mail's sender :func:`str` as seen in mails queue.

        .. attribute:: recipients

            Recipients :func:`list` as seen in mails queue.

        .. attribute:: errors

            Mail delivery errors :func:`list` as seen in mails queue.

        .. attribute:: head

            Mail's headers :class:`~store.MailHeaders` structure.

        .. attribute:: postcat_cmd

            This property uses the Postfix mail content parsing command
            defined in :attr:`pymailq.CONFIG` attribute under the key
            'cat_message'. Command and arguments list is built on every
            access from the configuration data.

            .. seealso::

                :ref:`pymailq-configuration`
    """

    def __init__(self, mail_id, size=0, date=None, sender=""):
        """Init method"""
        self.parsed = False
        self.parse_error = ""
        self.qid = mail_id
        self.date = date
        self.status = "deferred"
        self.size = int(size)
        self.sender = sender
        self.recipients = []
        self.errors = []
        self.head = MailHeaders()

        # postqueue appends an optional status flag to the queue ID.
        # Slicing (instead of indexing) keeps an empty ID from raising.
        postqueue_status = {'*': "active", '!': "hold"}
        flag = mail_id[-1:]
        if flag in postqueue_status:
            self.qid = mail_id[:-1]
            self.status = postqueue_status[flag]

    @property
    def postcat_cmd(self):
        """
        Get the cat_message command from configuration

        A new list is built on every access so that the configured command
        is never modified.

        :return: Command as :class:`list`
        """
        postcat_cmd = list(CONFIG['commands']['cat_message']) + [self.qid]
        if CONFIG['commands']['use_sudo']:
            postcat_cmd.insert(0, 'sudo')
        return postcat_cmd

    @staticmethod
    def _decode_mime_header(value):
        """
        Decode the :rfc:`2047` encoded words of a header value.

        All fragments of the header are decoded and joined, whatever the
        running Python major version is.

        :param str value: Raw header value
        :return: Decoded text, or the raw value as text when it cannot be
                 decoded (malformed encoded word, unknown charset, ...)
        """
        from email.errors import HeaderParseError

        # unicode on Python 2 (unicode_literals), str on Python 3
        text_type = type("")
        try:
            decoded = header.make_header(header.decode_header(value))
            return text_type(decoded)
        except (HeaderParseError, LookupError, UnicodeError):
            # Best effort: showing the raw header beats failing to display
            if isinstance(value, bytes):
                return value.decode('utf-8', 'replace')
            return value

    def show(self):
        """
        Return mail detailed representation for printing

        The first line identifies the mail by its queue ID, then every
        public attribute of :attr:`~Mail.head` is listed, sorted by name,
        as ``Name: value``. Multiple values of a header are joined with
        ``", "`` and the :mailheader:`Subject` header is MIME-decoded.

        :return: Representation as :class:`str`
        """
        string_types = (bytes, type(""))
        lines = ["=== Mail %s ===" % (self.qid,)]

        for attr in sorted(dir(self.head)):
            if attr.startswith("_"):
                continue

            value = getattr(self.head, attr)
            if not isinstance(value, string_types):
                value = ", ".join(value)

            if attr == "Subject":
                value = self._decode_mime_header(value)

            lines.append("%s: %s" % (attr, value))

        return "\n".join(lines) + "\n"

    @debug
    def parse(self):
        """
        Parse message content.

        This method uses the Postfix mail content parsing command defined in
        :attr:`~Mail.postcat_cmd` attribute.
        This command is run using a :class:`subprocess.Popen` instance.

        When the command writes nothing on its standard output, its error
        output is stored in :attr:`~Mail.parse_error` and the mail is left
        untouched. Otherwise :attr:`~Mail.size`, :attr:`~Mail.date` and
        :attr:`~Mail.sender` are filled from the command output if they are
        still unset, and parsed headers become attributes of
        :attr:`~Mail.head`. Headers are retrieved with help of
        :func:`~email.message_from_string` function provided by the
        :mod:`email` module.

        .. seealso::

            Postfix manual:
                `postcat`_ -- Show Postfix queue file contents

        """
        # Reset parsing error message
        self.parse_error = ""

        child = subprocess.Popen(self.postcat_cmd,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE)
        stdout, stderr = child.communicate()

        if not stdout:
            # Ignore first 3 line on stderr which are:
            #   postcat: name_mask: all
            #   postcat: inet_addr_local: configured 3 IPv4 addresses
            #   postcat: inet_addr_local: configured 3 IPv6 addresses
            error_lines = stderr.decode('utf-8', errors='replace').split('\n')
            self.parse_error = "\n".join(error_lines[3:])
            return

        content_lines = []
        for line in stdout.decode('utf-8', errors='replace').split('\n'):
            if self.size == 0 and line.startswith("message_size: "):
                self.size = int(line[14:].strip().split()[0])
            elif self.date is None and line.startswith("create_time: "):
                self.date = datetime.strptime(line[13:].strip(),
                                              "%a %b %d %H:%M:%S %Y")
            elif not self.sender and line.startswith("sender: "):
                self.sender = line[8:].strip()
            elif line.startswith("regular_text: "):
                content_lines.append(line[14:])

        raw_content = "".join("%s\n" % (text,) for text in content_lines)

        # For python2.7 compatibility, encode unicode to str
        if not isinstance(raw_content, str):
            raw_content = raw_content.encode('utf-8')

        message = email.message_from_string(raw_content)

        # A header may appear several times: store all of its values
        for name in set(message.keys()):
            setattr(self.head, name, message.get_all(name))

        self.parsed = True

    @debug
    def dump(self):
        """
        Dump mail's gathered information to a :class:`dict` object.

        Mail information is split in two parts in the dictionary.
        ``postqueue`` key groups every public attribute of the mail except
        :attr:`~Mail.head`, while ``headers`` groups the public attributes
        of the :class:`~store.MailHeaders` structure filled by the
        :meth:`~store.Mail.parse` method.

        If mail has not been parsed with the :meth:`~store.Mail.parse` method,
        content of the ``headers`` key will be empty.

        :return: Mail gathered information
        :rtype: :class:`dict`
        """
        postqueue = dict((attr, value)
                         for attr, value in vars(self).items()
                         if attr[0] != "_" and attr != 'head')
        headers = dict((name, value)
                       for name, value in vars(self.head).items()
                       if name[0] != "_")

        return {'postqueue': postqueue,
                'headers': headers}
292
293
class PostqueueStore(object):
    """
    Postfix mails queue information storage.

    The :class:`~store.PostqueueStore` provides methods to load Postfix
    queued mails information into Python structures. Those structures are
    based on :class:`~store.Mail` and :class:`~store.MailHeaders` classes
    which can be processed by a :class:`~selector.MailSelector` instance.

    The :class:`~store.PostqueueStore` class defines the following attributes:

        .. attribute:: mails

            Loaded :class:`MailClass` objects :func:`list`.

        .. attribute:: loaded_at

            :class:`datetime.datetime` instance to store load date and time
            information, useful for data deprecation tracking. Updated on
            :meth:`~store.PostqueueStore.load` call with
            :meth:`datetime.datetime.now` method.

        .. attribute:: postqueue_cmd

            :obj:`list` object to store Postfix command and arguments to view
            the mails queue content. It is built when the store is created
            from the command defined in :attr:`pymailq.CONFIG` attribute
            under the key 'list_queue'.

        .. attribute:: spool_path

            Postfix spool path string, taken from :attr:`pymailq.CONFIG`
            attribute under the key 'postfix_spool'.

        .. attribute:: postqueue_mailstatus

            Postfix known queued mail status list.
            Default is ``['active', 'deferred', 'hold']``.

        .. attribute:: mail_id_re

            Python compiled regular expression object (:class:`re.RegexObject`)
            provided by :func:`re.compile` method to match postfix IDs.
            Recognized IDs are hexadecimals, may be 10 to 12 chars length and
            followed with ``*`` or ``!``.
            Default used regular expression is: ``r"^[A-F0-9]{10,12}[*!]?$"``.

        .. attribute:: mail_addr_re

            Python compiled regular expression object (:class:`re.RegexObject`)
            provided by :func:`re.compile` method to match email addresses.
            Default used regular expression is:
            ``r"^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Za-z]+$"``

        .. attribute:: MailClass

            The class used to manipulate/parse mails individually.
            Default is :class:`~store.Mail`.

    .. seealso::

        Python modules:
            :mod:`datetime` -- Basic date and time types

            :mod:`re` -- Regular expression operations

        Postfix manual:
            `postqueue`_ -- Postfix queue control

        :rfc:`3696` -- Checking and Transformation of Names
    """
    postqueue_cmd = None
    spool_path = None
    postqueue_mailstatus = ['active', 'deferred', 'hold']
    mail_id_re = re.compile(r"^[A-F0-9]{10,12}[*!]?$")
    mail_addr_re = re.compile(r"^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]+$")
    MailClass = Mail

    def __init__(self):
        """Init method"""
        self.spool_path = CONFIG['core']['postfix_spool']
        # Work on a copy: inserting 'sudo' in the configured list itself
        # would stack one more 'sudo' for every store created.
        self.postqueue_cmd = list(CONFIG['commands']['list_queue'])
        if CONFIG['commands']['use_sudo']:
            self.postqueue_cmd.insert(0, 'sudo')

        self.loaded_at = None
        self.mails = []

    @debug
    def _get_postqueue_output(self):
        """
        Get Postfix postqueue command output.

        This method uses the postfix command defined in
        :attr:`~PostqueueStore.postqueue_cmd` attribute to view the mails queue
        content.

        Command defined in :attr:`~PostqueueStore.postqueue_cmd` attribute is
        run using a :class:`subprocess.Popen` instance.

        :return: Command's output lines, stripped, without the first line
                 and the last two ones.
        :rtype: :func:`list`

        .. seealso::

            Python module:
                :mod:`subprocess` -- Subprocess management
        """
        child = subprocess.Popen(self.postqueue_cmd,
                                 stdout=subprocess.PIPE)
        stdout = child.communicate()[0]
        lines = stdout.decode('utf-8', errors='replace').split('\n')

        # return lines list without the headers and footers
        return [line.strip() for line in lines][1:-2]

    def _is_mail_id(self, mail_id):
        """
        Check mail_id for a valid postfix queued mail ID.

        Validation is made using a :class:`re.RegexObject` stored in
        the :attr:`~PostqueueStore.mail_id_re` attribute of the
        :class:`~store.PostqueueStore` instance.

        :param str mail_id: Mail Postfix queue ID string
        :return: True or false
        :rtype: :func:`bool`
        """
        return self.mail_id_re.match(mail_id) is not None

    @staticmethod
    def _guess_mail_date(date_fields, now):
        """
        Build the acceptance date of a mail from its postqueue fields.

        Postfix does not print the year in mails timestamps, so the most
        recent year giving a valid date which is not in the future is used.

        :param list date_fields: Week day, month, day and time strings
        :param datetime.datetime now: Reference date and time
        :return: Acceptance date and time
        :rtype: :class:`datetime.datetime`
        :raises ValueError: if fields do not form a valid date
        """
        datestr = " ".join(date_fields)
        last_error = None
        # Going back up to 8 years always reaches a leap year, which is
        # required to resolve "Feb 29".
        for year in range(now.year, now.year - 9, -1):
            try:
                date = datetime.strptime("{0} {1}".format(datestr, year),
                                         "%a %b %d %H:%M:%S %Y")
            except ValueError as exc:
                last_error = exc
                continue
            # A date in the future means the mail was received a previous
            # year (or there is an NTP problem).
            if date <= now:
                return date

        if last_error is None:
            last_error = ValueError("unable to guess year of %r" % (datestr,))
        raise last_error

    @debug
    def _load_from_postqueue(self, filename=None):
        """
        Load content from postfix queue using postqueue command output.

        Output lines from :attr:`~store.PostqueueStore._get_postqueue_output`
        are parsed to build :class:`~store.Mail` objects. Sample Postfix queue
        control tool (`postqueue`_) output::

            C0004979687     4769 Tue Apr 29 06:35:05  sender@domain.com
            (error message from mx.remote1.org with parenthesis)
                                                     first.rcpt@remote1.org
            (error message from mx.remote2.org with parenthesis)
                                                     second.rcpt@remote2.org
                                                     third.rcpt@remote2.org

        Parsing rules are pretty simple:

        - Line starts with a valid :attr:`Mail.qid`: create new
          :class:`~store.Mail` object with :attr:`~Mail.qid`,
          :attr:`~Mail.size`, :attr:`~Mail.date` and :attr:`~Mail.sender`
          information from line.

          +-------------+------+---------------------------+-----------------+
          | Queue ID    | Size | Reception date and time   | Sender          |
          +-------------+------+-----+-----+----+----------+-----------------+
          | C0004979687 | 4769 | Tue | Apr | 29 | 06:35:05 | user@domain.com |
          +-------------+------+-----+-----+----+----------+-----------------+

        - Line starts with a parenthesis: store error messages to last created
          :class:`~store.Mail` object's :attr:`~Mail.errors` attribute.

        - Any other matches: add new recipient to the :attr:`~Mail.recipients`
          attribute of the last created :class:`~store.Mail` object.

        Error and recipient lines found before any mail line are ignored.

        Optional argument ``filename`` can be set with a file containing
        output of the `postqueue`_ command. In this case, output lines of
        `postqueue`_ command are directly read from ``filename`` and parsed,
        the `postqueue`_ command is never used.

        :param str filename: File holding a `postqueue`_ command output
        """
        if filename is None:
            postqueue_output = self._get_postqueue_output()
        else:
            with open(filename) as queue_file:
                postqueue_output = queue_file.readlines()

        now = datetime.now()
        mail = None
        for line in postqueue_output:
            line = line.strip()

            # Mails are blank line separated, headers and footers start
            # with dash (-)
            if not line or line.startswith('-'):
                continue

            fields = line.split()
            if line.startswith("("):
                # Store error message without parenthesis: [1:-1]
                # gathered errors must be associated with specific recipients
                # TODO: change recipients or errors structures to link these
                #       objects together.
                if mail is not None:
                    mail.errors.append(" ".join(fields)[1:-1])
            elif self._is_mail_id(fields[0]):
                mail = self.MailClass(
                    fields[0], size=fields[1],
                    date=self._guess_mail_date(fields[2:-1], now),
                    sender=fields[-1])
                self.mails.append(mail)
            elif mail is not None:
                # Email address validity check can be tricky. RFC3696 talks
                # about. For now, we use a simple regular expression to
                # match most of email addresses.
                rcpt_email_addr = " ".join(fields)
                if self.mail_addr_re.match(rcpt_email_addr):
                    mail.recipients.append(rcpt_email_addr)

    @debug
    def _load_from_spool(self):
        """
        Load content from postfix queue using files from spool.

        For each status listed in
        :attr:`~PostqueueStore.postqueue_mailstatus`, the matching directory
        under :attr:`~PostqueueStore.spool_path` is walked and every file
        name found is used as a queue ID to build a :attr:`MailClass` object.
        The status of the mail is the name of the walked directory and its
        other fields are gathered with the :meth:`~store.Mail.parse` method.

        Loaded mails are stored as :class:`~store.Mail` objects in
        :attr:`~PostqueueStore.mails` attribute.

        .. warning::

            Be aware that parsing mails on disk is slow and can lead to
            high load usage on system with large mails queue.
        """
        for status in self.postqueue_mailstatus:
            status_path = "%s/%s" % (self.spool_path, status)
            for _dirpath, _dirnames, filenames in os.walk(status_path):
                for mail_id in filenames:
                    mail = self.MailClass(mail_id)
                    mail.status = status

                    mail.parse()

                    self.mails.append(mail)

    @debug
    def _load_from_file(self, filename):
        """Unimplemented method"""

    @debug
    def load(self, method="postqueue", filename=None):
        """
        Load content from postfix mails queue.

        Mails are loaded using postqueue command line tool or reading directly
        from spool. The optional argument, if present, is a method string and
        specifies the method used to gather mails information. By default,
        method is set to ``"postqueue"`` and the standard Postfix queue
        control tool: `postqueue`_ is used.

        :param str method: Method used to load mails from Postfix queue
        :param str filename: File to load mails from
        :raises AttributeError: if no loader exists for ``method``; mails
                                loaded so far are kept in that case

        Provided method :func:`str` name is directly used with :func:`getattr`
        to find a *self._load_from_<method>* method.
        """
        # Resolve the loader first so that an unknown method does not
        # discard the current content of the store.
        loader = getattr(self, "_load_from_{0}".format(method))

        # releasing memory
        self.mails = []
        gc.collect()

        if filename is None:
            loader()
        else:
            loader(filename)
        self.loaded_at = datetime.now()

    @debug
    def summary(self):
        """
        Summarize the mails queue content.

        :return: Mail queue summary as :class:`dict`

        Sizes are in bytes. Mails without a known date are not counted in
        ``mails_by_age``.

        Example response::

            {
                'total_mails': 500,
                'mails_by_age': {
                    'last_24h': 450,
                    '1_to_4_days_ago': 40,
                    'older_than_4_days': 10
                },
                'total_mails_size': 709750,
                'average_mail_size': 1419.5,
                'max_mail_size': 2414,
                'min_mail_size': 423,
                'top_errors': [
                    ('mail transport unavailable', 484),
                    ('Test error message', 16)
                ],
                'top_recipient_domains': [
                    ('test-domain.tld', 500)
                ],
                'top_recipients': [
                    ('user-3@test-domain.tld', 200),
                    ('user-2@test-domain.tld', 200),
                    ('user-1@test-domain.tld', 100)
                ],
                'top_sender_domains': [
                    ('test-domain.tld', 500)
                ],
                'top_senders': [
                    ('sender-1@test-domain.tld', 100),
                    ('sender-2@test-domain.tld', 100),
                    ('sender-7@test-domain.tld', 50),
                    ('sender-4@test-domain.tld', 50),
                    ('sender-5@test-domain.tld', 50)
                ],
                'top_status': [
                    ('deferred', 500),
                    ('active', 0),
                    ('hold', 0)
                ],
                'unique_recipient_domains': 1,
                'unique_recipients': 3,
                'unique_sender_domains': 1,
                'unique_senders': 8
            }
        """
        senders = Counter()
        sender_domains = Counter()
        recipients = Counter()
        recipient_domains = Counter()
        status = Counter(active=0, hold=0, deferred=0)
        errors = Counter()
        sizes = []
        mails_by_age = {
            'last_24h': 0,
            '1_to_4_days_ago': 0,
            'older_than_4_days': 0
        }

        # Single reference time so that every mail is aged consistently
        now = datetime.now()

        for mail in self.mails:
            status[mail.status] += 1
            senders[mail.sender] += 1
            if '@' in mail.sender:
                sender_domains[mail.sender.split('@', 1)[1]] += 1
            for recipient in mail.recipients:
                recipients[recipient] += 1
                if '@' in recipient:
                    recipient_domains[recipient.split('@', 1)[1]] += 1
            errors.update(mail.errors)
            sizes.append(mail.size)

            # Date is unknown when a mail loaded from spool failed to parse
            if mail.date is None:
                continue
            age_days = (now - mail.date).days
            if age_days >= 4:
                mails_by_age['older_than_4_days'] += 1
            elif age_days >= 1:
                mails_by_age['1_to_4_days_ago'] += 1
            elif age_days == 0:
                mails_by_age['last_24h'] += 1

        total_mails_size = sum(sizes)
        average_mail_size = 0
        max_mail_size = 0
        min_mail_size = 0
        if sizes:
            # float() keeps a true division on Python 2
            average_mail_size = float(total_mails_size) / len(sizes)
            max_mail_size = max(sizes)
            min_mail_size = min(sizes)

        return {
            'total_mails': len(self.mails),
            'mails_by_age': mails_by_age,
            'total_mails_size': total_mails_size,
            'average_mail_size': average_mail_size,
            'max_mail_size': max_mail_size,
            'min_mail_size': min_mail_size,
            'top_status': status.most_common(5),
            'unique_senders': len(senders),
            'unique_sender_domains': len(sender_domains),
            'unique_recipients': len(recipients),
            'unique_recipient_domains': len(recipient_domains),
            'top_senders': senders.most_common(5),
            'top_sender_domains': sender_domains.most_common(5),
            'top_recipients': recipients.most_common(5),
            'top_recipient_domains': recipient_domains.most_common(5),
            'top_errors': errors.most_common(5)
        }
689