1#!/usr/bin/env python
2"""mailerdaemon - classes to parse mailer-daemon messages"""
3
4import rfc822
5import calendar
6import re
7import os
8import sys
9
class Unparseable(Exception):
    """Raised when a mailer-daemon message matches none of the known formats.

    This used to be a string exception; string exceptions can no longer be
    raised on Python 2.6 and later, so it is a real exception class now.
    Existing ``raise Unparseable`` and ``except Unparseable:`` sites keep
    working unchanged.
    """
11
class ErrorMessage(rfc822.Message):
    """An rfc822 message that is expected to be a mailer-daemon report."""

    def __init__(self, fp):
        """Read the message headers from the open file object *fp*."""
        rfc822.Message.__init__(self, fp)
        # Lower-cased subject; only filled in by is_warning() when the
        # message turns out not to be a warning.
        self.sub = ''

    def is_warning(self):
        """Return 1 if the Subject marks this message as a mere warning.

        A subject that starts with 'waiting mail' or contains 'warning'
        (compared case-insensitively) counts as a warning.  Otherwise the
        lower-cased subject is remembered in self.sub and 0 is returned.
        A message without a Subject header yields 0 and leaves self.sub
        untouched.
        """
        subject = self.getheader('Subject')
        if not subject:
            return 0
        subject = subject.lower()
        if subject.startswith('waiting mail') or 'warning' in subject:
            return 1
        self.sub = subject
        return 0

    def get_errors(self):
        """Return the result of the first parser in EMPARSERS that succeeds.

        The body is rewound before every attempt, and each parser is called
        with the message file and self.sub.  Raises Unparseable when every
        parser gives up.
        """
        for parser in EMPARSERS:
            self.rewindbody()
            try:
                errors = parser(self.fp, self.sub)
            except Unparseable:
                # This parser does not understand the message; try the next.
                continue
            return errors
        raise Unparseable
35
36# List of re's or tuples of re's.
37# If a re, it should contain at least a group (?P<email>...) which
38# should refer to the email address.  The re can also contain a group
39# (?P<reason>...) which should refer to the reason (error message).
40# If no reason is present, the emparse_list_reason list is used to
41# find a reason.
42# If a tuple, the tuple should contain 2 re's.  The first re finds a
43# location, the second re is repeated one or more times to find
44# multiple email addresses.  The second re is matched (not searched)
45# where the previous match ended.
46# The re's are compiled using the re module.
emparse_list_list = [
    'error: (?P<reason>unresolvable): (?P<email>.+)',
    ('----- The following addresses had permanent fatal errors -----\n',
     '(?P<email>[^ \n].*)\n( .*\n)?'),
    'remote execution.*\n.*rmail (?P<email>.+)',
    ('The following recipients did not receive your message:\n\n',
     ' +(?P<email>.*)\n(The following recipients did not receive your message:\n\n)?'),
    '------- Failure Reasons  --------\n\n(?P<reason>.*)\n(?P<email>.*)',
    '^<(?P<email>.*)>:\n(?P<reason>.*)',
    '^(?P<reason>User mailbox exceeds allowed size): (?P<email>.+)',
    '^5\\d{2} <(?P<email>[^\n>]+)>\\.\\.\\. (?P<reason>.+)',
    '^Original-Recipient: rfc822;(?P<email>.*)',
    '^did not reach the following recipient\\(s\\):\n\n(?P<email>.*) on .*\n +(?P<reason>.*)',
    '^ <(?P<email>[^\n>]+)> \\.\\.\\. (?P<reason>.*)',
    '^Report on your message to: (?P<email>.*)\nReason: (?P<reason>.*)',
    '^Your message was not delivered to +(?P<email>.*)\n +for the following reason:\n +(?P<reason>.*)',
    '^ was not +(?P<email>[^ \n].*?) *\n.*\n.*\n.*\n because:.*\n +(?P<reason>[^ \n].*?) *\n',
    ]
# Swap every entry for its compiled form, keeping the same list object:
# a plain string becomes one pattern, a tuple of strings becomes a tuple
# of patterns.  Everything is compiled with re.MULTILINE.
for i, entry in enumerate(emparse_list_list):
    if isinstance(entry, str):
        compiled = re.compile(entry, re.MULTILINE)
    else:
        compiled = tuple(re.compile(part, re.MULTILINE) for part in entry)
    emparse_list_list[i] = compiled
# Do not leave the loop helpers behind in the module namespace.
del i, entry, compiled
79
# Patterns used to find a reason (error message) when the address pattern
# did not capture one.
# A plain string entry is a template: emparse_list() replaces every "<>"
# in it by the (escaped) email address and compiles the result with
# re.MULTILINE, once per address.  A compiled entry is searched as is and
# supplies one reason for all remaining addresses.
# The entries are tried in order and the search stops at the first
# compiled entry that matches, so order is important.
# The templates are raw strings so that "\." and "\d" reach the re module
# unchanged without relying on Python passing unknown escapes through.
emparse_list_reason = [
    r'^5\d{2} <>\.\.\. (?P<reason>.*)',
    r'<>\.\.\. (?P<reason>.*)',
    re.compile(r'^<<< 5\d{2} (?P<reason>.*)', re.MULTILINE),
    re.compile('===== stderr was =====\nrmail: (?P<reason>.*)'),
    re.compile('^Diagnostic-Code: (?P<reason>.*)', re.MULTILINE),
    ]
# Finds the first line starting with "From:" (any case); emparse_list()
# only looks for addresses in the text before that line.
emparse_list_from = re.compile('^From:', re.IGNORECASE|re.MULTILINE)
def emparse_list(fp, sub):
    """Extract the failed addresses and their reasons from a message body.

    fp  -- file object positioned at the start of the body; read to the end.
    sub -- subject line, used as the reason of last resort (a leading
           'returned mail: ' is dropped).

    Returns a list of 'address: reason' strings in which every run of
    whitespace has been collapsed to a single space.  Raises Unparseable
    when none of the patterns in emparse_list_list yields an address.
    """
    data = fp.read()

    # Addresses are only looked for in the text before the first "From:"
    # line, or in the whole text when there is no such line.
    from_match = emparse_list_from.search(data)
    if from_match is not None:
        from_index = from_match.start(0)
    else:
        from_index = len(data)

    errors = []
    emails = []
    reason = None
    for pattern in emparse_list_list:
        if isinstance(pattern, tuple):
            # pattern[0] locates the start of an address list; pattern[1]
            # is then matched repeatedly, each time where the previous
            # match ended, collecting one address per match.
            found = pattern[0].search(data, 0, from_index)
            if found is None:
                continue
            try:
                reason = found.group('reason')
            except IndexError:
                # This pattern has no 'reason' group.
                pass
            found = pattern[1].match(data, found.end(0), from_index)
            while found is not None:
                emails.append(found.group('email'))
                found = pattern[1].match(data, found.end(0), from_index)
        else:
            found = pattern.search(data, 0, from_index)
            if found is None:
                continue
            emails.append(found.group('email'))
            try:
                reason = found.group('reason')
            except IndexError:
                # This pattern has no 'reason' group.
                pass
        # Only the first pattern that matches is used.
        break

    if not emails:
        raise Unparseable

    if not reason:
        # Fall back to the subject, unless a reason pattern does better.
        reason = sub
        if reason[:15] == 'returned mail: ':
            reason = reason[15:]
        for pattern in emparse_list_reason:
            if isinstance(pattern, str):
                # Per-address template.  Walk backwards so that deleting
                # an entry does not disturb the indices still to come.
                for idx in range(len(emails) - 1, -1, -1):
                    address = emails[idx]
                    specific = re.compile(
                        pattern.replace('<>', re.escape(address)),
                        re.MULTILINE)
                    found = specific.search(data)
                    if found is not None:
                        line = address.strip() + ': ' + found.group('reason')
                        errors.append(' '.join(line.split()))
                        del emails[idx]
                continue
            found = pattern.search(data)
            if found is not None:
                reason = found.group('reason')
                break

    # Whatever addresses are left share the general reason.
    for address in emails:
        line = address.strip() + ': ' + reason
        errors.append(' '.join(line.split()))
    return errors
148
# Parser functions tried in order by ErrorMessage.get_errors().  Each one
# is called as parser(fp, sub) and either returns its result or raises
# Unparseable.
EMPARSERS = [emparse_list]
150
def sort_numeric(a, b):
    """cmp-style comparison of *a* and *b* by their integer values.

    Both arguments are converted with int(); the result is -1, 0 or 1
    depending on whether int(a) is smaller than, equal to or larger than
    int(b).
    """
    left = int(a)
    right = int(b)
    return (left > right) - (left < right)
157
def parsedir(dir, modify):
    """Parse the numbered message files in *dir* and print an error summary.

    Every file in *dir* whose name consists only of digits is read as an
    ErrorMessage, in ascending numeric order.  One progress line is printed
    per file, followed by the totals and a table of the distinct errors
    (count, first and last "file (date)" occurrence, error text) in
    ascending order.

    dir    -- directory holding the message files.
    modify -- if true, every file that was parsed or was warning-only is
              renamed to ',' + name; unparseable files are left alone.

    The current working directory is left untouched: files are addressed
    by explicit path, so relative folder names keep their meaning when
    this function is called for several folders in a row.
    """
    pat = re.compile('^[0-9]*$')
    errordict = {}      # error text -> number of occurrences
    errorfirst = {}     # error text -> 'file (date)' of first occurrence
    errorlast = {}      # error text -> 'file (date)' of last occurrence
    nok = nwarn = nbad = 0

    # find all numeric file names and sort them by numeric value
    files = [fn for fn in os.listdir(dir) if pat.match(fn) is not None]
    files.sort(key=int)

    for fn in files:
        path = os.path.join(dir, fn)
        handled = 0     # set when the file may be renamed afterwards
        fp = open(path)
        try:
            m = ErrorMessage(fp)
            sender = m.getaddr('From')
            # No newline yet: the verdict is appended to this line below.
            sys.stdout.write('%s\t%-40s\t' % (fn, sender[1]))

            if m.is_warning():
                print('warning only')
                nwarn = nwarn + 1
                handled = 1
            else:
                try:
                    errors = m.get_errors()
                except Unparseable:
                    print('** Not parseable')
                    nbad = nbad + 1
                else:
                    print('%d errors' % len(errors))

                    # The date belongs to the message, not to the single
                    # error, so work it out once.  This is best effort
                    # only: a missing or malformed Date header must not
                    # abort the run, but Ctrl-C must still get through.
                    try:
                        mm, dd = m.getdate('date')[1:3]
                        date = '%s %02d' % (calendar.month_abbr[mm], dd)
                    except Exception:
                        date = '??????'
                    where = '%s (%s)' % (fn, date)

                    # Remember them
                    for e in errors:
                        if e in errordict:
                            errordict[e] = errordict[e] + 1
                        else:
                            errordict[e] = 1
                            errorfirst[e] = where
                        errorlast[e] = where

                    nok = nok + 1
                    handled = 1
        finally:
            # Close the file even when parsing blows up.
            fp.close()

        # Rename only after the file has been closed.
        if modify and handled:
            os.rename(path, os.path.join(dir, ',' + fn))

    print('--------------')
    print('%d files parsed, %d files warning-only, %d files unparseable'
          % (nok, nwarn, nbad))
    print('--------------')
    report = [(errordict[e], errorfirst[e], errorlast[e], e)
              for e in errordict]
    report.sort()
    for num, first, last, e in report:
        print('%d %s - %s\t%s' % (num, first, last, e))
225
def main():
    """Command line entry point: mailerdaemon [-d] [folder ...].

    A leading '-d' switches on modify mode and is removed from sys.argv.
    Each remaining argument is handed to parsedir(); when there is none,
    the built-in default folder is used.
    """
    modify = 0
    if sys.argv[1:2] == ['-d']:
        modify = 1
        del sys.argv[1]

    folders = sys.argv[1:]
    if not folders:
        folders = ['/ufs/jack/Mail/errorsinbox']
    for folder in folders:
        parsedir(folder, modify)
236
# Run main() when this file is executed as a script.  The second test also
# fires when sys.argv[0] is equal to this module's __name__.
if __name__ == '__main__' or sys.argv[0] == __name__:
    main()
239