1#!/usr/bin/env python3
2"""Classes to parse mailer-daemon messages."""
3
import calendar
import email.message
import email.utils
import io
import os
import re
import sys
9
10
class Unparseable(Exception):
    """Raised when no failed recipient can be extracted from a message."""
13
14
class ErrorMessage(email.message.Message):
    """A mail message that can classify itself and extract delivery errors.

    Instances are normally created by the ``email`` parser, e.g.
    ``email.message_from_file(fp, _class=ErrorMessage)``.
    """

    def __init__(self, *args, **kwargs):
        """Create an empty message.

        Any arguments (such as ``policy``) are forwarded unchanged to
        ``email.message.Message`` so the class can be used as a parser
        factory with or without a policy.
        """
        super().__init__(*args, **kwargs)
        # Lower-cased subject; filled in by is_warning() for non-warnings
        # and handed to the parsers as the fallback error reason.
        self.sub = ''

    def is_warning(self):
        """Return true if the Subject marks this message as a mere warning.

        A message without a Subject is not a warning.  For a non-warning
        message with a Subject, the lower-cased subject is stored in
        ``self.sub`` as a side effect.
        """
        sub = self.get('Subject')
        if not sub:
            return False
        # str() because the header may be an email.header.Header object
        # rather than a plain string.
        sub = str(sub).lower()
        if sub.startswith('waiting mail') or 'warning' in sub:
            return True
        self.sub = sub
        return False

    def _body_text(self):
        """Return the message body as one string for the parsers to scan.

        A multipart message is flattened by joining the string form of
        each part (including the part's own headers) with newlines; a
        message without a textual payload yields ``''``.
        """
        payload = self.get_payload()
        if self.is_multipart():
            return '\n'.join(part.as_string() for part in payload)
        return payload if isinstance(payload, str) else ''

    def get_errors(self):
        """Return the list of error strings found by the first parser that
        understands this message.

        Each parser in ``EMPARSERS`` is called with a fresh file-like view
        of the body and ``self.sub``.

        Raises:
            Unparseable: if every parser fails to parse the message.
        """
        # email.message.Message has neither rewindbody() nor an fp
        # attribute, so the body is served from memory instead; a new
        # StringIO per parser plays the role of rewinding.
        body = self._body_text()
        for parser in EMPARSERS:
            try:
                return parser(io.StringIO(body), self.sub)
            except Unparseable:
                continue
        raise Unparseable
40
# Address patterns: each entry is either one regular expression or a
# tuple of two regular expressions.
#
# A single expression must define a group (?P<email>...) that captures
# the failed address.  It may also define a group (?P<reason>...) that
# captures the error message; when it does not, emparse_list_reason is
# consulted to find a reason.
#
# In a tuple, the first expression locates the start of an address
# list and the second one is applied repeatedly to collect one or more
# addresses.  The second expression is matched (not searched) exactly
# where the previous match ended.
#
# All expressions are compiled with the re module just below.
emparse_list_list = [
    'error: (?P<reason>unresolvable): (?P<email>.+)',
    ('----- The following addresses had permanent fatal errors -----\n',
     '(?P<email>[^ \n].*)\n( .*\n)?'),
    'remote execution.*\n.*rmail (?P<email>.+)',
    ('The following recipients did not receive your message:\n\n',
     ' +(?P<email>.*)\n(The following recipients did not receive your message:\n\n)?'),
    '------- Failure Reasons  --------\n\n(?P<reason>.*)\n(?P<email>.*)',
    '^<(?P<email>.*)>:\n(?P<reason>.*)',
    '^(?P<reason>User mailbox exceeds allowed size): (?P<email>.+)',
    '^5\\d{2} <(?P<email>[^\n>]+)>\\.\\.\\. (?P<reason>.+)',
    '^Original-Recipient: rfc822;(?P<email>.*)',
    '^did not reach the following recipient\\(s\\):\n\n(?P<email>.*) on .*\n +(?P<reason>.*)',
    '^ <(?P<email>[^\n>]+)> \\.\\.\\. (?P<reason>.*)',
    '^Report on your message to: (?P<email>.*)\nReason: (?P<reason>.*)',
    '^Your message was not delivered to +(?P<email>.*)\n +for the following reason:\n +(?P<reason>.*)',
    '^ was not +(?P<email>[^ \n].*?) *\n.*\n.*\n.*\n because:.*\n +(?P<reason>[^ \n].*?) *\n',
    ]
# Replace every entry by its compiled form.  Slice assignment keeps the
# same list object, and no helper names are left in the module.
emparse_list_list[:] = [
    re.compile(entry, re.MULTILINE)
    if isinstance(entry, str)
    else tuple(re.compile(part, re.MULTILINE) for part in entry)
    for entry in emparse_list_list
]

# Reason patterns, used when the address pattern captured no reason.
# A plain string is a template: every "<>" in it is replaced by the
# (escaped) email address before it is compiled.  The entries are tried
# in order and the first compiled expression that matches ends the
# search, so the order matters.
emparse_list_reason = [
    r'^5\d{2} <>\.\.\. (?P<reason>.*)',
    r'<>\.\.\. (?P<reason>.*)',
    re.compile(r'^<<< 5\d{2} (?P<reason>.*)', flags=re.MULTILINE),
    re.compile('===== stderr was =====\nrmail: (?P<reason>.*)'),
    re.compile('^Diagnostic-Code: (?P<reason>.*)', flags=re.MULTILINE),
]
# Finds the first line starting with "From:" (any letter case).
emparse_list_from = re.compile('^From:', flags=re.MULTILINE | re.IGNORECASE)
def emparse_list(fp, sub):
    """Extract "address: reason" strings from a bounce message body.

    The whole of *fp* is read.  Addresses are looked for with the
    patterns in emparse_list_list, but only in the text that precedes
    the first "From:" line.  If the matching pattern supplied no reason,
    *sub* (minus a leading 'returned mail: ') is the default reason and
    emparse_list_reason is searched, over the entire text, for a better
    one.

    Returns a list with one whitespace-normalised "address: reason"
    string per address.  Raises Unparseable when no address is found.
    """
    text = fp.read()
    from_match = emparse_list_from.search(text)
    limit = from_match.start(0) if from_match is not None else len(text)

    addresses = []
    reason = None
    for pattern in emparse_list_list:
        if isinstance(pattern, tuple):
            # Two-step pattern: find the list header, then collect the
            # addresses that follow it back to back.
            found = pattern[0].search(text, 0, limit)
            if found is None:
                continue
            try:
                reason = found.group('reason')
            except IndexError:
                # This pattern defines no reason group.
                pass
            position = found.end(0)
            while True:
                found = pattern[1].match(text, position, limit)
                if found is None:
                    break
                addresses.append(found.group('email'))
                position = found.end(0)
            break

        found = pattern.search(text, 0, limit)
        if found is None:
            continue
        addresses.append(found.group('email'))
        try:
            reason = found.group('reason')
        except IndexError:
            # This pattern defines no reason group.
            pass
        break

    if not addresses:
        raise Unparseable

    errors = []
    if not reason:
        prefix = 'returned mail: '
        reason = sub
        if reason[:len(prefix)] == prefix:
            reason = reason[len(prefix):]
        for finder in emparse_list_reason:
            if isinstance(finder, str):
                # Template entry: look for a reason specific to each
                # address.  Going backwards keeps the remaining indices
                # valid while handled addresses are removed.
                for index in reversed(range(len(addresses))):
                    address = addresses[index]
                    specific = re.compile(
                        re.escape(address).join(finder.split('<>')),
                        re.MULTILINE)
                    hit = specific.search(text)
                    if hit is not None:
                        line = address.strip() + ': ' + hit.group('reason')
                        errors.append(' '.join(line.split()))
                        del addresses[index]
                continue
            # Compiled entry: a reason shared by all remaining addresses.
            hit = finder.search(text)
            if hit is not None:
                reason = hit.group('reason')
                break

    errors.extend(
        ' '.join((address.strip() + ': ' + reason).split())
        for address in addresses)
    return errors

# Parser functions tried in turn by ErrorMessage.get_errors().
EMPARSERS = [emparse_list]
155
def sort_numeric(a, b):
    """Compare two values by their integer value, cmp-style.

    Both arguments are converted with int().  The result is -1 if *a*
    is smaller, 1 if it is larger and 0 if both are equal.
    """
    left, right = int(a), int(b)
    return (left > right) - (left < right)
165
def parsedir(dir, modify):
    """Parse every numerically named message in *dir* and print a report.

    Files whose names consist only of ASCII digits are processed in
    numeric order.  One status line is printed per file.  Afterwards a
    summary is printed: the number of parsed, warning-only and
    unparseable files, followed by every distinct error with its number
    of occurrences and the first and last file (and date) it was seen in.

    The current working directory is left untouched, so several relative
    folder names can be processed one after another.

    Args:
        dir: Path of the folder that holds the messages.
        modify: If true, files that were recognised (warnings and parsed
            errors) are renamed to ``,<name>`` inside *dir*.
    """
    errordict = {}    # error text -> number of occurrences
    errorfirst = {}   # error text -> "file (date)" of the first occurrence
    errorlast = {}    # error text -> "file (date)" of the last occurrence
    nok = nwarn = nbad = 0

    # Numeric file names only, in numeric (not lexicographic) order.
    files = sorted(
        (fn for fn in os.listdir(dir) if re.fullmatch('[0-9]+', fn)),
        key=int,
    )

    for fn in files:
        path = os.path.join(dir, fn)
        # The message is parsed completely while the file is open, so the
        # handle can be released before the file is renamed.  Undecodable
        # bytes are replaced instead of aborting the whole run.
        with open(path, errors='replace') as fp:
            m = email.message_from_file(fp, _class=ErrorMessage)
        sender = email.utils.parseaddr(str(m.get('From', '')))
        print('%s\t%-40s\t' % (fn, sender[1]), end=' ')

        if m.is_warning():
            print('warning only')
            nwarn += 1
            if modify:
                os.rename(path, os.path.join(dir, ',' + fn))
            continue

        try:
            errors = m.get_errors()
        except Unparseable:
            print('** Not parseable')
            nbad += 1
            continue
        print(len(errors), 'errors')

        # The date is a property of the message, so work it out once.
        # parsedate() returns None for a missing or malformed header,
        # which makes the slice raise TypeError.
        try:
            mm, dd = email.utils.parsedate(str(m.get('Date', '')))[1:3]
            date = '%s %02d' % (calendar.month_abbr[mm], dd)
        except (TypeError, ValueError, IndexError):
            date = '??????'
        seen_in = '%s (%s)' % (fn, date)

        # Remember them
        for e in errors:
            if e not in errordict:
                errordict[e] = 1
                errorfirst[e] = seen_in
            else:
                errordict[e] += 1
            errorlast[e] = seen_in

        nok += 1
        if modify:
            os.rename(path, os.path.join(dir, ',' + fn))

    print('--------------')
    print(nok, 'files parsed,', nwarn, 'files warning-only,', end=' ')
    print(nbad, 'files unparseable')
    print('--------------')
    report = sorted(
        (errordict[e], errorfirst[e], errorlast[e], e) for e in errordict
    )
    for num, first, last, e in report:
        print('%d %s - %s\t%s' % (num, first, last, e))
233
def main():
    """Command-line entry point.

    An optional leading ``-d`` switches on modify mode and is removed
    from sys.argv.  Every remaining argument is a folder handed to
    parsedir(); without arguments the built-in default folder is used.
    """
    argv = sys.argv
    modify = 0
    if argv[1:2] == ['-d']:
        modify = 1
        del argv[1]
    folders = argv[1:] or ['/ufs/jack/Mail/errorsinbox']
    for folder in folders:
        parsedir(folder, modify)

# Run when executed as a script, or when the module name equals argv[0].
if __name__ == '__main__' or sys.argv[0] == __name__:
    main()
247