1# This file is part of Buildbot.  Buildbot is free software: you can
2# redistribute it and/or modify it under the terms of the GNU General Public
3# License as published by the Free Software Foundation, version 2.
4#
5# This program is distributed in the hope that it will be useful, but WITHOUT
6# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
7# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
8# details.
9#
10# You should have received a copy of the GNU General Public License along with
11# this program; if not, write to the Free Software Foundation, Inc., 51
12# Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
13#
14# Copyright Buildbot Team Members
15
16"""
Parse various kinds of commit-notification email (CVS, Subversion and
Bazaar/Launchpad).
18"""
19
20import calendar
21import datetime
22import re
23import time
24from email import message_from_file
25from email.iterators import body_line_iterator
26from email.utils import mktime_tz
27from email.utils import parseaddr
28from email.utils import parsedate_tz
29
30from twisted.internet import defer
31from twisted.python import log
32from zope.interface import implementer
33
34from buildbot import util
35from buildbot.interfaces import IChangeSource
36from buildbot.util.maildir import MaildirService
37
38
@implementer(IChangeSource)
class MaildirSource(MaildirService, util.ComparableMixin):

    """Shared behaviour for change sources fed by mail delivered to a Maildir.

    Each incoming message file is parsed into a ``(src, change-dict)`` pair
    and the change is submitted to the master. The parsing itself is
    delegated to ``self.parse(message, prefix)``, which this class does not
    define.
    """

    compare_attrs = ("basedir", "pollinterval", "prefix")
    name = 'MaildirSource'

    def __init__(self, maildir, prefix=None, category='', repository=''):
        """Watch ``maildir`` and remember ``prefix``, ``category`` and ``repository``.

        A hint is logged when a non-empty ``prefix`` lacks a trailing slash.
        """
        super().__init__(maildir)
        self.prefix, self.category, self.repository = prefix, category, repository
        if not prefix or prefix.endswith("/"):
            return
        hint = "MaildirSource: you probably want your prefix=('{}') to end with a slash"
        log.msg(hint.format(prefix))

    def describe(self):
        """Return a one-line description naming the class and the watched directory."""
        cls_name = self.__class__.__name__
        return "{} watching maildir '{}'".format(cls_name, self.basedir)

    @defer.inlineCallbacks
    def messageReceived(self, filename):
        """Parse the message file ``filename`` and submit the change it describes.

        The file is opened through ``moveToCurDir``. When parsing yields no
        change dictionary, that is logged and nothing is submitted.
        """
        with self.moveToCurDir(filename) as mailfile:
            parsed = self.parse_file(mailfile, self.prefix)

        # A falsy parse result means "no change"; otherwise it is (src, chdict).
        src, chdict = parsed if parsed else (None, None)
        if not chdict:
            log.msg("no change found in maildir file '{}'".format(filename))
            return
        yield self.master.data.updates.addChange(src=str(src), **chdict)

    def parse_file(self, fd, prefix=None):
        """Read an email message from the open file ``fd`` and hand it to ``parse``."""
        return self.parse(message_from_file(fd), prefix)
75
76
class CVSMaildirSource(MaildirSource):

    """Change source for mail generated by the 'buildbot-cvs-mail' program."""

    name = "CVSMaildirSource"

    # "Key: value" lines that may appear in the body ahead of the log
    # message. Each pattern captures the value without its trailing newline.
    # 'module' and 'updateof' are recognised but their values are not used.
    _headerREs = (
        ('category', re.compile(r'^Category:\s*(\S.*)')),
        ('cvsroot', re.compile(r'^CVSROOT:\s*(\S.*)')),
        ('cvsmode', re.compile(r'^Cvsmode:\s*(\S.*)')),
        ('files', re.compile(r'^Files:\s*(\S.*)')),
        ('module', re.compile(r'^Module:\s*(\S.*)')),
        ('path', re.compile(r'^Path:\s*(\S.*)')),
        ('project', re.compile(r'^Project:\s*(\S.*)')),
        ('branch', re.compile(r'^\s+Tag:\s*(\S.*)')),
        ('updateof', re.compile(r'^Update of:\s*(\S.*)')),
    )

    # One 'file,old-version,new-version' entry (cvs 1.11 file list).
    _singleFileRE_1_11 = re.compile(
        r'(.+?),(NONE|(?:\d+\.(?:\d+\.\d+\.)*\d+)),(NONE|(?:\d+\.(?:\d+\.\d+\.)*\d+))(?: |$)')  # noqa pylint: disable=line-too-long
    # One 'file old-version new-version' entry (cvs 1.12 file list).
    _singleFileRE_1_12 = re.compile(
        r'(.+?) (NONE|(?:\d+\.(?:\d+\.\d+\.)*\d+)) (NONE|(?:\d+\.(?:\d+\.\d+\.)*\d+))(?: |$)')  # noqa pylint: disable=line-too-long

    def __init__(self, maildir, prefix=None, category='',
                 repository='', properties=None):
        """Set up the source; ``properties`` is attached to every parsed change.

        ``properties`` defaults to a fresh empty dict. The remaining
        arguments are passed to ``MaildirSource.__init__``.
        """
        super().__init__(maildir, prefix, category, repository)
        if properties is None:
            properties = {}
        self.properties = properties

    def parse(self, m, prefix=None):
        """Parse messages sent by the 'buildbot-cvs-mail' program.

        ``m`` is the email message to parse; ``prefix`` is accepted for
        interface compatibility and is not used here.

        Returns ``('cvs', change_dict)``, or ``None`` when the mail has no
        usable From address, no ``Files:`` line, or a file list that cannot
        be parsed.

        Raises ``ValueError`` when the ``Cvsmode:`` line is missing or is not
        '1.11' or '1.12', or when mode 1.12 is used without a ``Path:`` line.
        """
        # The mail is sent from the person doing the checkin. Assume that the
        # local username is enough to identify them (this assumes a one-server
        # cvs-over-rsh environment rather than the server-dirs-shared-over-NFS
        # model)
        _, addr = parseaddr(m["from"])
        if not addr:
            # no From means this message isn't from buildbot-cvs-mail
            return None
        # Keep only the part before '@'; an address without '@' is used whole.
        author = util.bytes2unicode(addr.partition("@")[0], encoding="ascii")

        # CVS accepts RFC822 dates. buildbot-cvs-mail adds the date as
        # part of the mail header, so use that.
        # This assumes cvs is being accessed via ssh or pserver, so the time
        # will be the CVS server's time.

        # calculate a "revision" based on that timestamp, or the current time
        # if we're unable to parse the date.
        log.msg('Processing CVS mail')
        dateTuple = parsedate_tz(m["date"])
        if dateTuple is None:
            when = util.now()
        else:
            when = mktime_tz(dateTuple)

        theTime = datetime.datetime.fromtimestamp(float(when), datetime.timezone.utc)
        rev = theTime.strftime('%Y-%m-%d %H:%M:%S')

        # Scan the body up to the "Log Message:" marker, collecting the last
        # value seen for every recognised header line. Keys that never show
        # up stay absent and read back as None below, so a mail lacking e.g.
        # "Category:" or "Cvsmode:" no longer trips over an unbound local.
        lines = iter(list(body_line_iterator(m)))
        fields = {}
        for line in lines:
            for key, headerRE in self._headerREs:
                match = headerRE.match(line)
                if match:
                    fields[key] = match.group(1)
                    break
            else:
                if line == "Log Message:\n":
                    break

        # CVS 1.11 lists files as:
        #   repo/path file,old-version,new-version file2,old-version,new-version
        # Version 1.12 lists files as:
        #   file1 old-version new-version file2 old-version new-version
        #
        # files consists of tuples of 'file-name old-version new-version'
        # The versions are either dotted-decimal version numbers, ie 1.1
        # or NONE. New files are of the form 'NONE NUMBER', while removed
        # files are 'NUMBER NONE'. 'NONE' is a literal string
        # Parsing this instead of files list in 'Added File:' etc
        # makes it possible to handle files with embedded spaces, though
        # it could fail if the filename was 'bad 1.1 1.2'
        # For cvs version 1.11, we expect
        #  my_module new_file.c,NONE,1.1
        #  my_module removed.txt,1.2,NONE
        #  my_module modified_file.c,1.1,1.2
        # While cvs version 1.12 gives us
        #  new_file.c NONE 1.1
        #  removed.txt 1.2 NONE
        #  modified_file.c 1.1 1.2

        fileList = fields.get('files')
        if fileList is None:
            log.msg('CVSMaildirSource Mail with no files. Ignoring')
            return None       # We don't have any files. Email not from CVS

        cvsmode = fields.get('cvsmode')
        path = fields.get('path')
        if cvsmode == '1.11':
            # Please, no repo paths with spaces!
            # The first space-terminated word of the list is the repo path;
            # it takes precedence over any "Path:" line.
            match = re.search('([^ ]*) ', fileList)
            if not match:
                log.msg(
                    'CVSMaildirSource can\'t get path from file list. Ignoring mail')
                return None
            path = match.group(1)
            fileList = fileList[len(path):].strip()
            singleFileRE = self._singleFileRE_1_11
        elif cvsmode == '1.12':
            singleFileRE = self._singleFileRE_1_12
            if path is None:
                raise ValueError(
                    'CVSMaildirSource cvs 1.12 require path. Check cvs loginfo config')
        else:
            raise ValueError('Expected cvsmode 1.11 or 1.12. got: {}'.format(cvsmode))

        log.msg("CVSMaildirSource processing filelist: {}".format(fileList))
        files = []
        pos = 0
        while pos < len(fileList):
            # Each match consumes one entry plus the separating space, if any.
            match = singleFileRE.match(fileList, pos)
            if not match:
                log.msg('CVSMaildirSource no files matched regex. Ignoring')
                return None   # bail - we couldn't parse the files that changed
            files.append(path + '/' + match.group(1))
            pos = match.end()

        # Everything after "Log Message:" is the commit comment.
        comments = "".join(lines).rstrip() + "\n"
        if comments == '\n':
            comments = None
        return ('cvs', dict(author=author, committer=None, files=files, comments=comments,
                            isdir=0, when=when, branch=fields.get('branch'),
                            revision=rev, category=fields.get('category'),
                            repository=fields.get('cvsroot'),
                            project=fields.get('project'),
                            properties=self.properties))
254
255# svn "commit-email.pl" handler.  The format is very similar to freshcvs mail;
256# here's a sample:
257
258#  From: username [at] apache.org    [slightly obfuscated to avoid spam here]
259#  To: commits [at] spamassassin.apache.org
260#  Subject: svn commit: r105955 - in spamassassin/trunk: . lib/Mail
261#  ...
262#
263#  Author: username
264#  Date: Sat Nov 20 00:17:49 2004      [note: TZ = local tz on server!]
265#  New Revision: 105955
266#
267#  Modified:   [also Removed: and Added:]
268#    [filename]
269#    ...
270#  Log:
271#  [log message]
272#  ...
273#
274#
275#  Modified: spamassassin/trunk/lib/Mail/SpamAssassin.pm
276#  [unified diff]
277#
278#  [end of mail]
279
280
class SVNCommitEmailMaildirSource(MaildirSource):

    """Change source for mail produced by the svn 'commit-email.pl' hook."""

    name = "SVN commit-email.pl"

    def parse(self, m, prefix=None):
        """Parse messages sent by the svn 'commit-email.pl' trigger.

        ``m`` is the email message. When ``prefix`` is non-empty, only file
        names starting with it are kept, with the prefix stripped; the
        others are logged and dropped.

        Returns ``('svn', change_dict)``, or ``None`` when the mail has no
        From address or no file survives the prefix filter.
        """
        # Start from the sender's local part (the address up to '@', or the
        # whole address when it has no '@'); an "Author:" line in the body
        # overrides it below.
        sender = parseaddr(m["from"])[1]
        if not sender:
            return None  # no From means this message isn't from svn
        author = sender.split("@", 1)[0]

        # The change time is the time of receipt rather than anything taken
        # from the mail: not exact (it includes the email latency), but it
        # avoids out-of-order changes. The body's "Date:" line carries no
        # timezone (it is in the server's local TZ), so using it would need
        # a configuration setting for the server's timezone.
        when = util.now()

        body = iter(list(body_line_iterator(m)))

        # Phase 1: header stanza, which ends with the "Log:" line.
        author_re = re.compile(r"^Author: (\S+)")        # "Author: jmason"
        rev_re = re.compile(r"^New Revision: (\d+)")     # "New Revision: 105955"
        rev = None
        for line in body:
            found = author_re.search(line)
            if found:
                author = found.group(1)
            found = rev_re.search(line)
            if found:
                rev = found.group(1)
            if line == "Log:\n":
                break

        # Phase 2: the commit message, terminated by the file-listing section.
        section_markers = ("Modified:\n", "Added:\n", "Removed:\n")
        log_lines = []
        for line in body:
            if line in section_markers:
                break
            log_lines.append(line)
        comments = "".join(log_lines).rstrip() + "\n"

        # Phase 3: the file listing, which runs up to the first blank line.
        files = []
        for line in body:
            if line == "\n":
                break
            if line.startswith(section_markers):
                continue            # further section headings carry no file names
            for f in line.strip().split(" "):
                if prefix:
                    # insist that the file start with the prefix: we may get
                    # changes we don't care about too
                    if not f.startswith(prefix):
                        log.msg(("ignored file from svn commit: prefix '{}' "
                                 "does not match filename '{}'").format(prefix, f))
                        continue
                    f = f[len(prefix):]

                # TODO: figure out how new directories are described, set
                # .isdir
                files.append(f)

        if not files:
            log.msg("no matching files found, ignoring commit")
            return None

        return ('svn', dict(author=author, committer=None, files=files, comments=comments,
                            when=when, revision=rev))
380
381# bzr Launchpad branch subscription mails. Sample mail:
382#
383#   From: noreply@launchpad.net
384#   Subject: [Branch ~knielsen/maria/tmp-buildbot-test] Rev 2701: test add file
385#   To: Joe <joe@acme.com>
386#   ...
387#
388#   ------------------------------------------------------------
389#   revno: 2701
390#   committer: Joe <joe@acme.com>
391#   branch nick: tmpbb
392#   timestamp: Fri 2009-05-15 10:35:43 +0200
393#   message:
394#     test add file
395#   added:
396#     test-add-file
397#
398#
399#   --
400#
401#   https://code.launchpad.net/~knielsen/maria/tmp-buildbot-test
402#
403#   You are subscribed to branch lp:~knielsen/maria/tmp-buildbot-test.
404#   To unsubscribe from this branch go to
405#   https://code.launchpad.net/~knielsen/maria/tmp-buildbot-test/+edit-subscription.
406#
407# [end of mail]
408
409
class BzrLaunchpadEmailMaildirSource(MaildirSource):

    """Change source for bzr branch-subscription mail sent by Launchpad."""

    name = "Launchpad"

    compare_attrs = ("branchMap", "defaultBranch")

    # "message:", "added:", ... on a line of their own open a section whose
    # entries follow on lines indented by two spaces.
    _sectionRE = re.compile(r"^(message|removed|added|renamed|modified):\s*$")
    _revnoRE = re.compile(r"^revno: ([0-9.]+)")
    _committerRE = re.compile(r"^committer: (.*)$")
    # timestamp: Fri 2009-05-15 10:35:43 +0200
    _timestampRE = re.compile(
        r"^timestamp: [a-zA-Z]{3} (\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}) ([-+])(\d{2})(\d{2})$")  # noqa pylint: disable=line-too-long

    def __init__(self, maildir, prefix=None, branchMap=None, defaultBranch=None, **kwargs):
        """Set up the source.

        ``branchMap`` maps a repository name (with or without a leading
        'lp:') to the branch name to report; ``defaultBranch`` is used when
        the map has no entry. Other arguments go to ``MaildirSource``.
        """
        self.branchMap = branchMap
        self.defaultBranch = defaultBranch
        super().__init__(maildir, prefix, **kwargs)

    @staticmethod
    def _bodyLines(m):
        """Yield the body of ``m`` line by line, without line terminators.

        ``email.iterators.body_line_iterator(m, True)`` cannot be used here:
        it only yields ``str`` payloads, while ``get_payload(decode=True)``
        returns ``bytes``, so it produces no lines at all. Instead every
        non-multipart part is transfer-decoded and then decoded as UTF-8,
        dropping undecodable bytes.
        """
        for part in m.walk():
            payload = part.get_payload(decode=True)
            if isinstance(payload, bytes):
                payload = payload.decode("utf-8", errors="ignore")
            if not isinstance(payload, str):
                continue  # multipart container (None) or a non-text payload
            for line in payload.splitlines():
                yield line

    def parse(self, m, prefix=None):
        """Parse branch notification messages sent by Launchpad.

        ``m`` is the email message; ``prefix`` is accepted for interface
        compatibility and is not used here.

        Returns ``('bzr', change_dict)`` when the body has both a ``revno:``
        and a ``committer:`` line, otherwise ``None``.
        """
        # The repository name comes from a "[Branch <name>]" subject tag; a
        # mail without a Subject header simply has no repository.
        subject = m["subject"]
        if subject is None:
            subject = ""
        match = re.search(r"^\s*\[Branch\s+([^]]+)\]", subject)
        repository = match.group(1) if match else None

        files = []
        comment_lines = []
        rev = None
        author = None
        when = util.now()

        def gobble_renamed(s):
            # "old => new" becomes "old RENAMED new"; anything else is kept as is.
            renamed = re.search(r"^(.+) => (.+)$", s)
            if renamed:
                files.append('{} RENAMED {}'.format(renamed.group(1), renamed.group(2)))
            else:
                files.append('{} RENAMED'.format(s))

        # What to do with the indented lines of each section.
        gobblers = {
            'message': comment_lines.append,
            'removed': lambda s: files.append('{} REMOVED'.format(s)),
            'added': lambda s: files.append('{} ADDED'.format(s)),
            'modified': lambda s: files.append('{} MODIFIED'.format(s)),
            'renamed': gobble_renamed,
        }
        gobbler = None

        for line in self._bodyLines(m):
            # revno: 101
            match = self._revnoRE.search(line)
            if match:
                rev = match.group(1)

            # committer: Joe <joe@acme.com>
            match = self._committerRE.search(line)
            if match:
                author = match.group(1)

            # The time zone offset is split out by the regex and applied
            # manually by parseLaunchpadDate.
            match = self._timestampRE.search(line)
            if match:
                when = parseLaunchpadDate(*match.groups())

            match = self._sectionRE.search(line)
            if match:
                gobbler = gobblers[match.group(1)]
            elif gobbler and line.startswith("  "):
                gobbler(line[2:])  # drop the two-space indent

        # Determine the name of the branch: an explicit branchMap entry wins,
        # then defaultBranch, then 'lp:<repository>'.
        branch = None
        if self.branchMap and repository:
            if repository in self.branchMap:
                branch = self.branchMap[repository]
            elif ("lp:" + repository) in self.branchMap:
                branch = self.branchMap['lp:' + repository]
        if not branch:
            if self.defaultBranch:
                branch = self.defaultBranch
            elif repository:
                branch = 'lp:' + repository
            else:
                branch = None

        if rev and author:
            comments = "".join(text + "\n" for text in comment_lines)
            return ('bzr', dict(author=author, committer=None, files=files,
                                comments=comments,
                                when=when, revision=rev,
                                branch=branch, repository=repository or ''))
        return None
521
522
def parseLaunchpadDate(datestr, tz_sign, tz_hours, tz_minutes):
    """Convert a Launchpad timestamp to seconds since the epoch (UTC).

    ``datestr`` is the local time as 'YYYY-MM-DD HH:MM:SS'. ``tz_sign``
    ('+' or '-'), ``tz_hours`` and ``tz_minutes`` are the string pieces of
    its UTC offset, e.g. '+', '02', '00' for '+0200'.
    """
    time_no_tz = calendar.timegm(time.strptime(datestr, "%Y-%m-%d %H:%M:%S"))
    # The sign applies to the whole offset, minutes included: '-0330' is
    # -(3h30m), not -3h + 30m.
    sign = -1 if tz_sign == "-" else 1
    tz_delta = sign * (60 * 60 * int(tz_hours) + 60 * int(tz_minutes))
    return time_no_tz - tz_delta
527