# This file is part of Buildbot. Buildbot is free software: you can
# redistribute it and/or modify it under the terms of the GNU General Public
# License as published by the Free Software Foundation, version 2.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, write to the Free Software Foundation, Inc., 51
# Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#
# Copyright Buildbot Team Members

"""
Parse various kinds of 'CVS notify' email.
"""

import calendar
import datetime
import re
import time
from email import message_from_file
from email.iterators import body_line_iterator
from email.utils import mktime_tz
from email.utils import parseaddr
from email.utils import parsedate_tz

from twisted.internet import defer
from twisted.python import log
from zope.interface import implementer

from buildbot import util
from buildbot.interfaces import IChangeSource
from buildbot.util.maildir import MaildirService


@implementer(IChangeSource)
class MaildirSource(MaildirService, util.ComparableMixin):

    """Generic base class for Maildir-based change sources"""

    compare_attrs = ("basedir", "pollinterval", "prefix")
    name = 'MaildirSource'

    def __init__(self, maildir, prefix=None, category='', repository=''):
        """Remember the configuration and warn about a suspicious prefix.

        ``maildir`` is handed to the parent constructor; ``prefix``,
        ``category`` and ``repository`` are stored on the instance as-is.
        """
        super().__init__(maildir)
        self.prefix = prefix
        self.category = category
        self.repository = repository
        if prefix and not prefix.endswith("/"):
            log.msg(("MaildirSource: you probably want your prefix=('{}') to end with a slash"
                     ).format(prefix))

    def describe(self):
        """Return a one-line description naming the class and its maildir."""
        return "{} watching maildir '{}'".format(self.__class__.__name__, self.basedir)

    @defer.inlineCallbacks
    def messageReceived(self, filename):
        """Parse the mail in ``filename`` and submit the change it describes.

        The file is opened through ``self.moveToCurDir`` (not defined in this
        file) and parsed with ``parse_file``.  When parsing produces a
        ``(src, chdict)`` pair with a non-empty ``chdict`` the change is
        added through ``self.master.data.updates.addChange``; otherwise the
        file is only logged as containing no change.
        """
        with self.moveToCurDir(filename) as f:
            chtuple = self.parse_file(f, self.prefix)

        src, chdict = None, None
        if chtuple:
            src, chdict = chtuple
        if chdict:
            yield self.master.data.updates.addChange(src=str(src), **chdict)
        else:
            log.msg("no change found in maildir file '{}'".format(filename))

    def parse_file(self, fd, prefix=None):
        """Read an email message from the open file ``fd`` and parse it.

        Returns whatever the subclass-provided ``parse`` returns.
        """
        m = message_from_file(fd)
        return self.parse(m, prefix)


# Body header lines understood by CVSMaildirSource.parse, paired with the
# name of the field each one fills in.  "Module:" and "Update of:" lines also
# occur in these mails but their values are not used, so they are not listed.
_CVS_HEADER_RES = (
    ('category', re.compile(r'^Category:\s*(\S.*)')),
    ('cvsroot', re.compile(r'^CVSROOT:\s*(\S.*)')),
    ('cvsmode', re.compile(r'^Cvsmode:\s*(\S.*)')),
    ('fileList', re.compile(r'^Files:\s*(\S.*)')),
    ('path', re.compile(r'^Path:\s*(\S.*)')),
    ('project', re.compile(r'^Project:\s*(\S.*)')),
    ('branch', re.compile(r'^\s+Tag:\s*(\S.*)')),
)

# A CVS revision: the literal NONE or a dotted-decimal number such as 1.1 or
# 1.2.4.3.
_CVS_VERSION = r'(NONE|(?:\d+\.(?:\d+\.\d+\.)*\d+))'

# One "file old-version new-version" entry of the Files: list, per cvsmode.
# cvs 1.11 separates the three parts with commas, cvs 1.12 with spaces.
_CVS_FILE_RES = {
    '1.11': re.compile(
        r'(.+?),' + _CVS_VERSION + r',' + _CVS_VERSION + r'(?: |$)'),
    '1.12': re.compile(
        r'(.+?) ' + _CVS_VERSION + r' ' + _CVS_VERSION + r'(?: |$)'),
}


class CVSMaildirSource(MaildirSource):
    name = "CVSMaildirSource"

    def __init__(self, maildir, prefix=None, category='',
                 repository='', properties=None):
        """Store ``properties`` (a fresh dict when omitted) on top of the
        base-class configuration."""
        super().__init__(maildir, prefix, category, repository)
        self.properties = {} if properties is None else properties

    def parse(self, m, prefix=None):
        """Parse messages sent by the 'buildbot-cvs-mail' program.

        ``m`` is the parsed email message; ``prefix`` is accepted for
        interface compatibility and not used here.

        Returns ``('cvs', chdict)`` for a usable mail, or ``None`` when the
        mail has no From address, no ``Files:`` line, or a file list that
        cannot be parsed.  Raises ``ValueError`` when the ``Cvsmode:`` line
        is missing or is neither 1.11 nor 1.12, and when a 1.12 mail lacks a
        ``Path:`` line.
        """
        # The mail is sent from the person doing the checkin. Assume that the
        # local username is enough to identify them (this assumes a one-server
        # cvs-over-rsh environment rather than the server-dirs-shared-over-NFS
        # model)
        _, addr = parseaddr(m["from"])
        if not addr:
            # no From means this message isn't from buildbot-cvs-mail
            return None
        # Without an "@" the whole address is kept: it might still be useful.
        author = util.bytes2unicode(addr.partition("@")[0], encoding="ascii")

        # CVS accepts RFC822 dates. buildbot-cvs-mail adds the date as
        # part of the mail header, so use that.
        # This assumes cvs is being access via ssh or pserver, so the time
        # will be the CVS server's time.

        # calculate a "revision" based on that timestamp, or the current time
        # if we're unable to parse the date.
        log.msg('Processing CVS mail')
        dateTuple = parsedate_tz(m["date"])
        when = util.now() if dateTuple is None else mktime_tz(dateTuple)

        theTime = datetime.datetime.fromtimestamp(float(when), datetime.timezone.utc)
        rev = theTime.strftime('%Y-%m-%d %H:%M:%S')

        # Every field starts out as None so that a mail missing one of the
        # header lines yields a defined value instead of an unbound local.
        fields = {key: None for key, _ in _CVS_HEADER_RES}
        files = []
        isdir = 0

        # The same iterator feeds the header scan and, afterwards, the log
        # message, so the second stage resumes where the first one stopped.
        lines = iter(body_line_iterator(m))
        for line in lines:
            if line == "Log Message:\n":
                break
            for key, header_re in _CVS_HEADER_RES:
                match = header_re.match(line)
                if match:
                    fields[key] = match.group(1)
                    break

        fileList = fields['fileList']
        cvsmode = fields['cvsmode']
        path = fields['path']

        # CVS 1.11 lists files as:
        #   repo/path file,old-version,new-version file2,old-version,new-version
        # Version 1.12 lists files as:
        #   file1 old-version new-version file2 old-version new-version
        #
        # files consists of tuples of 'file-name old-version new-version'
        # The versions are either dotted-decimal version numbers, ie 1.1
        # or NONE. New files are of the form 'NONE NUMBER', while removed
        # files are 'NUMBER NONE'. 'NONE' is a literal string
        # Parsing this instead of files list in 'Added File:' etc
        # makes it possible to handle files with embedded spaces, though
        # it could fail if the filename was 'bad 1.1 1.2'
        # For cvs version 1.11, we expect
        #   my_module new_file.c,NONE,1.1
        #   my_module removed.txt,1.2,NONE
        #   my_module modified_file.c,1.1,1.2
        # While cvs version 1.12 gives us
        #   new_file.c NONE 1.1
        #   removed.txt 1.2 NONE
        #   modified_file.c 1.1,1.2

        if fileList is None:
            log.msg('CVSMaildirSource Mail with no files. Ignoring')
            return None  # We don't have any files. Email not from CVS

        if cvsmode == '1.11':
            # Please, no repo paths with spaces!
            # The path is everything up to the first space of the list.
            path, separator, remainder = fileList.partition(' ')
            if not separator:
                log.msg(
                    'CVSMaildirSource can\'t get path from file list. Ignoring mail')
                return None
            fileList = remainder.strip()
        elif cvsmode == '1.12':
            if path is None:
                raise ValueError(
                    'CVSMaildirSource cvs 1.12 require path. Check cvs loginfo config')
        else:
            raise ValueError('Expected cvsmode 1.11 or 1.12. got: {}'.format(cvsmode))
        singleFileRE = _CVS_FILE_RES[cvsmode]

        log.msg("CVSMaildirSource processing filelist: {}".format(fileList))
        pos = 0
        while pos < len(fileList):
            match = singleFileRE.match(fileList, pos)
            if not match:
                log.msg('CVSMaildirSource no files matched regex. Ignoring')
                return None  # bail - we couldn't parse the files that changed
            files.append(path + '/' + match.group(1))
            pos = match.end()

        # Now get comments: everything after the "Log Message:" line.
        comments = "".join(lines).rstrip() + "\n"
        if comments == '\n':
            comments = None
        return ('cvs', dict(author=author, committer=None, files=files, comments=comments,
                            isdir=isdir, when=when, branch=fields['branch'],
                            revision=rev, category=fields['category'],
                            repository=fields['cvsroot'], project=fields['project'],
                            properties=self.properties))

# svn "commit-email.pl" handler. The format is very similar to freshcvs mail;
# here's a sample:

# From: username [at] apache.org [slightly obfuscated to avoid spam here]
# To: commits [at] spamassassin.apache.org
# Subject: svn commit: r105955 - in spamassassin/trunk: . lib/Mail
# ...
#
# Author: username
# Date: Sat Nov 20 00:17:49 2004 [note: TZ = local tz on server!]
# New Revision: 105955
#
# Modified: [also Removed: and Added:]
#   [filename]
#   ...
# Log:
# [log message]
# ...
#
#
# Modified: spamassassin/trunk/lib/Mail/SpamAssassin.pm
# [unified diff]
#
# [end of mail]


_SVN_AUTHOR_RE = re.compile(r"^Author: (\S+)")
_SVN_REVISION_RE = re.compile(r"^New Revision: (\d+)")
# Lines that introduce (or repeat inside) the file-listing section.
_SVN_SECTION_HEADERS = ("Modified:\n", "Added:\n", "Removed:\n")


class SVNCommitEmailMaildirSource(MaildirSource):
    name = "SVN commit-email.pl"

    def parse(self, m, prefix=None):
        """Parse messages sent by the svn 'commit-email.pl' trigger.

        ``m`` is the parsed email message.  When ``prefix`` is set, only
        files starting with it are kept, with the prefix removed.

        Returns ``('svn', chdict)``, or ``None`` when the mail has no From
        address or lists no (matching) files.
        """

        # The mail is sent from the person doing the checkin. Assume that the
        # local username is enough to identify them (this assumes a one-server
        # cvs-over-rsh environment rather than the server-dirs-shared-over-NFS
        # model)
        _, addr = parseaddr(m["from"])
        if not addr:
            return None  # no From means this message isn't from svn
        # Without an "@" the whole address is kept: it might still be useful.
        author = addr.partition("@")[0]

        # we take the time of receipt as the time of checkin. Not correct (it
        # depends upon the email latency), but it avoids the
        # out-of-order-changes issue. Also syncmail doesn't give us anything
        # better to work with, unless you count pulling the v1-vs-v2
        # timestamp out of the diffs, which would be ugly. TODO: Pulling the
        # 'Date:' header from the mail is a possibility, and
        # email.utils.parsedate_tz may be useful. It should be configurable,
        # however, because there are a lot of broken clocks out there.
        when = util.now()

        files = []
        rev = None
        # One iterator is shared by the three stages below, so each stage
        # resumes where the previous one stopped.
        lines = iter(body_line_iterator(m))

        # Stage 1: header stanza, which ends with the "Log:" line.
        for line in lines:
            if line == "Log:\n":
                break

            # "Author: jmason"
            match = _SVN_AUTHOR_RE.search(line)
            if match:
                author = match.group(1)

            # "New Revision: 105955"
            match = _SVN_REVISION_RE.search(line)
            if match:
                rev = match.group(1)

            # possible TODO: use "Date: ..." data here instead of time of
            # commit message receipt, above. however, this timestamp is
            # specified *without* a timezone, in the server's local TZ, so to
            # be accurate buildbot would need a config setting to specify the
            # source server's expected TZ setting! messy.

        # Stage 2: commit message, terminated by the file-listing section.
        comment_lines = []
        for line in lines:
            if line in _SVN_SECTION_HEADERS:
                break
            comment_lines.append(line)
        comments = "".join(comment_lines).rstrip() + "\n"

        # Stage 3: file listing, terminated by the first blank line.
        for line in lines:
            if line == "\n":
                break
            if line in _SVN_SECTION_HEADERS:
                continue  # ignore this line

            # Repeated spaces would otherwise produce empty file names.
            for f in filter(None, line.strip().split(" ")):
                if prefix:
                    # insist that the file start with the prefix: we may get
                    # changes we don't care about too
                    if not f.startswith(prefix):
                        log.msg(("ignored file from svn commit: prefix '{}' "
                                 "does not match filename '{}'").format(prefix, f))
                        continue
                    f = f[len(prefix):]

                # TODO: figure out how new directories are described, set
                # .isdir
                files.append(f)

        if not files:
            log.msg("no matching files found, ignoring commit")
            return None

        return ('svn', dict(author=author, committer=None, files=files, comments=comments,
                            when=when, revision=rev))

# bzr Launchpad branch subscription mails. Sample mail:
#
# From: noreply@launchpad.net
# Subject: [Branch ~knielsen/maria/tmp-buildbot-test] Rev 2701: test add file
# To: Joe <joe@acme.com>
# ...
#
# ------------------------------------------------------------
# revno: 2701
# committer: Joe <joe@acme.com>
# branch nick: tmpbb
# timestamp: Fri 2009-05-15 10:35:43 +0200
# message:
#   test add file
# added:
#   test-add-file
#
#
# --
#
# https://code.launchpad.net/~knielsen/maria/tmp-buildbot-test
#
# You are subscribed to branch lp:~knielsen/maria/tmp-buildbot-test.
# To unsubscribe from this branch go to
# https://code.launchpad.net/~knielsen/maria/tmp-buildbot-test/+edit-subscription.
#
# [end of mail]


_BZR_BRANCH_RE = re.compile(r"^\s*\[Branch\s+([^]]+)\]")
_BZR_REVNO_RE = re.compile(r"^revno: ([0-9.]+)")
_BZR_COMMITTER_RE = re.compile(r"^committer: (.*)$")
_BZR_TIMESTAMP_RE = re.compile(
    r"^timestamp: [a-zA-Z]{3} (\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}) ([-+])(\d{2})(\d{2})$")
_BZR_SECTION_RE = re.compile(r"^(message|removed|added|renamed|modified):\s*$")
_BZR_RENAME_RE = re.compile(r"^(.+) => (.+)$")


class BzrLaunchpadEmailMaildirSource(MaildirSource):
    name = "Launchpad"

    compare_attrs = ("branchMap", "defaultBranch")

    def __init__(self, maildir, prefix=None, branchMap=None, defaultBranch=None, **kwargs):
        """Store the branch lookup configuration, then run the base setup.

        ``branchMap`` maps a repository name (with or without a leading
        ``lp:``) to a branch name; ``defaultBranch`` is used when the map has
        no entry.  Remaining keyword arguments go to the parent constructor.
        """
        self.branchMap = branchMap
        self.defaultBranch = defaultBranch
        super().__init__(maildir, prefix, **kwargs)

    def parse(self, m, prefix=None):
        """Parse branch notification messages sent by Launchpad.

        ``m`` is the parsed email message; ``prefix`` is accepted for
        interface compatibility and not used here.

        Returns ``('bzr', chdict)`` when both a ``revno:`` and a
        ``committer:`` line were found, otherwise ``None``.
        """

        # A mail without a Subject header yields None; treat it as empty so
        # the mail is simply parsed without a repository name.
        subject = str(m["subject"] or "")
        match = _BZR_BRANCH_RE.search(subject)
        repository = match.group(1) if match else None

        files = []
        comment_lines = []
        rev = None
        author = None
        when = util.now()

        def gobble_renamed(s):
            renamed = _BZR_RENAME_RE.search(s)
            if renamed:
                files.append('{} RENAMED {}'.format(renamed.group(1), renamed.group(2)))
            else:
                files.append('{} RENAMED'.format(s))

        # What to do with the indented lines that follow each section header.
        gobblers = {
            'message': comment_lines.append,
            'removed': lambda s: files.append('{} REMOVED'.format(s)),
            'added': lambda s: files.append('{} ADDED'.format(s)),
            'modified': lambda s: files.append('{} MODIFIED'.format(s)),
            'renamed': gobble_renamed,
        }
        gobbler = None

        for line in _decoded_body_lines(m):
            # revno: 101
            match = _BZR_REVNO_RE.search(line)
            if match:
                rev = match.group(1)

            # committer: Joe <joe@acme.com>
            match = _BZR_COMMITTER_RE.search(line)
            if match:
                author = match.group(1)

            # timestamp: Fri 2009-05-15 10:35:43 +0200
            # datetime.strptime() is supposed to support %z for time zone, but
            # it does not seem to work. So handle the time zone manually.
            match = _BZR_TIMESTAMP_RE.search(line)
            if match:
                when = parseLaunchpadDate(*match.groups())

            match = _BZR_SECTION_RE.search(line)
            if match:
                gobbler = gobblers[match.group(1)]
            elif line.startswith(" ") and gobbler:
                # NOTE(review): two characters of indentation are dropped
                # although only one leading space is tested for; the sample
                # mail suggests a two-space indent - confirm.
                gobbler(line[2:])

        # Determine the name of the branch.
        branch = None
        if self.branchMap and repository:
            if repository in self.branchMap:
                branch = self.branchMap[repository]
            elif ("lp:" + repository) in self.branchMap:
                branch = self.branchMap['lp:' + repository]
        if not branch:
            if self.defaultBranch:
                branch = self.defaultBranch
            elif repository:
                branch = 'lp:' + repository

        if rev and author:
            return ('bzr', dict(author=author, committer=None, files=files,
                                comments="".join(s + "\n" for s in comment_lines),
                                when=when, revision=rev,
                                branch=branch, repository=repository or ''))
        return None


def parseLaunchpadDate(datestr, tz_sign, tz_hours, tz_minutes):
    """Convert a Launchpad timestamp to seconds since the epoch (UTC).

    ``datestr`` is the local time as ``YYYY-MM-DD HH:MM:SS``; ``tz_sign``
    (``"+"`` or ``"-"``), ``tz_hours`` and ``tz_minutes`` are the pieces of
    its UTC offset, given as strings.
    """
    time_no_tz = calendar.timegm(time.strptime(datestr, "%Y-%m-%d %H:%M:%S"))
    # The sign covers the whole offset: -0330 is -(3h + 30min), not
    # -3h + 30min.
    sign = -1 if tz_sign == "-" else 1
    tz_delta = sign * (60 * 60 * int(tz_hours) + 60 * int(tz_minutes))
    return time_no_tz - tz_delta


def _decoded_body_lines(msg):
    """Yield the transfer-decoded body of ``msg`` as text lines.

    Line terminators are removed.  Every non-multipart part is decoded as
    UTF-8, ignoring undecodable bytes.  This is done by hand because
    ``email.iterators.body_line_iterator`` only yields payloads that are
    ``str``, whereas ``get_payload(decode=True)`` returns ``bytes``.
    """
    for part in msg.walk():
        payload = part.get_payload(decode=True)
        if payload is None:
            # multipart containers have no payload of their own
            continue
        if isinstance(payload, bytes):
            payload = payload.decode("utf-8", errors="ignore")
        for line in payload.split("\n"):
            yield line.rstrip("\r")