1#!/usr/bin/env python
2#
3#
4# Licensed to the Apache Software Foundation (ASF) under one or more
5# contributor license agreements.  See the NOTICE file distributed with
6# this work for additional information regarding copyright ownership.
7# The ASF licenses this file to You under the Apache License, Version 2.0
8# (the "License"); you may not use this file except in compliance with
9# the License.  You may obtain a copy of the License at
10#
11#     http://www.apache.org/licenses/LICENSE-2.0
12#
13# Unless required by applicable law or agreed to in writing, software
14# distributed under the License is distributed on an "AS IS" BASIS,
15# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16# See the License for the specific language governing permissions and
17# limitations under the License.
18#
19
20# IrkerBridge - Bridge an SvnPubSub stream to Irker.
21
22# Example:
23#  irkerbridge.py --daemon --pidfile pid --logfile log config
24#
25# For detailed option help use:
26#  irkerbridge.py --help
27
28# It expects a config file that has the following parameters:
29# streams=url
30#   Space separated list of URLs to streams.
31#   This option should only be in the DEFAULT section, is ignored in
32#   all other sections.
33# irker=hostname:port
34#   The hostname/port combination of the irker daemon.  If port is
35#   omitted it defaults to 6659.  Irker is connected to over UDP.
36# match=What to use to decide if the commit should be sent to irker.
37#   It consists of the repository UUID followed by a slash and a glob pattern.
#   The UUID may be replaced by a * to match all UUIDs. The glob pattern is
#   matched against each changed path; one matching path is enough.  If the
#   slash and glob pattern are omitted, every path matches.  Both the UUID
#   and the glob pattern must match to send the message to irker.
41# to=url
#   Space separated list of URLs (any URL that Irker will accept) to
#   send the resulting message to.  Currently Irker only supports IRC.
44# template=string
45#   A string to use to format the output.  The string is a Python
46#   string Template.  The following variables are available:
47#   $committer, $id, $date, $repository, $log, $log_firstline,
48#   $log_firstparagraph, $dirs_changed, $dirs_count, $dirs_count_s,
49#   $subdirs_count, $subdirs_count_s, $dirs_root
50#   Most of them should be self explanatory.  $dirs_count is the number of
51#   entries in $dirs_changed, $dirs_count_s is a friendly string version,
52#   $dirs_root is the common root of all the $dirs_changed, $subdirs_count
53#   is the number of subdirs under the $dirs_root that changed,
#   $subdirs_count_s is a friendly string version. $log_firstparagraph cuts
55#   the log message at the first blank line and replaces newlines with spaces.
56#
57# Within the config file you have sections.  Any configuration option
58# missing from a given section is found in the [DEFAULT] section.
59#
60# Section names are arbitrary names that mean nothing to the bridge.  Each
61# section other than the [DEFAULT] section consists of a configuration that
62# may match and send a message to irker to deliver.  All matching sections
63# will generate a message.
64#
65# Interpolation of values within the config file is allowed by including
66# %(name)s within a value.  For example I can reference the UUID of a repo
67# repeatedly by doing:
68# [DEFAULT]
69# ASF_REPO=13f79535-47bb-0310-9956-ffa450edef68
70#
71# [#commits]
72# match=%(ASF_REPO)s/
73#
74# You can HUP the process to reload the config file without restarting the
75# process.  However, you cannot change the streams it is listening to without
76# restarting the process.
77#
78# TODO: Logging in a better way.
79
# Upper bound on the length of the privmsg text handed to irker.  A longer
# message is cut and "..." is appended, so that the text that is sent is
# exactly this long:
MAX_PRIVMSG = 400
83
84import os
85import sys
86import posixpath
87import socket
88import json
89import optparse
90import ConfigParser
91import traceback
92import signal
93import re
94import fnmatch
95from string import Template
96
97try:
98  # Python >=3.0
99  from urllib.parse import urlparse
100except ImportError:
101  # Python <3.0
102  from urlparse import urlparse
103
104
105# Packages that come with svnpubsub
106import svnpubsub.client
107import daemonize
108
class Daemon(daemonize.Daemon):
  """daemonize.Daemon subclass that runs the SvnPubSub client loop.

  bdec is the bridge object; run() reads its `urls`, `commit` and `event`
  attributes and hands them to svnpubsub.client.MultiClient.
  """

  def __init__(self, logfile, pidfile, bdec):
    daemonize.Daemon.__init__(self, logfile, pidfile)

    # Kept for run(); nothing else in this class touches it.
    self.bdec = bdec

  def setup(self):
    # Nothing has to be prepared before the parent may carry on.
    return None

  def run(self):
    pid = os.getpid()
    print('irkerbridge started, pid=%d' % pid)

    bridge = self.bdec
    client = svnpubsub.client.MultiClient(bridge.urls,
                                          bridge.commit,
                                          bridge.event)
    client.run_forever()
126
127
class BigDoEverythingClass(object):
  """Turn received commits into irker messages according to the config.

  config  -- object providing sections(), get(section, option) and
             get_value(option).
  options -- object with a `verbose` attribute that enables extra output.
  """

  def __init__(self, config, options):
    self.config = config
    self.options = options
    # The stream URLs are read once, here; later changes to the config
    # object do not alter self.urls.
    self.urls = config.get_value('streams').split()

  def locate_matching_configs(self, commit):
    """Return the names of the config sections whose "match" accepts commit.

    A "match" value has the form UUID/GLOB.  Without a slash the glob
    defaults to '*'.  A section matches when the UUID is '*' or equals
    commit.repository and at least one path in commit.changed matches the
    glob.
    """
    result = []
    for section in self.config.sections():
      match = self.config.get(section, "match").split('/', 1)
      if len(match) < 2:
        # No slash so assume all paths
        match.append('*')
      match_uuid, match_path = match
      if match_uuid != "*" and commit.repository != match_uuid:
        continue
      if any(fnmatch.fnmatch(path, match_path) for path in commit.changed):
        result.append(section)
    return result

  def _generate_dirs_changed(self, commit):
    """Set commit.dirs_changed from commit.changed when it is missing.

    Does nothing if the commit already has dirs_changed or has no changed
    attribute.  commit.changed is read as a mapping of path to a dict with
    a 'flags' string.
    """
    if hasattr(commit, 'dirs_changed') or not hasattr(commit, 'changed'):
      return

    dirs_changed = set()
    for p in commit.changed:
      if p.endswith('/') and commit.changed[p]['flags'][1] == 'U':
        # directory with property changes add the directory itself.
        dirs_changed.add(p)
      else:
        # everything else add the parent of the path
        # directories have a trailing slash so if it's present remove
        # it before finding the parent.  The result will be a directory
        # so it needs a trailing slash
        dirs_changed.add(posixpath.dirname(p.rstrip('/')) + '/')

    commit.dirs_changed = dirs_changed

  def fill_in_extra_args(self, commit):
    """Add the derived attributes used by templates to commit, in place.

    Sets log_firstline and log_firstparagraph, and, when there are changed
    directories, dirs_root, dirs_count, dirs_count_s, subdirs_count and
    subdirs_count_s.  Every empty attribute is replaced by the string
    "<null>".
    """
    # Collect the changed directories from the raw data *before* the
    # "<null>" placeholders are written; otherwise an empty collection
    # would be replaced by that string and its characters would be
    # treated as paths below.
    self._generate_dirs_changed(commit)
    dirs_changed = getattr(commit, 'dirs_changed', None)

    # Set any empty members to the string "<null>"
    v = vars(commit)
    for k in list(v):
      if not v[k]:
        v[k] = '<null>'

    # Add entries to the commit object that are useful for
    # formatting.
    # Drop a trailing CR so that CRLF logs do not leak "\r" into the output.
    commit.log_firstline = commit.log.split("\n", 1)[0].rstrip("\r")
    first_paragraph = re.split(r"\r?\n\r?\n", commit.log, maxsplit=1)[0]
    commit.log_firstparagraph = re.sub(r"\r?\n", " ", first_paragraph)

    if dirs_changed:
      # commonprefix() wants a sequence and compares character by
      # character, so "trunk/foo/" and "trunk/foobar/" yield "trunk/foo".
      # Unless the prefix is itself one of the changed directories, cut
      # it back to the last '/' to get a real directory.
      dirs_root = posixpath.commonprefix(list(dirs_changed))
      if dirs_root not in dirs_changed:
        dirs_root = dirs_root[:dirs_root.rfind('/') + 1]
      if dirs_root == '':
        dirs_root = '/'
      commit.dirs_root = dirs_root

      commit.dirs_count = len(dirs_changed)
      if commit.dirs_count > 1:
        commit.dirs_count_s = " (%d dirs)" % (commit.dirs_count)
      else:
        commit.dirs_count_s = ""

      # The root itself does not count as one of its own subdirs.
      commit.subdirs_count = commit.dirs_count
      if dirs_root in dirs_changed:
        commit.subdirs_count -= 1
      if commit.subdirs_count >= 1:
        commit.subdirs_count_s = " + %d subdirs" % (commit.subdirs_count)
      else:
        commit.subdirs_count_s = ""

  def _send(self, irker, msg):
    """Send msg, JSON encoded, in one UDP datagram to irker.

    irker -- "hostname" or "hostname:port"; the port defaults to 6659.
    msg   -- JSON-serializable object.
    """
    irker_list = irker.split(':')
    if len(irker_list) < 2:
      irker_list.append(6659)
    address = (irker_list[0], int(irker_list[1]))
    json_msg = json.dumps(msg)

    sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    try:
      # sendto() needs bytes on Python 3; json.dumps() output is plain
      # ASCII by default, so encoding it is safe on Python 2 as well.
      sock.sendto(json_msg.encode('utf-8'), address)
    finally:
      # One socket is created per message, so release it right away.
      sock.close()

    if self.options.verbose:
      print("SENT: %s to %s" % (json_msg, irker))

  def join_all(self):
    """Send an empty privmsg to every target of every section.

    Presumably this makes irker join the configured channels -- confirm
    against the irker documentation.
    """
    # Like self.commit(), but ignores self.config.get(section, "template").
    for section in self.config.sections():
      irker = self.config.get(section, "irker")
      to_list = self.config.get(section, "to").split()
      if not irker or not to_list:
        continue
      for to in to_list:
        msg = {'to': to, 'privmsg': ''}
        self._send(irker, msg)

  def commit(self, url, commit):
    """Format commit for every matching section and send it to irker.

    url is only used for verbose output.  Any error is printed with its
    traceback and then re-raised.
    """
    if self.options.verbose:
      print("RECV: from %s" % url)
      print(json.dumps(vars(commit), indent=2))

    try:
      config_sections = self.locate_matching_configs(commit)
      if config_sections:
        self.fill_in_extra_args(commit)
        for section in config_sections:
          irker = self.config.get(section, "irker")
          to_list = self.config.get(section, "to").split()
          template = self.config.get(section, "template")
          if not irker or not to_list or not template:
            continue
          # safe_substitute() leaves unknown $names in place instead of
          # raising.
          privmsg = Template(template).safe_substitute(vars(commit))
          if len(privmsg) > MAX_PRIVMSG:
            privmsg = privmsg[:MAX_PRIVMSG-3] + '...'
          for to in to_list:
            msg = {'to': to, 'privmsg': privmsg}
            self._send(irker, msg)

    except Exception:
      # Log real errors only; KeyboardInterrupt/SystemExit pass through
      # without being reported as unexpected.
      print("Unexpected error:")
      traceback.print_exc()
      sys.stdout.flush()
      raise

  def event(self, url, event_name, event_arg):
    """Log a stream event; "ping" events are logged only when verbose."""
    if self.options.verbose or event_name != "ping":
      print('EVENT: %s from %s' % (event_name, url))
      sys.stdout.flush()
252
253
254
class ReloadableConfig(ConfigParser.SafeConfigParser):
  """Config parser bound to one file, which it re-reads on SIGHUP."""

  def __init__(self, fname):
    """Read fname and install this instance's SIGHUP handler.

    Note that installing the handler is a process-wide side effect.
    """
    ConfigParser.SafeConfigParser.__init__(self)

    self.fname = fname
    self.read(fname)

    signal.signal(signal.SIGHUP, self.hangup)

  def hangup(self, signalnum, frame):
    """Signal handler (arguments as passed by the signal module)."""
    self.reload()

  def reload(self):
    """Discard the current contents and read self.fname again."""
    print("RELOAD: config file: %s" % self.fname)
    sys.stdout.flush()

    # Delete everything. Just re-reading would overlay, and would not
    # remove sections/options. Note that [DEFAULT] will not be removed.
    for section in self.sections():
      self.remove_section(section)

    # Get rid of [DEFAULT].  remove_section() only knows about regular
    # sections and silently ignores the default section, so the defaults
    # have to be emptied through defaults() instead; otherwise options
    # deleted from [DEFAULT] would survive the reload.
    self.defaults().clear()

    # Now re-read the configuration file.  read() skips files it cannot
    # open and returns the list of files it did read, so an empty result
    # means the configuration is now empty; say so instead of failing
    # silently.
    if not self.read(self.fname):
      print("RELOAD: unable to read config file: %s" % self.fname)
      sys.stdout.flush()

  def get_value(self, which):
    """Return option `which` from the [DEFAULT] section."""
    return self.get(ConfigParser.DEFAULTSECT, which)
284
285
def main(args):
  """Parse the command line and run the bridge.

  args -- command line arguments without the program name.

  Exactly one positional argument, the config file, is required.  With
  --daemon, --logfile and --pidfile are required as well.  Usage errors
  are reported through the option parser's error().
  """
  parser = optparse.OptionParser(
      description='An SvnPubSub client that bridges the data to irker.',
      usage='Usage: %prog [options] CONFIG_FILE',
      )
  parser.add_option('--logfile',
      help='filename for logging')
  parser.add_option('--verbose', action='store_true',
      help="enable verbose logging")
  parser.add_option('--pidfile',
      help="the process' PID will be written to this file")
  parser.add_option('--daemon', action='store_true',
      help='run as a background daemon')

  options, extra = parser.parse_args(args)

  if len(extra) != 1:
    parser.error('CONFIG_FILE is required')
  # All paths are made absolute before the process is (possibly) daemonized.
  config_file = os.path.abspath(extra[0])

  logfile, pidfile = None, None
  if options.daemon:
    if options.logfile:
      logfile = os.path.abspath(options.logfile)
    else:
      parser.error('LOGFILE is required when running as a daemon')

    if options.pidfile:
      pidfile = os.path.abspath(options.pidfile)
    else:
      parser.error('PIDFILE is required when running as a daemon')

  config = ReloadableConfig(config_file)
  bdec = BigDoEverythingClass(config, options)

  # In the foreground case logfile and pidfile are both None.
  d = Daemon(logfile, pidfile, bdec)
  if options.daemon:
    d.daemonize_exit()
  else:
    d.foreground()
327
# Script entry point: runs only when the file is executed directly, and
# passes the command line arguments without the program name.
if __name__ == "__main__":
  main(sys.argv[1:])
330