1# -*-python-*-
2#
3# Copyright (C) 1999-2020 The ViewCVS Group. All Rights Reserved.
4#
5# By using this file, you agree to the terms and conditions set forth in
6# the LICENSE.html file which can be found at the top level of the ViewVC
7# distribution or at http://viewvc.org/license-1.html.
8#
9# For more information, visit http://viewvc.org/
10#
11# -----------------------------------------------------------------------
12#
13# viewvc: View CVS/SVN repositories via a web browser
14#
15# -----------------------------------------------------------------------
16from __future__ import print_function
17
# Version string for this module.
__version__ = '1.3.0-dev'
19
20# Standard modules that we know are in the path or builtin.
21import sys
22import os
23import calendar
24import copy
25import fnmatch
26import gzip
27import mimetypes
28import re
29import email.utils
30import stat
31import struct
32import tempfile
33import time
34import functools
35import io
36import popen
37from urllib.parse import urlencode as _urlencode, quote as _quote
38import subprocess
39
40# These modules come from our library (the stub has set up the path)
41from common import (ViewVCException, get_exception_data, print_exception_data,
42                    _RCSDIFF_NO_CHANGES, _RCSDIFF_IS_BINARY, _RCSDIFF_ERROR,
43                    TemplateData, _item)
44import accept
45import config
46import ezt
47import sapi
48import vcauth
49import vclib
50import vclib.ccvs
51import vclib.svn
52
# idiff is treated as optional: if importing it raises ImportError or
# SyntaxError, the name is bound to None instead (presumably so that
# code using it can check for None first -- verify against callers).
try:
  import idiff
except (SyntaxError, ImportError):
  idiff = None

# Initialize the system tracebacklimit value to 0, meaning stack
# traces will carry only the top-level exception string.  This can be
# overridden via configuration.
sys.tracebacklimit = 0
62
63#########################################################################
64
# First PATH_INFO component that makes Request.run_viewvc() serve a
# document via view_doc() instead of looking at a repository.
docroot_magic_path = '*docroot*'
# Values of the 'content-type' query parameter that select the markup
# view (see Request.run_viewvc() and _validate_mimetype()).
viewcvs_mime_type = 'text/vnd.viewcvs-markup'
alt_mime_type = 'text/x-cvsweb-markup'
# Value of the 'root' query parameter that means "no root"; the
# parameter is dropped and the request redirected.
view_roots_magic = '*viewroots*'

# Put here the variables we need in order to hold our state - they
# will be added (with their current value) to (almost) any link/query
# string you construct.
_sticky_vars = [
  'hideattic',
  'sortby',
  'sortdir',
  'logsort',
  'diff_format',
  'search',
  'limit_changes',
  ]

# for reading/writing between a couple descriptors
CHUNK_SIZE = 8192

# special characters that don't need to be URL encoded (passed as the
# "safe" argument when quoting URLs)
_URL_SAFE_CHARS = "/*~"
88
89
90# Python 3: workaround for cmp()
def cmp(a, b):
  """Three-way comparison replacing the builtin cmp() that Python 3 lacks.

  Returns a positive number when A > B, a negative number when A < B,
  and zero otherwise."""
  is_greater = a > b
  is_less = a < b
  return is_greater - is_less
93
class TextIOWrapper_noclose(io.TextIOWrapper):
  """Custom TextIOWrapper class which doesn't close the underlying IO
  object when close() is called or this object is destroyed."""

  def close(self):
    """Flush pending text and detach from the underlying buffer, leaving
    that buffer open.  Calling close() again afterwards is a no-op."""
    try:
      if self.closed:
        return
    except ValueError:
      # Once detached, io.TextIOWrapper raises ValueError when its state
      # is queried; that means an earlier close() already did the work.
      return
    # 'closed' is a read-only attribute of io.TextIOWrapper and must not
    # be assigned to; flushing and detaching is all that is needed.
    self.flush()
    self.detach()
102
class Request:
  """State and dispatch logic for one ViewVC request.

  An instance keeps the server object and configuration it was given,
  the validated query parameters, the resolved root/path information
  and the selected view function.  It also builds URLs and form targets
  that point back into ViewVC (get_url, get_form, get_link)."""

  def __init__(self, server, cfg):
    """Record SERVER and CFG and derive per-request settings from the
    request environment: script name, user agent, language (and its
    key/value files), authenticated username and gzip compress level."""
    self.server = server
    self.cfg = cfg

    self.script_name = _normalize_path(server.getenv('SCRIPT_NAME', ''))
    self.browser = server.getenv('HTTP_USER_AGENT', 'unknown')

    # process the Accept-Language: header, and load the key/value
    # files, given the selected language
    hal = server.getenv('HTTP_ACCEPT_LANGUAGE','')
    try:
      self.lang_selector = accept.language(hal)
    except accept.AcceptLanguageParseError:
      # unparseable header: fall back to English
      self.lang_selector = accept.language('en')
    self.language = self.lang_selector.select_from(cfg.general.languages)
    self.kv = cfg.load_kv_files(self.language)

    # check for an authenticated username
    self.username = server.getenv('REMOTE_USER')

    # if we allow compressed output, see if the client does too
    self.gzip_compress_level = 0
    if cfg.options.allow_compress:
      # NOTE(review): this reads os.environ directly, unlike the other
      # lookups above which go through server.getenv() -- confirm that
      # this is intended for servers that are not plain CGI.
      http_accept_encoding = os.environ.get("HTTP_ACCEPT_ENCODING", "")
      if "gzip" in [x.strip() for x in http_accept_encoding.split(',')]:
        self.gzip_compress_level = 9  # make this configurable?

  def run_viewvc(self):
    """Interpret the request and carry it out.

    Validates the query parameters, splits PATH_INFO, determines the
    root and opens its repository, checks that the path exists and
    selects the view function.  Finally either redirects to the
    canonical URL (when the incoming URL was valid but irregular) or
    calls the selected view function.

    Raises ViewVCException for an unknown root, an unknown root type,
    or an unknown location; _validate_param() raises it for illegal
    parameters."""

    cfg = self.cfg

    # This function first parses the query string and sets the following
    # variables. Then it executes the request.
    self.view_func = None  # function to call to process the request
    self.repos = None      # object representing current repository
    self.rootname = None   # name of current root (as used in viewvc.conf)
    self.roottype = None   # current root type ('svn' or 'cvs')
    self.rootpath = None   # physical path to current root
    self.pathtype = None   # type of path, either vclib.FILE or vclib.DIR
    self.where = None      # path to file or directory in current root
    self.query_dict = {}   # validated and cleaned up query options
    self.path_parts = None # for convenience, equals where.split('/')
    self.pathrev = None    # current path revision or tag
    self.auth = None       # authorizer module in use

    # redirect if we're loading from a valid but irregular URL
    # These redirects aren't necessary to make ViewVC work, it functions
    # just fine without them, but they make it easier for server admins to
    # implement access restrictions based on URL
    needs_redirect = 0

    # Process the query params
    for name, values in self.server.params().items():
      # we only care about the first value
      value = values[0]

      # patch up old queries that use 'cvsroot' to look like they used 'root'
      if name == 'cvsroot':
        name = 'root'
        needs_redirect = 1

      # same for 'only_with_tag' and 'pathrev'
      if name == 'only_with_tag':
        name = 'pathrev'
        needs_redirect = 1

      # redirect view=rev to view=revision, too
      if name == 'view' and value == 'rev':
        value = 'revision'
        needs_redirect = 1

      # validate the parameter
      _validate_param(name, value)

      # if we're here, then the parameter is okay
      self.query_dict[name] = value

    # Resolve the view parameter into a handler function.
    self.view_func = _views.get(self.query_dict.get('view', None),
                                self.view_func)

    # Process PATH_INFO component of query string
    path_info = self.server.getenv('PATH_INFO', '')

    # clean it up. this removes duplicate '/' characters and any that may
    # exist at the front or end of the path.
    ### we might want to redirect to the cleaned up URL
    path_parts = _path_parts(path_info)

    if path_parts:
      # handle docroot magic path prefixes
      if path_parts[0] == docroot_magic_path:
        # if this is just a simple hunk of doc, then serve it up
        self.where = _path_join(path_parts[1:])
        return view_doc(self)
      # handle tarball magic suffixes
      if self.view_func is download_tarball:
        if (self.query_dict.get('parent')):
          del path_parts[-1]
        elif path_parts[-1][-7:] == ".tar.gz":
          path_parts[-1] = path_parts[-1][:-7]

    # Figure out root name
    self.rootname = self.query_dict.get('root')
    if self.rootname == view_roots_magic:
      del self.query_dict['root']
      self.rootname = ""
      needs_redirect = 1
    elif self.rootname is None:
      if cfg.options.root_as_url_component:
        if path_parts:
          roottype, rootpath, self.rootname, new_path_parts = \
                  locate_root_from_path(cfg, path_parts)
          if roottype is None:
            # Perhaps the root name is candidate for renaming...
            # Take care of old-new roots mapping
            for old_root, new_root in cfg.general.renamed_roots.items():
              pp = _path_parts(old_root)
              if _path_starts_with(path_parts, pp):
                path_parts = path_parts[len(pp):]
                self.rootname = new_root
                needs_redirect = 1
            if self.rootname is None:
              # Not found; interpret whole path as root, to show as error
              self.rootname = _path_join(path_parts)
              path_parts = []
          else:
            path_parts = new_path_parts
        else:
          self.rootname = ""
      elif self.view_func != view_roots:
        self.rootname = cfg.general.default_root
    elif cfg.options.root_as_url_component:
      # root was given as a query parameter although roots are supposed
      # to be URL components; redirect to the canonical form
      needs_redirect = 1

    # Take care of old-new roots mapping
    for old_root, new_root in cfg.general.renamed_roots.items():
      if self.rootname == old_root:
        self.rootname = new_root
        needs_redirect = 1

    self.where = _path_join(path_parts)
    self.path_parts = path_parts

    if self.rootname:
      roottype, rootpath = locate_root(cfg, self.rootname)
      if roottype:
        # Overlay root-specific options.
        cfg.overlay_root_options(self.rootname)

        # Setup an Authorizer for this rootname and username
        self.auth = setup_authorizer(cfg, self.username)

        # Create the repository object
        try:
          if roottype == 'cvs':
            self.rootpath = vclib.ccvs.canonicalize_rootpath(rootpath)
            self.repos = vclib.ccvs.CVSRepository(self.rootname,
                                                  self.rootpath,
                                                  self.auth,
                                                  cfg.utilities,
                                                  cfg.options.use_rcsparse,
                                                  cfg.options.default_encoding)
            # required so that spawned rcs programs correctly expand
            # $CVSHeader$
            os.environ['CVSROOT'] = self.rootpath
          elif roottype == 'svn':
            self.rootpath = vclib.svn.canonicalize_rootpath(rootpath)
            self.repos = vclib.svn.SubversionRepository(self.rootname,
                                                        self.rootpath,
                                                        self.auth,
                                                        cfg.utilities,
                                                        cfg.options.svn_config_dir,
                                                        cfg.options.default_encoding)
          else:
            raise vclib.ReposNotFound()
        except vclib.ReposNotFound:
          # leave self.repos as None; reported as an unknown root below
          pass
      if self.repos is None:
        raise ViewVCException(
          'The root "%s" is unknown. If you believe the value is '
          'correct, then please double-check your configuration.'
          % self.rootname, "404 Not Found")

    if self.repos:
      self.repos.open()
      vctype = self.repos.roottype()
      if vctype == vclib.SVN:
        self.roottype = 'svn'
      elif vctype == vclib.CVS:
        self.roottype = 'cvs'
      else:
        raise ViewVCException(
          'The root "%s" has an unknown type ("%s").  Expected "cvs" or "svn".'
          % (self.rootname, vctype),
          "500 Internal Server Error")

    # If this is using an old-style 'rev' parameter, redirect to new hotness.
    # Subversion URLs will now use 'pathrev'; CVS ones use 'revision'.
    if self.repos and 'rev' in self.query_dict:
      if self.roottype == 'svn' \
             and 'pathrev' not in self.query_dict \
             and not self.view_func == view_revision:
        self.query_dict['pathrev'] = self.query_dict['rev']
        del self.query_dict['rev']
      else: # elif 'revision' not in self.query_dict: ?
        self.query_dict['revision'] = self.query_dict['rev']
        del self.query_dict['rev']
      needs_redirect = 1

    if self.repos and self.view_func is not redirect_pathrev:
      # If this is an intended-to-be-hidden CVSROOT path, complain.
      if cfg.options.hide_cvsroot \
         and is_cvsroot_path(self.roottype, path_parts):
        raise ViewVCException("Unknown location: /%s" % self.where,
                              "404 Not Found")

      # Make sure path exists
      self.pathrev = pathrev = self.query_dict.get('pathrev')
      self.pathtype = _repos_pathtype(self.repos, path_parts, pathrev)

      if self.pathtype is None:
        # Path doesn't exist, see if it could be an old-style ViewVC URL
        # with a fake suffix.
        result = _strip_suffix('.diff', path_parts, pathrev, vclib.FILE,     \
                               self.repos, view_diff) or                     \
                 _strip_suffix('.tar.gz', path_parts, pathrev, vclib.DIR,    \
                               self.repos, download_tarball) or              \
                 _strip_suffix('root.tar.gz', path_parts, pathrev, vclib.DIR,\
                               self.repos, download_tarball) or              \
                 _strip_suffix(self.rootname + '-root.tar.gz',               \
                               path_parts, pathrev, vclib.DIR,               \
                               self.repos, download_tarball) or              \
                 _strip_suffix('root',                                       \
                               path_parts, pathrev, vclib.DIR,               \
                               self.repos, download_tarball) or              \
                 _strip_suffix(self.rootname + '-root',                      \
                               path_parts, pathrev, vclib.DIR,               \
                               self.repos, download_tarball)
        if result:
          self.path_parts, self.pathtype, self.view_func = result
          self.where = _path_join(self.path_parts)
          needs_redirect = 1
        else:
          raise ViewVCException("Unknown location: /%s" % self.where,
                                "404 Not Found")

      # If we have an old ViewCVS Attic URL which is still valid, redirect
      if self.roottype == 'cvs':
        attic_parts = None
        if (self.pathtype == vclib.FILE and len(self.path_parts) > 1
            and self.path_parts[-2] == 'Attic'):
          attic_parts = self.path_parts[:-2] + self.path_parts[-1:]
        elif (self.pathtype == vclib.DIR and len(self.path_parts) > 0
              and self.path_parts[-1] == 'Attic'):
          attic_parts = self.path_parts[:-1]
        if attic_parts:
          self.path_parts = attic_parts
          self.where = _path_join(attic_parts)
          needs_redirect = 1

    if self.view_func is None:
      # view parameter is not set, try looking at pathtype and the
      # other parameters
      if not self.rootname:
        self.view_func = view_roots
      elif self.pathtype == vclib.DIR:
        # ViewCVS 0.9.2 used to put ?tarball=1 at the end of tarball urls
        if 'tarball' in self.query_dict:
          self.view_func = download_tarball
        elif 'r1' in self.query_dict and 'r2' in self.query_dict:
          self.view_func = view_diff
        else:
          self.view_func = view_directory
      elif self.pathtype == vclib.FILE:
        if 'r1' in self.query_dict and 'r2' in self.query_dict:
          self.view_func = view_diff
        elif 'annotate' in self.query_dict:
          self.view_func = view_annotate
        elif 'graph' in self.query_dict:
          if 'makeimage' not in self.query_dict:
            self.view_func = view_cvsgraph
          else:
            self.view_func = view_cvsgraph_image
        elif 'revision' in self.query_dict \
                 or cfg.options.default_file_view != "log":
          if cfg.options.default_file_view == "markup" \
             or self.query_dict.get('content-type', None) \
                 in (viewcvs_mime_type, alt_mime_type):
            self.view_func = view_markup
          else:
            self.view_func = view_checkout
        else:
          self.view_func = view_log

    # If we've chosen the roots or revision view, our effective
    # location is not really "inside" the repository, so we have no
    # path and therefore no path parts or type, either.
    if self.view_func is view_revision or self.view_func is view_roots:
      self.where = ''
      self.path_parts = []
      self.pathtype = None

    # if we have a directory and the request didn't end in "/", then redirect
    # so that it does.
    if (self.pathtype == vclib.DIR and path_info[-1:] != '/'
        and self.view_func is not download_tarball
        and self.view_func is not redirect_pathrev):
      needs_redirect = 1

    # startup is done now.

    # If we need to redirect, do so.  Otherwise, handle our requested view.
    if needs_redirect:
      self.server.redirect(self.get_url())
    else:
      self.view_func(self)

  def get_url(self, escape=0, partial=0, prefix=0, **args):
    """Constructs a link to another ViewVC page just like the get_link
    function except that it returns a single URL instead of a URL
    split into components.  If PREFIX is set, include the protocol and
    server name portions of the URL.  If PARTIAL is set, the URL ends
    with '&' or '?' so that further parameters can be appended.  If
    ESCAPE is set, the URL is passed through the server's escape()."""

    url, params = self.get_link(*(), **args)
    qs = _urlencode(params)
    if qs:
      result = _quote(url, _URL_SAFE_CHARS,
                      'utf-8', 'surrogateescape') + '?' + qs
    else:
      result = _quote(url, _URL_SAFE_CHARS,
                      'utf-8', 'surrogateescape')

    if partial:
      result = result + (qs and '&' or '?')
    if escape:
      result = self.server.escape(result)
    if prefix:
      # scheme is chosen from the HTTPS variable, host from HTTP_HOST
      result = '%s://%s%s' % \
               (self.server.getenv("HTTPS") == "on" and "https" or "http",
                self.server.getenv("HTTP_HOST"),
                result)
    return result

  def get_form(self, **args):
    """Constructs a link to another ViewVC page just like the get_link
    function except that it returns a base URL suitable for use as an
    HTML form action, and an iterable object with .name and .value
    attributes representing stuff that should be in <input
    type=hidden> tags with the link parameters."""

    url, params = self.get_link(*(), **args)
    action = self.server.escape(_quote(url, _URL_SAFE_CHARS,
                                       'utf-8', 'surrogateescape'))
    hidden_values = []
    for name, value in params.items():
      hidden_values.append(_item(name=self.server.escape(name),
                                 value=self.server.escape(value)))
    return action, hidden_values

  def get_link(self, view_func=None, where=None, pathtype=None, params=None):
    """Constructs a link pointing to another ViewVC page. All arguments
    correspond to members of the Request object. If they are set to
    None they take values from the current page. Return value is a base
    URL and a dictionary of parameters.

    WHERE and PATHTYPE must be given together or not at all.  PARAMS is
    copied, never modified; entries whose value is None are left out of
    the returned dictionary."""

    cfg = self.cfg

    if view_func is None:
      view_func = self.view_func

    # always work on a copy so neither the caller's dictionary nor
    # self.query_dict is modified
    if params is None:
      params = self.query_dict.copy()
    else:
      params = params.copy()

    # must specify both where and pathtype or neither
    assert (where is None) == (pathtype is None)

    # if we are asking for the revision info view, we don't need any
    # path information
    if (view_func is view_revision or view_func is view_roots
        or view_func is redirect_pathrev):
      where = pathtype = None
    elif where is None:
      where = self.where
      pathtype = self.pathtype

    # no need to add sticky variables for views with no links
    sticky_vars = not (view_func is view_checkout
                       or view_func is download_tarball)

    # The logic used to construct the URL is an inverse of the
    # logic used to interpret URLs in Request.run_viewvc

    url = self.script_name

    # add root to url
    rootname = None
    if view_func is not view_roots:
      if cfg.options.root_as_url_component:
        # remove root from parameter list if present
        try:
          rootname = params['root']
        except KeyError:
          rootname = self.rootname
        else:
          del params['root']

        # add root path component
        if rootname is not None:
          url = url + '/' + rootname

      else:
        # add root to parameter list
        try:
          rootname = params['root']
        except KeyError:
          rootname = params['root'] = self.rootname

        # no need to specify default root
        if rootname == cfg.general.default_root:
          del params['root']

    # add 'pathrev' value to parameter list
    if (self.pathrev is not None
        and 'pathrev' not in params
        and view_func is not view_revision
        and rootname == self.rootname):
      params['pathrev'] = self.pathrev

    # add path
    if where:
      url = url + '/' + where

    # add trailing slash for a directory
    if pathtype == vclib.DIR:
      url = url + '/'

    # normalize top level URLs for use in Location headers and A tags
    elif not url:
      url = '/'

    # no need to explicitly specify directory view for a directory
    if view_func is view_directory and pathtype == vclib.DIR:
      view_func = None

    # no need to explicitly specify roots view when in root_as_url
    # mode or there's no default root
    if view_func is view_roots and (cfg.options.root_as_url_component
                                    or not cfg.general.default_root):
      view_func = None

    # no need to explicitly specify annotate view when
    # there's an annotate parameter
    if view_func is view_annotate and params.get('annotate') is not None:
      view_func = None

    # no need to explicitly specify diff view when
    # there's r1 and r2 parameters
    if (view_func is view_diff and params.get('r1') is not None
        and params.get('r2') is not None):
      view_func = None

    # no need to explicitly specify checkout view when it's the default view
    if view_func is view_checkout:
      if (cfg.options.default_file_view == "co" \
          and pathtype == vclib.FILE):
        view_func = None

    # no need to explicitly specify markup view when it's the default view
    if view_func is view_markup:
      if (cfg.options.default_file_view == "markup" \
          and pathtype == vclib.FILE):
        view_func = None

    # set the view parameter (an explicit view=None in PARAMS suppresses it)
    view_code = _view_codes.get(view_func)
    if view_code and not ('view' in params and params['view'] is None):
      params['view'] = view_code

    # add sticky values to parameter list
    if sticky_vars:
      for name in _sticky_vars:
        value = self.query_dict.get(name)
        if value is not None and name not in params:
          params[name] = value

    # remove null values from parameter list
    for name, value in list(params.items()):
      if value is None:
        del params[name]

    return url, params
598
def _path_parts(path):
  """Return the non-empty '/'-separated components of PATH as a list.

  Dropping the empty components takes care of duplicate '/' characters
  as well as any at the front or end of the path."""
  return list(filter(None, path.split('/')))
604
def _normalize_path(path):
  """Reduce a run of leading slashes in the script name to a single one.

  Multiple leading slashes only show up when users accidentally type
  urls like http://abc.com//viewvc.cgi/, but the script name is written
  into links, and web browsers interpret //viewvc.cgi/ as
  http://viewvc.cgi/, so it is corrected here.
  """
  without_leading = path.lstrip('/')
  slash_count = len(path) - len(without_leading)
  if slash_count == 0:
    return path
  # keep exactly one of the leading slashes
  return path[slash_count - 1:]
624
def _validate_param(name, value):
  """Check that VALUE is acceptable for the query parameter NAME.

  Returns None when the value is fine.  Raises ViewVCException with a
  "400 Bad Request" status when NAME is not a known parameter or when
  VALUE fails that parameter's validator.
  """

  # An unknown parameter name is rejected outright.
  try:
    validator = _legal_params[name]
  except KeyError:
    raise ViewVCException(
      'An illegal parameter name was provided.',
      '400 Bad Request')

  # No validator at all means every value is accepted.
  if validator is None:
    return

  # A validator is either a compiled regex or a plain callable.
  if hasattr(validator, 'match'):
    accepted = validator.match(value)
  else:
    accepted = validator(value)
  if accepted:
    return

  # Anything that falls through to here is an invalid value.
  raise ViewVCException(
    'An illegal value was provided for the "%s" parameter.' % (name),
    '400 Bad Request')
655
def _validate_regex(value):
  """Return True if VALUE compiles as a regular expression, else None."""
  ### we need to watch the flow of these parameters through the system
  ### to ensure they don't hit the page unescaped. otherwise, these
  ### parameters could constitute a cross-site scripting (XSS) attack.
  try:
    re.compile(value)
  except Exception:
    # Any failure to compile means the value is not usable as a regex.
    # (Deliberately not a bare "except:", which would also swallow
    # KeyboardInterrupt and SystemExit.)
    return None
  return True
665
def _validate_view(value):
  """Tell whether VALUE is one of our allowed views (a key of _views)."""
  is_known_view = value in _views
  return is_known_view
669
def _validate_mimetype(value):
  """Tell whether VALUE is one of the few mimetypes we accept.

  For security purposes only a predefined set of mimetypes is allowed."""
  allowed_mimetypes = (viewcvs_mime_type, alt_mime_type, 'text/plain')
  return value in allowed_mimetypes
674
# Compiled patterns used as query parameter validators.  All of them are
# written as raw strings so that regex escapes such as \d and \[ are not
# interpreted (or warned about) as Python string escapes.

# obvious things here. note that we don't need uppercase for alpha.
_re_validate_alpha = re.compile(r'^[a-z]+$')
_re_validate_number = re.compile(r'^[0-9]+$')
_re_validate_boolint = re.compile(r'^[01]$')

# when comparing two revs, we sometimes construct REV:SYMBOL, so ':' is needed
_re_validate_revnum = re.compile(r'^[-_.a-zA-Z0-9:~\[\]/]*$')

# date time values: empty, YYYY-MM-DD, or YYYY-MM-DD followed by
# whitespace and HH:MM with optional :SS
_re_validate_datetime = re.compile(r'^(\d\d\d\d-\d\d-\d\d(\s+\d\d:\d\d'
                                   r'(:\d\d)?)?)?$')
686
# the legal query parameters and their validation functions
#
# Each value is interpreted by _validate_param():
#   None            - the parameter is legal and any value is accepted
#   compiled regex  - the value must match the pattern
#   callable        - the callable must return a true value
# A parameter name that is not a key of this dictionary is rejected.
_legal_params = {
  'root'          : None,
  'view'          : _validate_view,
  'search'        : _validate_regex,
  'p1'            : None,
  'p2'            : None,

  'hideattic'     : _re_validate_boolint,
  'limit_changes' : _re_validate_number,
  'sortby'        : _re_validate_alpha,
  'sortdir'       : _re_validate_alpha,
  'logsort'       : _re_validate_alpha,
  'diff_format'   : _re_validate_alpha,
  'pathrev'       : _re_validate_revnum,
  'dir_pagestart' : _re_validate_number,
  'log_pagestart' : _re_validate_number,
  'annotate'      : _re_validate_revnum,
  'graph'         : _re_validate_revnum,
  'makeimage'     : _re_validate_boolint,
  'r1'            : _re_validate_revnum,
  'tr1'           : _re_validate_revnum,
  'r2'            : _re_validate_revnum,
  'tr2'           : _re_validate_revnum,
  'revision'      : _re_validate_revnum,
  'content-type'  : _validate_mimetype,

  # for cvsgraph
  'gflip'         : _re_validate_boolint,
  'gbbox'         : _re_validate_boolint,
  'gshow'         : _re_validate_alpha,
  'gleft'         : _re_validate_boolint,
  'gmaxtag'       : _re_validate_number,

  # for query
  'file_match'    : _re_validate_alpha,
  'branch_match'  : _re_validate_alpha,
  'who_match'     : _re_validate_alpha,
  'comment_match' : _re_validate_alpha,
  'dir'           : None,
  'file'          : None,
  'branch'        : None,
  'who'           : None,
  'comment'       : None,
  'querysort'     : _re_validate_alpha,
  'date'          : _re_validate_alpha,
  'hours'         : _re_validate_number,
  'mindate'       : _re_validate_datetime,
  'maxdate'       : _re_validate_datetime,
  'format'        : _re_validate_alpha,

  # for redirect_pathrev
  'orig_path'     : None,
  'orig_pathtype' : None,
  'orig_pathrev'  : None,
  'orig_view'     : None,

  # deprecated - these are no longer used, but kept around so that
  # bookmarked URLs still "work" (for some definition thereof) after a
  # ViewVC upgrade.
  'parent'        : _re_validate_boolint,
  'rev'           : _re_validate_revnum,
  'tarball'       : _re_validate_boolint,
  'hidecvsroot'   : _re_validate_boolint,
  'limit'         : _re_validate_number,
  }
753
def _path_join(path_parts):
  """Glue the components in PATH_PARTS together with '/' separators."""
  separator = '/'
  return separator.join(path_parts)
756
def _path_starts_with(path_parts, first_path_parts):
  """Tell whether the list PATH_PARTS begins with the components in
  FIRST_PATH_PARTS.  An empty PATH_PARTS never matches."""
  if not path_parts:
    return False
  prefix_len = len(first_path_parts)
  if prefix_len > len(path_parts):
    return False
  return path_parts[:prefix_len] == first_path_parts
763
def _strip_suffix(suffix, path_parts, rev, pathtype, repos, view_func):
  """Try to interpret PATH_PARTS as a repository path with SUFFIX tacked
  onto its last component.

  If the last component ends with SUFFIX and the path without it has
  type PATHTYPE in REPOS at REV, return a tuple of the stripped path
  parts (a new list), that type and VIEW_FUNC.  Otherwise return None."""
  if not path_parts:
    return None

  suffix_len = len(suffix)
  last_part = path_parts[-1]
  if last_part[-suffix_len:] != suffix:
    return None

  # work on a copy; the caller's list is left untouched
  stripped_parts = path_parts[:]
  if len(last_part) == suffix_len:
    # the last component is nothing but the suffix, so drop it entirely
    stripped_parts.pop()
  else:
    stripped_parts[-1] = last_part[:-suffix_len]

  found_type = _repos_pathtype(repos, stripped_parts, rev)
  if pathtype == found_type:
    return stripped_parts, found_type, view_func
  return None
780
def _repos_pathtype(repos, path_parts, rev):
  """Ask REPOS for the item type of PATH_PARTS at REV.

  Returns whatever repos.itemtype() reports, or None when it raises
  vclib.ItemNotFound (that is, when the path doesn't exist)."""
  try:
    found_type = repos.itemtype(path_parts, rev)
  except vclib.ItemNotFound:
    return None
  return found_type
788
def _orig_path(request, rev_param='revision', path_param=None):
  """Get original path of requested file at old revision before copies
  or moves.

  The revision is read from the query parameter named REV_PARAM
  (default: the request's pathrev) and the path from the one named
  PATH_PARAM (default: the request's current location).  Returns a
  tuple of the path as a list of components and the revision.  Raises
  ViewVCException ("404 Not Found") for an invalid revision."""

  # The 'pathrev' variable is interpreted by nearly all ViewVC views to
  # provide a browsable snapshot of a repository at some point in its
  # history.  'pathrev' is a tag name for CVS repositories and a revision
  # number for Subversion repositories.  It's automatically propagated
  # between pages by logic in the Request.get_link() function which adds
  # it to links like a sticky variable.  When 'pathrev' is set, directory
  # listings only include entries that exist in the specified revision or
  # tag.  Similarly, log pages will only show revisions preceding the
  # point in history specified by 'pathrev.'  Markup, checkout, and
  # annotate pages show the 'pathrev' revision of files by default when
  # no other revision is specified.
  #
  # In Subversion repositories, paths are always considered to refer to
  # the pathrev revision.  For example, if there is a "circle.jpg" in
  # revision 3, which is renamed and modified as "square.jpg" in revision
  # 4, the original circle image is visible at the following URLs:
  #
  #     .../circle.jpg?pathrev=3
  #     .../square.jpg?revision=3
  #     .../square.jpg?revision=3&pathrev=4
  #
  # Note that the following:
  #
  #     .../circle.jpg?rev=3
  #
  # now gets redirected to one of the following URLs:
  #
  #     .../circle.jpg?pathrev=3  (for Subversion)
  #     .../circle.jpg?revision=3  (for CVS)
  #
  query = request.query_dict
  rev = query.get(rev_param, request.pathrev)
  path = query.get(path_param, request.where)

  # Without a revision, or with a repository that has no _getrev(),
  # the path is used as it stands.
  if rev is None or not hasattr(request.repos, '_getrev'):
    return _path_parts(path), rev

  try:
    pathrev = request.repos._getrev(request.pathrev)
    rev = request.repos._getrev(rev)
  except vclib.InvalidRevision:
    raise ViewVCException('Invalid revision', '404 Not Found')
  location = request.repos.get_location(path, pathrev, rev)
  return _path_parts(location), rev
832
def setup_authorizer(cfg, username, rootname=None):
  """Setup the authorizer.  If ROOTNAME is provided, assume that
  per-root options have not been overlayed.  Otherwise, assume they
  have (and fetch the authorizer for the configured root).

  Return None if no authorizer is configured.  Otherwise import the
  submodule of the 'vcauth' package named by the configured
  authorizer and return an instance of its ViewVCAuthorizer class,
  constructed with a root-lookup callback, USERNAME, and the
  configured authorizer parameters.

  Raise a '500 Internal Server Error' ViewVCException if that module
  cannot be imported."""

  if rootname is None:
    authorizer = cfg.options.authorizer
    params = cfg.get_authorizer_params()
  else:
    authorizer, params = cfg.get_authorizer_and_params_hack(rootname)

  # No configured authorizer?  No problem.
  if not authorizer:
    return None

  # First, try to load a module with the configured name.  We import
  # it as a proper submodule of the vcauth package: the 'imp' module
  # that used to do this job was removed in Python 3.12, and loading
  # the authorizer under the module name 'viewvc' (as the old code
  # did) registered it in sys.modules under that unrelated name.
  import importlib
  try:
    my_auth = importlib.import_module('%s.%s' % (vcauth.__name__, authorizer))
  except ImportError:
    raise ViewVCException(
      'Invalid authorizer (%s) specified for root "%s"' \
      % (authorizer, rootname),
      '500 Internal Server Error')

  # Add a rootname mapping callback function to the parameters.
  def _root_lookup_func(cb_rootname):
    return locate_root(cfg, cb_rootname)

  # Finally, instantiate our Authorizer.
  return my_auth.ViewVCAuthorizer(_root_lookup_func, username, params)
870
def check_freshness(request, mtime=None, etag=None, weak=0):
  """Decide whether the client's cached copy of a response is still
  fresh, and queue the matching cache-related headers on
  REQUEST.server.

  MTIME is the resource's modification time in seconds since the
  epoch, and ETAG its (unquoted) entity tag; either may be None.  If
  WEAK is true, ETAG is sent and compared as a weak validator
  ('W/"..."').

  When ETAG is provided it alone decides freshness, by exact
  comparison with the request's If-None-Match header.  Otherwise, if
  MTIME is provided, the response is fresh when the request's
  If-Modified-Since time is no older than MTIME.  A missing or
  unparseable If-Modified-Since header means "not fresh".

  If the response is fresh, start a '304 Not Modified' response;
  otherwise add 'ETag' and/or 'Last-Modified' headers for the values
  provided.  In both cases 'Expires' and 'Cache-Control' headers are
  added when the configured http_expiration_time is non-negative.

  Return a true value if the response is fresh, a false one otherwise
  (always 0 when ETag generation is disabled in the configuration)."""
  cfg = request.cfg

  # See if we are supposed to disable etags (for debugging, usually)
  if not cfg.options.generate_etags:
    return 0

  request_etag = request_mtime = None
  if etag is not None:
    if weak:
      etag = 'W/"%s"' % etag
    else:
      etag = '"%s"' % etag
    request_etag = request.server.getenv('HTTP_IF_NONE_MATCH')
  if mtime is not None:
    # The header comes straight from the client, so it may be absent
    # or malformed.  parsedate_tz() returns None when it can't parse
    # its input; the exceptions listed are the ones bad or out-of-range
    # date fields can provoke.  Anything else is a real bug and is
    # allowed to propagate rather than being silently swallowed.
    try:
      parsed = email.utils.parsedate_tz(
                   request.server.getenv('HTTP_IF_MODIFIED_SINCE'))
      if parsed is not None:
        request_mtime = email.utils.mktime_tz(parsed)
    except (TypeError, ValueError, OverflowError, IndexError):
      request_mtime = None

  # if we have an etag, use that for freshness checking.
  # if not available, then we use the last-modified time.
  # if not available, then the document isn't fresh.
  if etag is not None:
    isfresh = (request_etag == etag)
  elif mtime is not None and request_mtime is not None:
    isfresh = (request_mtime >= mtime)
  else:
    isfresh = 0

  # require revalidation after the configured amount of time
  if cfg.options.http_expiration_time >= 0:
    expiration = email.utils.formatdate(time.time() +
                                        cfg.options.http_expiration_time)
    request.server.add_header('Expires', expiration)
    request.server.add_header('Cache-Control',
                              'max-age=%d' % cfg.options.http_expiration_time)

  if isfresh:
    request.server.start_response(status='304 Not Modified')
  else:
    if etag is not None:
      request.server.add_header('ETag', etag)
    if mtime is not None:
      request.server.add_header('Last-Modified', email.utils.formatdate(mtime))
  return isfresh
919
def get_view_template(cfg, view_name, language="en"):
  """Return the ezt.Template object to use for the view VIEW_NAME,
  per the configuration CFG, with any '%lang%' placeholder in the
  template path replaced by LANGUAGE."""

  # A template configured specifically for this view wins; otherwise
  # fall back to the default template name for the view.
  configured = vars(cfg.templates).get(view_name)
  template_file = configured if configured else view_name + ".ezt"

  # Template paths are relative to the configured template_dir (or to
  # "templates" when none is configured).
  template_dir = cfg.options.template_dir
  if not template_dir:
    template_dir = "templates"
  relative_path = os.path.join(template_dir, template_file)

  # Allow per-language template selection.
  relative_path = relative_path.replace('%lang%', language)

  # Let the configuration construct the whole path, then load it.
  return ezt.Template(cfg.path(relative_path))
934
def get_writeready_server_file(request, content_type=None, encoding=None,
                               content_length=None, allow_compress=True,
                               is_text=False):
  """Start the response on REQUEST.server and return a file handle to
  which the response body can be written.

  Any headers already queued on REQUEST.server are sent as part of
  starting the response.  If CONTENT_TYPE is provided it is passed
  along as the response's content type, with ENCODING (if provided
  too) appended as its character set.

  Compression is used when both ALLOW_COMPRESS and
  REQUEST.gzip_compress_level are true: a 'Content-Encoding: gzip'
  header is added and the returned handle gzips what is written to it.
  Callers may pass a false ALLOW_COMPRESS to disable compression where
  it would otherwise be used (such as when transmitting an
  already-compressed response).  When compression is not used and
  CONTENT_LENGTH is provided, a 'Content-Length' header is added
  instead.

  If IS_TEXT is true, the returned handle is wrapped so that it
  accepts text, which is written as UTF-8 using the 'surrogateescape'
  error handler.

  After this function is called, it is too late to add new headers to
  the response."""

  server = request.server
  compress = allow_compress and request.gzip_compress_level

  # Last chance to add headers.
  if compress:
    server.add_header('Content-Encoding', 'gzip')
  elif content_length is not None:
    server.add_header('Content-Length', content_length)

  # Start the response.
  if not content_type:
    server.start_response()
  elif encoding:
    server.start_response("%s; charset=%s" % (content_type, encoding))
  else:
    server.start_response(content_type)

  # Build the body stream, layering on whatever wrappers were asked for.
  fp = server.file()
  if compress:
    fp = gzip.GzipFile('', 'wb', request.gzip_compress_level, fp)
  if is_text:
    fp = TextIOWrapper_noclose(fp, 'utf-8', 'surrogateescape',
                               write_through=True)
  return fp
976
def generate_page(request, view_name, data, content_type=None):
  """Start a UTF-8 text response of type CONTENT_TYPE on REQUEST and
  write to it the template for view VIEW_NAME (in the request's
  language), filled in with DATA."""
  # The response must be started before the template is loaded and
  # generated, so keep these two steps in this order.
  out = get_writeready_server_file(request, content_type, 'utf-8',
                                   is_text=True)
  get_view_template(request.cfg, view_name,
                    request.language).generate(out, data)
982
def transcode_path_for_display(path, encoding, errors='replace'):
  """Return PATH re-interpreted in ENCODING: PATH is turned back into
  bytes via UTF-8 (with 'surrogateescape' restoring any escaped raw
  bytes), and those bytes are then decoded as ENCODING using the error
  handler ERRORS."""
  raw_bytes = path.encode('utf-8', 'surrogateescape')
  return raw_bytes.decode(encoding, errors)
985
def nav_path(request):
  """Return the current path as a list of items having "name" and
  "href" members: first one for the repository root, then one per
  path component.

  The names are escaped for output.  The href members are
  view_directory links for directories and view_log links for files,
  but are left as None when the link would point to the current view.
  Return an empty list if the request has no repository."""

  if not request.repos:
    return []

  viewing_dir = request.pathtype == vclib.DIR
  escape = request.server.escape

  # The root item links to the top-level directory unless that is
  # exactly what is being viewed.
  root = _item(name=escape(request.repos.name), href=None)
  if request.path_parts or request.view_func is not view_directory:
    root.href = request.get_url(view_func=view_directory,
                                where='', pathtype=vclib.DIR,
                                params={}, escape=1)
  crumbs = [root]

  # One item per path component; PREFIX accumulates the path so far.
  last_index = len(request.path_parts) - 1
  prefix = []
  for index, part in enumerate(request.path_parts):
    prefix.append(part)

    # Only the displayed name is transcoded, never the path we link to.
    shown = part
    if request.roottype == 'cvs':
      shown = transcode_path_for_display(part, request.repos.encoding)
    crumb = _item(name=escape(shown), href=None)

    if index != last_index \
       or (viewing_dir and request.view_func is not view_directory):
      target_func, target_type = view_directory, vclib.DIR
    elif not viewing_dir and request.view_func is not view_log:
      target_func, target_type = view_log, vclib.FILE
    else:
      # This item is the current view: no link.
      target_func = target_type = None

    if target_func is not None:
      crumb.href = request.get_url(view_func=target_func,
                                   where=_path_join(prefix),
                                   pathtype=target_type,
                                   params={}, escape=1)
    crumbs.append(crumb)

  return crumbs
1030
def prep_tags(request, tags):
  """Return a list of items with "name" and "href" members, one per
  tag in TAGS, sorted by name.

  Each TAGS element must have a "name" attribute.  Each href is the
  request's current link with its 'pathrev' parameter set to that tag
  name; the part of the URL preceding the tag name is URL-quoted and
  escaped for output."""
  url, params = request.get_link(params={'pathrev': None})
  params = _urlencode(params)
  url = _quote(url, _URL_SAFE_CHARS, 'utf-8', 'surrogateescape')
  if params:
    url = url + '?' + params + '&pathrev='
  else:
    url = url + '?pathrev='
  url = request.server.escape(url)

  links = [_item(name=tag.name, href=url + tag.name) for tag in tags]
  # Sort with a key function: the cmp() builtin that the old
  # comparator relied on does not exist in Python 3.
  links.sort(key=lambda link: link.name)
  return links
1047
def guess_mime(filename):
  """Return the MIME type that the mimetypes module guesses for
  FILENAME, or None if it has no guess."""
  mime_type, _encoding = mimetypes.guess_type(filename)
  return mime_type
1050
def is_viewable_image(mime_type):
  """Return True if MIME_TYPE is one of the image types displayed
  inline (GIF, JPEG, PNG) and False if it is some other type.  An
  empty or None MIME_TYPE is returned unchanged (and so is false)."""
  if not mime_type:
    return mime_type
  return mime_type in ('image/gif', 'image/jpeg', 'image/png')
1053
def is_text(mime_type):
  """Return True if MIME_TYPE is unset (None or empty) or begins with
  'text/', and False otherwise."""
  if not mime_type:
    return True
  return mime_type[:5] == 'text/'
1056
def is_cvsroot_path(roottype, path_parts):
  """Return a true value iff ROOTTYPE is 'cvs' and the first component
  of PATH_PARTS is 'CVSROOT'.  (For a 'cvs' root with empty PATH_PARTS,
  the false value returned is PATH_PARTS itself.)"""
  if roottype != 'cvs':
    return False
  if not path_parts:
    return path_parts
  return path_parts[0] == 'CVSROOT'
1059
def is_plain_text(mime_type):
  """Return True if MIME_TYPE is unset (None or empty) or is exactly
  'text/plain', and False otherwise."""
  if not mime_type:
    return True
  return mime_type == 'text/plain'
1062
def default_view(mime_type, cfg):
  """Determine whether a file should be viewed through the markup page
  or sent raw: return view_markup or view_checkout accordingly."""
  # When the markup view is allowed, text/anything and the supported
  # image formats go through the markup page.  Every other known type
  # is sent directly to the browser, so that users can see things like
  # flash animations, pdfs, word documents, multimedia, etc, which
  # wouldn't be very useful marked up.  A totally unknown mime type
  # (which happens when we encounter an unrecognized file extension)
  # counts as text here, so it is viewed through the markup page too,
  # since that's better than sending it text/plain.
  if 'markup' not in cfg.options.allowed_views:
    return view_checkout
  if is_viewable_image(mime_type) or is_text(mime_type):
    return view_markup
  return view_checkout
1076
def is_binary_file_mime_type(mime_type, cfg):
  """Return True iff MIME_TYPE is set and is designated as binary by
  the binary_mime_types option in CFG."""
  if not mime_type:
    return False

  binary_types = cfg.options.binary_mime_types

  # The web-friendly images count as binary only when listed
  # explicitly; a wildcard pattern is not enough for them.
  if is_viewable_image(mime_type):
    return mime_type in binary_types

  # Every other type is compared against each configured pattern.
  return any(fnmatch.fnmatch(mime_type, pattern) for pattern in binary_types)
1089
def is_dir_ignored_file(file_name, cfg):
  """Return True if FILE_NAME is set and matches at least one of the
  patterns in CFG's dir_ignored_files option (the file names or
  extensions to be left out of directory listings); otherwise return
  False."""
  if not file_name:
    return False
  return any(fnmatch.fnmatch(file_name, pattern)
             for pattern in cfg.options.dir_ignored_files)
1098
def get_file_view_info(request, where, rev=None, mime_type=None, pathrev=-1):
  """Return an object holding common hrefs and viewability flags used
  for various views of the file at path WHERE, at revision REV, whose
  MIME type is MIME_TYPE (guessed from WHERE if not provided).

  PATHREV is the 'pathrev' value to carry in the generated links; it
  defaults to the request's own pathrev.  Pass None explicitly to
  generate links without one.

  The object's members include:
     view_href
     download_href
     download_text_href
     annotate_href
     revision_href
     prefer_markup
     is_viewable_image
     is_binary

  An href is None when its view is not in the configuration's
  allowed_views ('markup', 'co', 'annotate') or otherwise does not
  apply: download_text_href is only generated for files that are not
  plain text, and revision_href only for Subversion roots.
  """

  rev = (str(rev) or None) if rev else None
  if not mime_type:
    mime_type = guess_mime(where)
  if pathrev == -1: # cheesy default value, since we need to preserve None
    pathrev = request.pathrev

  allowed_views = request.cfg.options.allowed_views

  def file_href(view_func, params):
    # Link to VIEW_FUNC's view of this file, escaped for output.
    return request.get_url(view_func=view_func,
                           where=where,
                           pathtype=vclib.FILE,
                           params=params,
                           escape=1)

  view_href = download_href = download_text_href = None
  annotate_href = revision_href = None

  if 'markup' in allowed_views:
    view_href = file_href(view_markup, {'revision': rev,
                                        'pathrev': pathrev})
  if 'co' in allowed_views:
    download_href = file_href(view_checkout, {'revision': rev,
                                              'pathrev': pathrev})
    if not is_plain_text(mime_type):
      download_text_href = file_href(view_checkout,
                                     {'content-type': 'text/plain',
                                      'revision': rev,
                                      'pathrev': pathrev})
  if 'annotate' in allowed_views:
    annotate_href = file_href(view_annotate, {'annotate': rev,
                                              'pathrev': pathrev})
  if request.roottype == 'svn':
    # The revision view is not tied to this file's path.
    revision_href = request.get_url(view_func=view_revision,
                                    params={'revision': rev},
                                    escape=1)

  is_binary_file = is_binary_file_mime_type(mime_type, request.cfg)
  prefer_markup = default_view(mime_type, request.cfg) == view_markup

  return _item(view_href=view_href,
               download_href=download_href,
               download_text_href=download_text_href,
               annotate_href=annotate_href,
               revision_href=revision_href,
               prefer_markup=ezt.boolean(prefer_markup),
               is_viewable_image=ezt.boolean(is_viewable_image(mime_type)),
               is_binary=ezt.boolean(is_binary_file))
1172
1173
1174# Matches URLs
1175_re_rewrite_url = re.compile('((http|https|ftp|file|svn|svn\+ssh)'
1176                             '(://[-a-zA-Z0-9%.~:_/]+)((\?|\&)'
1177                             '([-a-zA-Z0-9%.~:_]+)=([-a-zA-Z0-9%.~:_])+)*'
1178                             '(#([-a-zA-Z0-9%.~:_]+)?)?)')
1179# Matches email addresses
1180_re_rewrite_email = re.compile('([-a-zA-Z0-9_.\+]+)@'
1181                               '(([-a-zA-Z0-9]+\.)+[A-Za-z]{2,4})')
1182
1183# Matches revision references
1184_re_rewrite_svnrevref = re.compile(r'\b(r|rev #?|revision #?)([0-9]+)\b')
1185
class ViewVCHtmlFormatterTokens:
  """A sequence of tokens ready to be rendered as HTML.  Each token
  must have "match", "converter" and "userdata" members; rendering a
  token means calling its converter with its match, its userdata, and
  the number of visible characters still allowed."""

  def __init__(self, tokens):
    self.tokens = tokens

  def get_result(self, maxlen=0):
    """Render the tokens in order, each with its own converter, using
    no more than MAXLEN visible characters (or without limit if MAXLEN
    is 0).  Return a 3-tuple containing the formatted result string,
    the number of visible characters in the result string, and a flag
    (1 or 0) indicating whether or not the MAXLEN limit was reached.
    The flag is also set when the output exactly fills MAXLEN."""
    pieces = []
    visible = 0
    remaining = maxlen
    for token in self.tokens:
      chunk, chunk_len = token.converter(token.match, token.userdata,
                                         remaining)
      pieces.append(chunk)
      visible += chunk_len
      # A REMAINING of 0 means "no limit", so only count down when a
      # limit is actually in effect.
      if remaining:
        remaining -= chunk_len
        if remaining <= 0:
          return ''.join(pieces), visible, 1
    return ''.join(pieces), visible, 0
1207
1208
class ViewVCHtmlFormatter:
  """Format a string as HTML-encoded output with customizable markup
  rules, for example turning strings that look like URLs into anchor links.

  Rules are registered with add_formatter().  The format_*() methods
  are ready-made converters suitable for registration.  get_result()
  and tokenize_text() apply the registered rules to a string.

  NOTE:  While there might appear to be some unused portions of this
  interface, there is a good chance that there are consumers outside
  of ViewVC itself that make use of these things.
  """

  def __init__(self):
    # List of [compiled_regexp, converter, userdata] entries, in
    # registration order.
    self._formatters = []

  def format_url(self, mobj, userdata, maxlen=0):
    """Return a 2-tuple containing:
         - the text represented by MatchObject MOBJ, formatted as
           linkified URL, with no more than MAXLEN characters in the
           non-HTML-tag bits.  If MAXLEN is 0, there is no maximum.
         - the number of non-HTML-tag characters returned.
    """
    s = mobj.group(0)
    trunc_s = maxlen and s[:maxlen] or s
    return '<a href="%s">%s</a>' % (sapi.escape(s),
                                    sapi.escape(trunc_s)), \
           len(trunc_s)

  def format_email(self, mobj, userdata, maxlen=0):
    """Return a 2-tuple containing:
         - the text represented by MatchObject MOBJ, formatted as
           linkified email address, with no more than MAXLEN characters
           in the non-HTML-tag bits.  If MAXLEN is 0, there is no maximum.
         - the number of non-HTML-tag characters returned.
    """
    s = mobj.group(0)
    trunc_s = maxlen and s[:maxlen] or s
    return '<a href="mailto:%s">%s</a>' % (_quote(s),
                                           self._entity_encode(trunc_s)), \
           len(trunc_s)

  def format_email_obfuscated(self, mobj, userdata, maxlen=0):
    """Return a 2-tuple containing:
         - the text represented by MatchObject MOBJ, formatted as an
           entity-encoded email address, with no more than MAXLEN characters
           in the non-HTML-tag bits.  If MAXLEN is 0, there is no maximum.
         - the number of non-HTML-tag characters returned.
    """
    s = mobj.group(0)
    trunc_s = maxlen and s[:maxlen] or s
    return self._entity_encode(trunc_s), len(trunc_s)

  def format_email_truncated(self, mobj, userdata, maxlen=0):
    """Return a 2-tuple containing:
         - the text represented by MatchObject MOBJ, formatted as an
           HTML-escaped truncated email address of no more than MAXLEN
           characters.  If MAXLEN is 0, there is no maximum.
         - the number of characters returned.

       Only match group 1 of MOBJ (the part of the address before the
       '@') is shown, followed by an '@' and an ellipsis for as far as
       MAXLEN allows.
    """
    s = mobj.group(1)
    s_len = len(s)
    if (maxlen == 0) or (s_len < (maxlen - 1)):
      # Room for the name, the '@', and the ellipsis.
      return self._entity_encode(s) + '&#64;&hellip;', s_len + 2
    elif s_len < maxlen:
      # Room for the name and the '@' only.
      return self._entity_encode(s) + '&#64;', s_len + 1
    else:
      trunc_s = mobj.group(1)[:maxlen]
      return self._entity_encode(trunc_s), len(trunc_s)

  def format_svnrevref(self, mobj, userdata, maxlen=0):
    """Return a 2-tuple containing:
         - the text represented by MatchObject MOBJ, formatted as an
           linkified URL to a ViewVC Subversion revision view, with no
           more than MAXLEN characters in the non-HTML-tag portions.
           If MAXLEN is 0, there is no maximum.
         - the number of characters returned.

       USERDATA is a function that accepts a revision reference
       and returns a URL to that revision.
    """
    s = mobj.group(0)
    revref = mobj.group(2)
    trunc_s = maxlen and s[:maxlen] or s
    revref_url = userdata(revref)
    return '<a href="%s">%s</a>' % (sapi.escape(revref_url),
                                    sapi.escape(trunc_s)), \
           len(trunc_s)

  def format_custom_url(self, mobj, userdata, maxlen=0):
    """Return a 2-tuple containing:
         - the text represented by MatchObject MOBJ, formatted as an
           linkified URL created by substituting match groups 0-9 into
           USERDATA (which is a format string that uses \\N to
           represent the substitution locations) and with no more than
           MAXLEN characters in the non-HTML-tag portions.  If MAXLEN
           is 0, there is no maximum.
         - the number of characters returned.

       References to groups that MOBJ doesn't have, or that didn't
       take part in the match, are replaced with the empty string.
    """
    text = mobj.group(0)

    def _group_text(ref):
      # REF matched a backslash followed by one digit: the number of
      # the group to substitute.
      try:
        return mobj.group(int(ref.group(1))) or ''
      except IndexError:
        return ''

    # Substitute all ten possible references in a single pass over the
    # format string, so that text copied in from the match is never
    # itself scanned for further references.
    url = re.sub(r'\\([0-9])', _group_text, userdata)
    trunc_s = maxlen and text[:maxlen] or text
    return '<a href="%s">%s</a>' % (sapi.escape(url),
                                    sapi.escape(trunc_s)), \
           len(trunc_s)

  def format_text(self, s, unused, maxlen=0):
    """Return a 2-tuple containing:
         - the text S, HTML-escaped, containing no more than MAXLEN
           characters.  If MAXLEN is 0, there is no maximum.
         - the number of characters returned.
    """
    trunc_s = maxlen and s[:maxlen] or s
    return sapi.escape(trunc_s), len(trunc_s)

  def add_formatter(self, regexp, conv, userdata=None):
    """Register a formatter which finds instances of strings matching
    REGEXP, and using the function CONV and USERDATA to format them.

    REGEXP may be a pattern string (which is compiled here) or an
    already-compiled regular expression object.

    CONV is a function which accepts three parameters:
      - the MatchObject which holds the string portion to be formatted,
      - the USERDATA object,
      - the maximum number of characters from that string to use for
        human-readable output (or 0 to indicate no maximum).

    CONV must return a 2-tuple of the formatted text and the number
    of visible characters in it.
    """
    if isinstance(regexp, str):
      regexp = re.compile(regexp)
    self._formatters.append([regexp, conv, userdata])

  def get_result(self, s, maxlen=0):
    """Format S per the set of formatters registered with this object,
    and limited to MAXLEN visible characters (or unlimited if MAXLEN
    is 0).  Return a 3-tuple containing the formatted result string,
    the number of visible characters in the result string, and a
    boolean flag indicating whether or not S was truncated.
    """
    return self.tokenize_text(s).get_result(maxlen)

  def tokenize_text(self, s):
    """Return a ViewVCHtmlFormatterTokens object containing the tokens
    created when parsing the string S.  Callers can use that object's
    get_result() function to retrieve HTML-formatted text.
    """
    tokens = []
    # We could just have a "while s:" here instead of "for line: while
    # line:", but for really large log messages with heavy
    # tokenization, the cost in both performance and memory
    # consumption of that approach was atrocious.
    for line in s.replace('\r\n', '\n').split('\n'):
      line = line + '\n'
      while line:
        best_match = best_conv = best_userdata = None
        for test in self._formatters:
          match = test[0].search(line)
          # If we find a match and (a) it's our first one, or (b) it
          # matches text earlier than our previous best match, or (c) it
          # matches text at the same location as our previous best match
          # but extends to cover more text than that match, then this is
          # our new best match.
          #
          # Implied here is that when multiple formatters match exactly
          # the same text, the first formatter in the registration list wins.
          if match \
             and ((best_match is None) \
                  or (match.start() < best_match.start())
                  or ((match.start() == best_match.start()) \
                      and (match.end() > best_match.end()))):
            best_match = match
            best_conv = test[1]
            best_userdata = test[2]
        # If we found a match...
        if best_match:
          # ... add any non-matching stuff first, then the matching bit.
          start = best_match.start()
          end = best_match.end()
          if start > 0:
            tokens.append(_item(match=line[:start],
                                converter=self.format_text,
                                userdata=None))
          tokens.append(_item(match=best_match,
                              converter=best_conv,
                              userdata=best_userdata))
          line = line[end:]
        else:
          # Otherwise, just add the rest of the string.
          tokens.append(_item(match=line,
                              converter=self.format_text,
                              userdata=None))
          line = ''
    return ViewVCHtmlFormatterTokens(tokens)

  def _entity_encode(self, s):
    """Return S with every character replaced by its decimal HTML
    character reference ("&#NNN;")."""
    return ''.join(['&#%d;' % (ord(x)) for x in s])
1405
1406
class LogFormatter:
  """Produce, and cache, display versions of a single log message for
  a request: either HTML with URLs, email addresses, Subversion
  revision references and custom patterns marked up, or plain text."""

  def __init__(self, request, log):
    self.request = request
    self.log = log or ''
    self.tokens = None   # tokenized log message; built on first HTML use
    self.cache = {}  # (maxlen, htmlize) => resulting_log

  def get(self, maxlen=0, htmlize=1):
    """Return the log message, limited to MAXLEN visible characters
    (or unlimited if MAXLEN is 0).

    If HTMLIZE is true, the result is HTML produced by a
    ViewVCHtmlFormatter, with '&hellip;' appended when the formatter
    reports truncation.  Otherwise the result is the plain text of the
    message, with email addresses shortened to 'name@...' when the
    mangle_email_addresses option is 2.

    Results are cached per (MAXLEN, HTMLIZE) pair."""
    cfg = self.request.cfg
    cache_key = (maxlen, htmlize)

    # Prefer the cache.
    if cache_key in self.cache:
      return self.cache[cache_key]

    if htmlize:
      # Tokenize the log message once, then reuse the tokens for
      # every MAXLEN we're asked about.
      if not self.tokens:
        self.tokens = self._tokenize(cfg)
      html, _visible_len, was_truncated = self.tokens.get_result(maxlen)
      result_log = html + ('&hellip;' if was_truncated else '')
    else:
      # Plain text gets much more simplistic transformations.
      text = self.log
      if cfg.options.mangle_email_addresses == 2:
        text = _re_rewrite_email.sub(r'\1@...', text)
      result_log = (maxlen and text[:maxlen]) or text

    # In either case, populate the cache and return the results.
    self.cache[cache_key] = result_log
    return result_log

  def _tokenize(self, cfg):
    """Return the tokens produced by running the log message through
    a ViewVCHtmlFormatter set up with the rewrite rules that apply to
    this request and the configuration CFG."""
    formatter = ViewVCHtmlFormatter()

    # Rewrite URLs.
    formatter.add_formatter(_re_rewrite_url, formatter.format_url)

    # Rewrite Subversion revision references.
    if self.request.roottype == 'svn':
      def revision_to_url(rev):
        return self.request.get_url(view_func=view_revision,
                                    params={'revision': rev},
                                    escape=0)
      formatter.add_formatter(_re_rewrite_svnrevref,
                              formatter.format_svnrevref,
                              revision_to_url)

    # Rewrite email addresses, mangled as much as configured.
    email_mode = cfg.options.mangle_email_addresses
    if email_mode == 2:
      email_conv = formatter.format_email_truncated
    elif email_mode == 1:
      email_conv = formatter.format_email_obfuscated
    else:
      email_conv = formatter.format_email
    formatter.add_formatter(_re_rewrite_email, email_conv)

    # Add custom rewrite handling per configuration.  Each rule reads
    # "REGEXP:FORMAT"; a colon that belongs to either part is written
    # as '\:', so those are hidden behind a placeholder character
    # while the rule is split at its first bare colon.
    for rule in cfg.options.custom_log_formatting:
      hidden = rule.replace('\\:', '\x01')
      pattern, url_format = [piece.strip().replace('\x01', ':')
                             for piece in hidden.split(':', 1)]
      formatter.add_formatter(re.compile(pattern),
                              formatter.format_custom_url, url_format)

    return formatter.tokenize_text(self.log)
1474
1475
1476_time_desc = {
1477         1 : 'second',
1478        60 : 'minute',
1479      3600 : 'hour',
1480     86400 : 'day',
1481    604800 : 'week',
1482   2628000 : 'month',
1483  31536000 : 'year',
1484  }
1485
def get_time_text(request, interval, num):
  """Get some time text, possibly internationalized: NUM counts of
  the unit that is INTERVAL seconds long (INTERVAL must be a key of
  _time_desc).  Return the empty string if NUM is 0."""
  ### some languages have even harder pluralization rules. we'll have to
  ### deal with those on demand
  if num == 0:
    return ''

  unit = _time_desc[interval]
  singular = (num == 1)
  attr = unit + ('_singular' if singular else '_plural')

  # Use the request's internationalized format for this unit if it
  # has one; otherwise fall back to plain English.
  try:
    fmt = getattr(request.kv.i18n.time, attr)
  except AttributeError:
    fmt = '%d ' + unit + ('' if singular else 's')
  return fmt % num
1504
def little_time(request):
  """Return the text used to describe a negligible amount of time:
  the request's internationalized version if it has one, otherwise
  'very little time'."""
  text = 'very little time'
  try:
    text = request.kv.i18n.time.little_time
  except AttributeError:
    pass
  return text
1510
def html_time(request, secs, extended=0):
  """Return text describing how long ago the time SECS (in seconds
  since the epoch) was, measured in the largest unit of which at
  least two have elapsed.  If EXTENDED is true, a count of the next
  smaller unit is appended where there is a non-zero one.  Anything
  less than two seconds ago (or in the future) is described by
  little_time()."""
  elapsed = int(time.time()) - secs
  if elapsed < 2:
    return little_time(request)

  # Find the first unit of which fewer than two have elapsed; the
  # unit just before it is the one to report in.
  breaks = sorted(_time_desc)
  i = len(breaks)
  for pos, unit_len in enumerate(breaks):
    if elapsed < 2 * unit_len:
      i = pos
      break
  unit = breaks[i - 1]
  text = get_time_text(request, unit, elapsed // unit)

  if extended and i > 1:
    # Describe what's left over in terms of the next smaller unit.
    leftover = elapsed % unit
    smaller = breaks[i - 2]
    ext = get_time_text(request, smaller, leftover // smaller)
    if ext:
      ### this is not i18n compatible. pass on it for now
      text = text + ', ' + ext
  return text
1532
1533def common_template_data(request, revision=None, mime_type=None):
1534  """Return a TemplateData instance with data dictionary items
1535  common to most ViewVC views."""
1536
1537  cfg = request.cfg
1538
1539  if request.roottype == 'cvs':
1540    disp_where = transcode_path_for_display(request.where,
1541                                            request.repos.encoding)
1542  else:
1543    disp_where = request.where
1544
1545  # Initialize data dictionary members (sorted alphanumerically)
1546  data = TemplateData({
1547    'annotate_href' : None,
1548    'cfg' : cfg,
1549    'docroot' : cfg.options.docroot is None \
1550                and request.script_name + '/' + docroot_magic_path \
1551                or cfg.options.docroot,
1552    'download_href' : None,
1553    'download_text_href' : None,
1554    'graph_href': None,
1555    'home_href': request.script_name or '/',
1556    'kv'  : request.kv,
1557    'lockinfo' : None,
1558    'log_href' : None,
1559    'nav_path' : nav_path(request),
1560    'pathtype' : None,
1561    'prefer_markup' : ezt.boolean(0),
1562    'queryform_href' : None,
1563    'rev'      : None,
1564    'revision_href' : None,
1565    'rootname' : request.rootname \
1566                 and request.server.escape(request.rootname) or None,
1567    'rootpath' : request.rootpath,
1568    'roots_href' : None,
1569    'roottype' : request.roottype,
1570    'rss_href' : None,
1571    'tarball_href' : None,
1572    'up_href'  : None,
1573    'username' : request.username,
1574    'view'     : _view_codes[request.view_func],
1575    'view_href' : None,
1576    'vsn' : __version__,
1577    'where' : request.server.escape(disp_where),
1578  })
1579
1580  rev = revision
1581  if not rev:
1582    rev = request.query_dict.get('annotate')
1583  if not rev:
1584    rev = request.query_dict.get('revision')
1585  if not rev and request.roottype == 'svn':
1586    rev = request.query_dict.get('pathrev')
1587  try:
1588    data['rev'] = hasattr(request.repos, '_getrev') \
1589                  and request.repos._getrev(rev) or rev
1590  except vclib.InvalidRevision:
1591    raise ViewVCException('Invalid revision', '404 Not Found')
1592
1593  if request.pathtype == vclib.DIR:
1594    data['pathtype'] = 'dir'
1595  elif request.pathtype == vclib.FILE:
1596    data['pathtype'] = 'file'
1597
1598  if request.path_parts:
1599    dir = _path_join(request.path_parts[:-1])
1600    data['up_href'] = request.get_url(view_func=view_directory,
1601                                      where=dir, pathtype=vclib.DIR,
1602                                      params={}, escape=1)
1603
1604  if 'roots' in cfg.options.allowed_views:
1605    data['roots_href'] = request.get_url(view_func=view_roots,
1606                                         escape=1, params={})
1607
1608  if request.pathtype == vclib.FILE:
1609    fvi = get_file_view_info(request, request.where, data['rev'], mime_type)
1610    data['view_href'] = fvi.view_href
1611    data['download_href'] = fvi.download_href
1612    data['download_text_href'] = fvi.download_text_href
1613    data['annotate_href'] = fvi.annotate_href
1614    data['revision_href'] = fvi.revision_href
1615    data['prefer_markup'] = fvi.prefer_markup
1616    data['log_href'] = request.get_url(view_func=view_log, params={}, escape=1)
1617    if request.roottype == 'cvs' and cfg.options.use_cvsgraph:
1618      data['graph_href'] = request.get_url(view_func=view_cvsgraph,
1619                                           params={}, escape=1)
1620    file_data = request.repos.listdir(request.path_parts[:-1],
1621                                      request.pathrev, {})
1622    entries =[item for item in file_data
1623              if item.name == request.path_parts[-1]]
1624    if len(entries) == 1:
1625      request.repos.dirlogs(request.path_parts[:-1], request.pathrev,
1626                            entries, {})
1627      data['lockinfo'] = entries[0].lockinfo
1628  elif request.pathtype == vclib.DIR:
1629    data['view_href'] = request.get_url(view_func=view_directory,
1630                                       params={}, escape=1)
1631    if 'tar' in cfg.options.allowed_views:
1632      data['tarball_href'] = request.get_url(view_func=download_tarball,
1633                                             params={},
1634                                             escape=1)
1635    if request.roottype == 'svn':
1636      data['revision_href'] = request.get_url(view_func=view_revision,
1637                                              params={'revision': data['rev']},
1638                                              escape=1)
1639
1640      data['log_href'] = request.get_url(view_func=view_log,
1641                                         params={}, escape=1)
1642
1643  if is_querydb_nonempty_for_root(request):
1644    if request.pathtype == vclib.DIR:
1645      params = {}
1646      if request.roottype == 'cvs' and request.pathrev:
1647        params['branch'] = request.pathrev
1648      data['queryform_href'] = request.get_url(view_func=view_queryform,
1649                                               params=params,
1650                                               escape=1)
1651      data['rss_href'] = request.get_url(view_func=view_query,
1652                                         params={'date': 'month',
1653                                                 'format': 'rss'},
1654                                         escape=1)
1655    elif request.pathtype == vclib.FILE:
1656      parts = _path_parts(request.where)
1657      where = _path_join(parts[:-1])
1658      data['rss_href'] = request.get_url(view_func=view_query,
1659                                         where=where,
1660                                         pathtype=request.pathtype,
1661                                         params={'date': 'month',
1662                                                 'format': 'rss',
1663                                                 'file': parts[-1],
1664                                                 'file_match': 'exact'},
1665                                         escape=1)
1666  return data
1667
def retry_read(src, reqlen=CHUNK_SIZE):
  """Read and return up to REQLEN bytes from the file-like object SRC.

  If a read comes back empty and SRC offers an eof() method which
  returns None, sleep for one second and try the read again.  In all
  other cases, return whatever the read produced (which may be an
  empty chunk)."""
  while 1:
    # Honor the caller-supplied REQLEN (which defaults to CHUNK_SIZE)
    # rather than unconditionally reading CHUNK_SIZE bytes.
    chunk = src.read(reqlen)
    if not chunk:
      # need to check for eof methods because the cStringIO file objects
      # returned by ccvs don't provide them
      if hasattr(src, 'eof') and src.eof() is None:
        time.sleep(1)
        continue
    return chunk
1678
def copy_stream(src, dst, htmlize=0):
  """Pump chunks obtained via retry_read(SRC) into DST.write() until
  an empty chunk is returned.  If HTMLIZE is true, each chunk is
  passed through sapi.escape() before it is written."""
  while True:
    data = retry_read(src)
    if not data:
      return
    dst.write(sapi.escape(data) if htmlize else data)
1687
class MarkupPipeWrapper:
  """EZT callback object which streams the contents of a file-like
  object into the template output, optionally preceded and followed
  by some fixed text."""

  def __init__(self, fp, pretext=None, posttext=None, htmlize=0):
    # FP is the stream to copy; PRETEXT and POSTTEXT (when true) are
    # written before and after it; HTMLIZE is handed to copy_stream().
    self.fp, self.pretext = fp, pretext
    self.posttext, self.htmlize = posttext, htmlize

  def __call__(self, fp, ctx, filename, line_number):
    # Here FP is the output stream passed in by the caller; only it
    # and our own stored state are used.
    source = self.fp
    leader, trailer = self.pretext, self.posttext
    if leader:
      fp.write(leader)
    copy_stream(source, fp, self.htmlize)
    source.close()
    if trailer:
      fp.write(trailer)
1705
# Matches a URL reference inside text that has already been
# HTML-escaped: a scheme, "://" plus host/path characters, optional
# "name=value" query items (introduced by "?", or by "&" in its raw,
# escaped, or doubly-escaped form), and an optional "#fragment".
#
# The pieces are raw strings so that the regex escapes (\+, \?, \&)
# reach the re module untouched instead of being (invalid) string
# escape sequences; the compiled pattern itself is unchanged.
_re_rewrite_escaped_url = re.compile(r'((http|https|ftp|file|svn|svn\+ssh)'
                                     r'(://[-a-zA-Z0-9%.~:_/]+)'
                                     r'((\?|\&amp;amp;|\&amp;|\&)'
                                     r'([-a-zA-Z0-9%.~:_]+)=([-a-zA-Z0-9%.~:_])+)*'
                                     r'(#([-a-zA-Z0-9%.~:_]+)?)?)')

def markup_escaped_urls(s):
  """Return a copy of S with all URL references -- which are expected
  to be already HTML-escaped -- wrapped in <a href=""></a>."""
  def _url_repl(match_obj):
    url = match_obj.group(0)
    # In the href attribute, collapse a doubly-escaped ampersand to a
    # singly-escaped one; the link text keeps the matched text as-is.
    unescaped_url = url.replace("&amp;amp;", "&amp;")
    return "<a href=\"%s\">%s</a>" % (unescaped_url, url)
  return _re_rewrite_escaped_url.sub(_url_repl, s)
1720
1721
def detect_encoding(text_block):
  """Return the encoding used by TEXT_BLOCK (a bytestring) as
  indicated by a leading byte-order mark or, failing that, as detected
  by the chardet (or cchardet) Python module.  Return None if the
  encoding cannot be determined.  (Currently, this is used only when
  syntax highlighting is not enabled/available; otherwise, Pygments
  does this work for us.)"""

  # Does the TEXT_BLOCK start with a BOM?  The UTF-32 marks have to be
  # tested before the UTF-16 ones: the UTF-32 little-endian BOM begins
  # with the very same two bytes as the UTF-16 little-endian BOM, so
  # checking UTF-16 first would misreport UTF-32 data as UTF-16.
  for bom, encoding in [(b'\xef\xbb\xbf', 'utf-8'),
                        (b'\xff\xfe\0\0', 'utf-32'),
                        (b'\0\0\xfe\xff', 'utf-32be'),
                        (b'\xff\xfe', 'utf-16'),
                        (b'\xfe\xff', 'utf-16be'),
                        ]:
    if text_block.startswith(bom):
      return encoding

  # If no recognized BOM, see if chardet can help us.
  try:
    try:
      import cchardet as chardet
    except ImportError:
      import chardet

    # If chardet can confidently claim a match, we'll use its
    # findings.  (And if that match is 'ascii' -- which is a subset of
    # utf-8 -- we'll just call it 'utf-8' and score a zero transform.)
    resp = chardet.detect(text_block)
    if resp.get('confidence') == 1.0:
      encoding = resp.get('encoding')
      if encoding == 'ascii':
        encoding = 'utf-8'
      return encoding
  except Exception:
    # Detection is strictly best-effort (the detector may be missing
    # or may choke on the data), but don't swallow SystemExit or
    # KeyboardInterrupt along the way.
    pass

  # By default ... we have no idea.
  return None
1759
def markup_file_contents(request, cfg, file_lines, filename,
                         mime_type, encoding, colorize):
  """Return the lines of FILE_LINES converted to HTML markup.

  When COLORIZE is true and a Pygments lexer can be found (by
  MIME_TYPE, then by FILENAME, then -- for text MIME types -- by
  guessing from the first line), the joined lines are
  syntax-highlighted by Pygments.  Otherwise each line merely has its
  tabs expanded and is HTML-escaped.  Either way, URL references get
  wrapped in anchor tags.  An empty FILE_LINES yields an empty list.
  (REQUEST and ENCODING are accepted but not consulted here.)"""

  # No lines, no markup.
  if not file_lines:
    return []

  # Hunt for a usable Pygments lexer, but only when asked to colorize.
  # We come up empty-handed if colorization is off or if Pygments has
  # no lexer for this file; a failed Pygments import propagates to the
  # caller.
  lexer = None
  if colorize:
    from pygments import highlight
    from pygments.formatters import HtmlFormatter
    from pygments.lexers import ClassNotFound, \
                                get_lexer_by_name, \
                                get_lexer_for_mimetype, \
                                get_lexer_for_filename, \
                                guess_lexer
    lexer_options = {'tabsize': cfg.options.tabsize, 'stripnl': False}

    # Attempt #1: the MIME type, if we have one.
    if mime_type:
      try:
        lexer = get_lexer_for_mimetype(mime_type, **lexer_options)
      except ClassNotFound:
        lexer = None

    # Attempt #2: the name of the file.
    if not lexer:
      try:
        lexer = get_lexer_for_filename(filename, **lexer_options)
      except ClassNotFound:
        lexer = None

    # Attempt #3: for files believed to be text, let Pygments guess
    # from the first line of content.
    if not lexer and is_text(mime_type) and file_lines:
      try:
        lexer = guess_lexer(file_lines[0], **lexer_options)
      except (ClassNotFound, UnicodeDecodeError):
        lexer = None

  # Without a lexer, fall back to the manual treatment: expand tabs,
  # escape for HTML, and mark up any URLs.
  if not lexer:
    return [markup_escaped_urls(
              sapi.escape(text.expandtabs(cfg.options.tabsize)))
            for text in file_lines]

  # Otherwise Pygments does the highlighting, writing its output into
  # a collector object.
  #
  ### FIXME: This implementation expects (without ample API promises
  ### to guarantee it) that the collector's write() will be called
  ### exactly once per line.  So far, it's worked out okay...
  class _LineCollector:
    def __init__(self):
      self.lines = []
    def write(self, buf):
      stripped = buf.rstrip('\n\r')
      self.lines.append(markup_escaped_urls(stripped))

  collector = _LineCollector()
  formatter = HtmlFormatter(nowrap=True, classprefix="pygments-",
                            encoding=None)
  highlight(''.join(file_lines), lexer, formatter, collector)
  return collector.lines
1837
def empty_blame_item(line, line_no):
  """Return a vclib.Annotation built from LINE and LINE_NO, with its
  four remaining constructor arguments all None and with a diff_href
  attribute of None."""
  placeholder = vclib.Annotation(line, line_no,
                                 None, None, None, None)
  placeholder.diff_href = None
  return placeholder
1842
def merge_blame_data(file_lines, blame_data):
  """Pair the lines of FILE_LINES with the items of BLAME_DATA.

  Return a 2-tuple (ITEMS, ERRORFUL).  When BLAME_DATA is non-empty
  and has exactly one item per line, each item's 'text' attribute is
  set to its line and BLAME_DATA itself is returned as ITEMS.
  Otherwise ITEMS is a new list of empty_blame_item() results, one
  per line (numbered from 1).  ERRORFUL is 1 if BLAME_DATA was
  non-empty but its length did not match FILE_LINES, else 0."""
  mismatched = bool(blame_data) and len(file_lines) != len(blame_data)
  errorful = 1 if mismatched else 0
  if mismatched:
    # Unusable annotation data is discarded wholesale.
    blame_data = None

  if blame_data:
    for item, text in zip(blame_data, file_lines):
      item.text = text
    return blame_data, errorful

  placeholders = [empty_blame_item(text, number)
                  for number, text in enumerate(file_lines, 1)]
  return placeholders, errorful
1857
def make_time_string(date, cfg):
  """Return DATE formatted as a string, in local time or in UTC.

  DATE is a number of seconds since the epoch; if it is None, None is
  returned.  CFG.options.use_localtime selects local time over UTC,
  and CFG.options.iso8601_timestamps selects the
  'YYYY-MM-DDTHH:MM:SS' form (suffixed with 'Z' or a '+HH:MM' /
  '-HH:MM' offset) over the time.asctime() form (suffixed with a
  timezone name or 'UTC').

  """
  if date is None:
    return None

  opts = cfg.options
  to_struct = time.localtime if opts.use_localtime else time.gmtime
  tm = to_struct(date)

  if not opts.iso8601_timestamps:
    stamp = time.asctime(tm)
    zone_name = (opts.use_localtime and time.tzname[tm[8]]) or 'UTC'
    return stamp + ' ' + zone_name

  if not opts.use_localtime:
    suffix = 'Z'
  else:
    # tm[8] is the DST flag of the broken-down time.
    if tm[8] and time.daylight:
      offset = -time.altzone
    else:
      offset = -time.timezone
    sign = '-' if offset < 0 else '+'
    magnitude = abs(offset)
    suffix = '%s%02d:%02d' % (sign, magnitude // 3600,
                              (magnitude % 3600) // 60)
  return time.strftime('%Y-%m-%dT%H:%M:%S', tm) + suffix
1886
def make_rss_time_string(date, cfg):
  """Return DATE as a UTC date string in the form used for RSS.

  DATE is a number of seconds since the epoch; if it is None, None is
  returned.  CFG is accepted but not consulted.

  """
  if date is not None:
    utc = time.gmtime(date)
    return time.strftime("%a, %d %b %Y %H:%M:%S", utc) + ' UTC'
  return None
1896
def make_comma_sep_list_string(items):
  """Return the 'name' attributes of ITEMS joined by ', '."""
  names = (entry.name for entry in items)
  return ', '.join(names)
1899
def is_undisplayable(val, encoding='utf-8'):
  """Return 1 if VAL cannot be strictly decoded using ENCODING, else
  return 0.  A VAL which is not a bytestring is first encoded as
  UTF-8 (with the 'surrogateescape' error handler)."""
  # FIXME: must revise usage later (hopefully, we can display val
  # with encodings other than utf-8 ...)
  if isinstance(val, bytes):
    raw = val
  else:
    raw = val.encode('utf-8', 'surrogateescape')
  try:
    raw.decode(encoding, 'strict')
  except (UnicodeDecodeError, TypeError):
    return 1
  return 0
1910
def get_itemprops(request, path_parts, rev):
  """Return a list of items (sorted by property name) describing the
  properties reported by REQUEST.repos.itemprops() for PATH_PARTS in
  REV.  Each item carries 'name', 'value', and 'undisplayable'
  members; property values which are not strings are flagged as
  undisplayable and get a value of None, while string values are run
  through LogFormatter with HTML-ization enabled."""
  itemprops = request.repos.itemprops(path_parts, rev)
  props = []
  for name in sorted(itemprops.keys()):
    raw_value = itemprops[name]
    displayable = isinstance(raw_value, str)
    if displayable:
      value = LogFormatter(request, raw_value).get(maxlen=0, htmlize=1)
    else:
      value = None
    props.append(_item(name=name, value=value,
                       undisplayable=ezt.boolean(not displayable)))
  return props
1925
def parse_mime_type(mime_type):
  """Split the MIME_TYPE string into its type/subtype and parameters.

  Return a 2-tuple (TYPE_SUBTYPE, PARAMETERS).  TYPE_SUBTYPE is the
  lower-cased "type/subtype" portion of MIME_TYPE.  PARAMETERS is a
  dictionary mapping lower-cased parameter names to their values,
  with surrounding whitespace and any enclosing double quotes
  removed.  Parameter segments which lack an '=' or a name (such as
  the empty segment left behind by a trailing ';') are ignored rather
  than raising ValueError."""
  mime_parts = [x.strip() for x in mime_type.split(';')]
  type_subtype = mime_parts[0].lower()
  parameters = {}
  for part in mime_parts[1:]:
    name, sep, value = part.partition('=')
    # Parameter names are matched case-insensitively, so normalize.
    name = name.strip().lower()
    if not sep or not name:
      continue
    value = value.strip()
    # Unwrap a quoted-string value, e.g. charset="utf-8".
    if len(value) >= 2 and value[0] == value[-1] == '"':
      value = value[1:-1]
    parameters[name] = value
  return type_subtype, parameters
1934
def calculate_mime_type(request, path_parts, rev):
  """Return a 2-tuple carrying the MIME content type and character
  encoding for the file represented by PATH_PARTS in REV.  Use REQUEST
  for repository access as necessary.

  If PATH_PARTS is empty, return (None, None).  For Subversion roots
  (unless the 'svn_ignore_mimetype' option is set), a non-empty
  'svn:mime-type' property supplies the type and its 'charset'
  parameter (if any) the encoding.  In every other case the type is
  guessed from the final path component and the encoding is None."""
  if not path_parts:
    return None, None
  if request.roottype == 'svn' \
     and (not request.cfg.options.svn_ignore_mimetype):
    try:
      itemprops = request.repos.itemprops(path_parts, rev)
      mime_type = itemprops.get('svn:mime-type')
      if mime_type:
        mime_type, parameters = parse_mime_type(mime_type)
        return mime_type, parameters.get('charset')
    except Exception:
      # Consulting the property is best-effort: on any ordinary error
      # we fall back to guessing below.  (SystemExit and
      # KeyboardInterrupt are deliberately not swallowed.)
      pass
  return guess_mime(path_parts[-1]), None
1953
def assert_viewable_filesize(cfg, filesize):
  """Raise a '403 Forbidden' ViewVCException if FILESIZE (in bytes)
  exceeds the limit configured by CFG.options.max_filesize_kbytes.
  Nothing is enforced when that option is false or when FILESIZE is
  -1."""
  limit_kbytes = cfg.options.max_filesize_kbytes
  if not limit_kbytes or filesize == -1:
    return
  if filesize > (1024 * limit_kbytes):
    raise ViewVCException('Display of files larger than %d KB '
                          'disallowed by configuration'
                          % (limit_kbytes),
                          '403 Forbidden')
1962
def markup_or_annotate(request, is_annotate):
  """Generate the "file" template page for the file addressed by
  REQUEST.

  If IS_ANNOTATE is true, try to obtain per-line annotation data and
  merge it with the marked-up file lines; otherwise just mark up the
  file contents.  The template's 'annotation' value ends up as one of
  'none', 'annotated', 'binary', or 'error'.  Files whose MIME type
  is_binary_file_mime_type() flags, and files whose lines fail to
  decode, are reported as binary and carry no 'lines'; viewable
  images (when the 'co' view is allowed) carry an image src URL
  instead of lines.

  Return None.  Return early, without generating a page, whenever
  check_freshness() returns true.  ViewVCException may be raised by
  assert_viewable_filesize(), or when marking up the contents fails
  with colorization disabled."""
  cfg = request.cfg
  path, rev = _orig_path(request, is_annotate and 'annotate' or 'revision')
  is_binary = False
  lines = fp = image_src_href = None
  annotation = 'none'
  revision = None
  mime_type, encoding = calculate_mime_type(request, path, rev)

  # Is this display blocked by 'binary_mime_types' configuration?
  if is_binary_file_mime_type(mime_type, cfg):
    # The file is opened only to learn its revision; no contents are
    # read.
    fp, revision = request.repos.openfile(path, rev, {})
    fp.close()
    if check_freshness(request, None, revision, weak=1):
      return
    is_binary = True
    if is_annotate:
      annotation = 'binary'

  # If this is viewable image that we're allowed to show embedded, we
  # need only resolve its revision and generate an image src=
  # attribute URL for it.
  elif is_viewable_image(mime_type) and 'co' in cfg.options.allowed_views:
    fp, revision = request.repos.openfile(path, rev, {})
    fp.close()
    if check_freshness(request, None, revision, weak=1):
      return
    if is_annotate:
      annotation = 'binary'
    image_src_href = request.get_url(view_func=view_checkout,
                                     params={'revision': rev}, escape=1)

  # If we get here, the request is not for an image that we can
  # display embedded.
  else:
    # If configuration disallows display of large files, try to honor
    # that request.
    filesize = request.repos.filesize(path, rev)
    assert_viewable_filesize(cfg, filesize)

    # If this was an annotation request, try to annotate this file.
    # If something goes wrong, that's okay -- we'll gracefully revert
    # to a plain markup display.
    blame_data = None
    if is_annotate:
      try:
        blame_source, revision = request.repos.annotate(path, rev, False)
        if check_freshness(request, None, revision, weak=1):
          return
        # Create BLAME_DATA list from BLAME_SOURCE, adding diff_href
        # items to each relevant "line".
        blame_data = []
        for item in blame_source:
          item.diff_href = None
          if item.prev_rev:
            item.diff_href = request.get_url(view_func=view_diff,
                                             params={'r1': item.prev_rev,
                                                     'r2': item.rev},
                                             escape=1, partial=1)
          blame_data.append(item)
        annotation = 'annotated'
      except vclib.NonTextualFileContents:
        annotation = 'binary'
      # NOTE(review): this bare except also traps SystemExit and
      # KeyboardInterrupt, and BLAME_DATA may be left partially built
      # if the failure happened inside the loop above.
      except:
        annotation = 'error'

    # Grab the file contents.
    fp, revision = request.repos.openfile(path, rev, {'cvs_oldkeywords' : 1})
    if check_freshness(request, None, revision, weak=1):
      fp.close()
      return

    # If we're limiting by filesize but couldn't pull off the cheap
    # check above, we'll try to do so line by line here (while
    # building our file_lines array).
    # NOTE(review): if the size check raises inside this loop, FP is
    # not closed here.
    if cfg.options.max_filesize_kbytes and filesize == -1:
      file_lines = []
      filesize = 0
      while 1:
        line = fp.readline()
        if not line:
          break
        filesize = filesize + len(line)
        assert_viewable_filesize(cfg, filesize)
        file_lines.append(line)
    else:
      file_lines = fp.readlines()
    fp.close()

    # If allowed by configuration, try to detect the source encoding
    # for this file.  We'll assemble a block of data from the file
    # contents to do so, gathering whole lines until we have at least
    # 2048 bytes (or run out of lines).
    if not encoding and cfg.options.detect_encoding:
      # NOTE(review): block_size is never read below.
      block_size = 0
      text_block = b''
      for i in range(len(file_lines)):
        text_block = text_block + file_lines[i]
        if len(text_block) >= 2048:
          break
      encoding = detect_encoding(text_block)
    # Still nothing?  Fall back to the repository's encoding.
    if not encoding:
      encoding = request.repos.encoding

    # Decode the file's lines from the detected encoding to Unicode.
    # With 'allow_mojibake' set, undecodable lines are retried with
    # the 'surrogateescape' error handler; any failure that escapes
    # the loop causes the file to be treated as binary.
    try:
      for i in range(len(file_lines)):
        line = file_lines[i]
        try:
          line = line.decode(encoding)
        except UnicodeDecodeError:
          if not cfg.options.allow_mojibake:
            raise
          line = line.decode(encoding, 'surrogateescape')
        file_lines[i] = line
    except:
      is_binary = True

    # Unless we've determined that the file is binary, try to colorize
    # the file contents.  If that fails, we'll give it another shot
    # with colorization disabled.
    if not is_binary:
      colorize = cfg.options.enable_syntax_coloration
      try:
        lines = markup_file_contents(request, cfg, file_lines, path[-1],
                                     mime_type, encoding, colorize)
      except:
        if colorize:
          lines = markup_file_contents(request, cfg, file_lines, path[-1],
                                       mime_type, encoding, False)
        else:
          raise ViewVCException('Error displaying file contents',
                                '500 Internal Server Error')

      # Now, try to match up the annotation data (if any) with the file
      # lines.
      lines, errorful = merge_blame_data(lines, blame_data)
      if errorful:
        annotation = 'error'

  # Assemble the template data: the common members plus the
  # file-specific ones.  The log-related members stay None unless
  # filled in below.
  data = common_template_data(request, revision, mime_type)
  data.merge(TemplateData({
    'mime_type' : mime_type,
    'log' : None,
    'date' : None,
    'ago' : None,
    'author' : None,
    'branches' : None,
    'tags' : None,
    'branch_points' : None,
    'changed' : None,
    'size' : None,
    'state' : None,
    'vendor_branch' : None,
    'prev' : None,
    'orig_path' : None,
    'orig_href' : None,
    'image_src_href' : image_src_href,
    'lines' : lines,
    'properties' : get_itemprops(request, path, rev),
    'is_binary' : ezt.boolean(is_binary),
    'annotation' : annotation,
    }))

  # If configured to do so, include log information taken from the
  # last entry returned by itemlog() for this revision.
  if cfg.options.show_log_in_markup:
    options = {
      'svn_latest_log': 1, ### FIXME: Use of this magical value is uncool.
      'svn_cross_copies': 1,
      }
    revs = request.repos.itemlog(path, revision, vclib.SORTBY_REV,
                                 0, 1, options)
    entry = revs[-1]
    lf = LogFormatter(request, entry.log)

    data['date'] = make_time_string(entry.date, cfg)
    data['author'] = entry.author
    data['changed'] = entry.changed
    data['log'] = lf.get(maxlen=0, htmlize=1)
    data['size'] = entry.size

    if entry.date is not None:
      data['ago'] = html_time(request, entry.date, 1)

    # These members are populated for CVS roots only.
    if request.roottype == 'cvs':
      branch = entry.branch_number
      prev = entry.prev or entry.parent
      data['state'] = entry.dead and 'dead'
      data['prev'] = prev and prev.string
      data['vendor_branch'] = ezt.boolean(branch and branch[2] % 2 == 1)

      ### TODO:  Should this be using prep_tags() instead?
      data['branches'] = make_comma_sep_list_string(entry.branches)
      data['tags'] = make_comma_sep_list_string(entry.tags)
      data['branch_points']= make_comma_sep_list_string(entry.branch_points)

  # If the path we displayed differs from the one requested, expose
  # the displayed path and a link to its log view.
  if path != request.path_parts:
    orig_path = _path_join(path)
    data['orig_path'] = orig_path
    data['orig_href'] = request.get_url(view_func=view_log,
                                        where=orig_path,
                                        pathtype=vclib.FILE,
                                        params={'pathrev': revision},
                                        escape=1)

  generate_page(request, "file", data)
2167
def view_markup(request):
  """Handle a markup view request.  Raise a '403 Forbidden'
  ViewVCException if the 'markup' view is not allowed, or a '400 Bad
  Request' one if the requested path is not a file; otherwise hand
  off to markup_or_annotate() without annotation."""
  allowed_views = request.cfg.options.allowed_views
  if not ('markup' in allowed_views):
    raise ViewVCException('Markup view is disabled',
                          '403 Forbidden')
  targets_file = request.pathtype == vclib.FILE
  if not targets_file:
    raise ViewVCException('Unsupported feature: markup view on directory',
                          '400 Bad Request')
  markup_or_annotate(request, 0)
2176
def view_annotate(request):
  """Handle an annotate view request.  Raise a '403 Forbidden'
  ViewVCException if the 'annotate' view is not allowed, or a '400
  Bad Request' one if the requested path is not a file; otherwise
  hand off to markup_or_annotate() with annotation requested."""
  allowed_views = request.cfg.options.allowed_views
  if not ('annotate' in allowed_views):
    raise ViewVCException('Annotation view is disabled',
                          '403 Forbidden')
  targets_file = request.pathtype == vclib.FILE
  if not targets_file:
    raise ViewVCException('Unsupported feature: annotate view on directory',
                          '400 Bad Request')
  markup_or_annotate(request, 1)
2185
def revcmp(rev1, rev2):
  """Compare the dotted revision number strings REV1 and REV2
  component by component, numerically.  Return -1, 0, or 1 according
  to whether REV1 sorts before, the same as, or after REV2."""
  rev1 = list(map(int, rev1.split('.')))
  rev2 = list(map(int, rev2.split('.')))
  # Python 3 has no builtin cmp(), so spell out the three-way
  # comparison of the two integer lists.
  return (rev1 > rev2) - (rev1 < rev2)
2190
def sort_file_data(file_data, roottype, sortdir, sortby, group_dirs):
  """Sort the list FILE_DATA in place.

  SORTBY names the sort criterion ('rev', 'date', 'log', 'author';
  anything else means sort by name) and a SORTDIR of "down" reverses
  the criterion's ordering.  For a ROOTTYPE of "cvs", sorting by
  revision is replaced by sorting by date.  If GROUP_DIRS is true,
  directories sort ahead of everything else.  Entries which have a
  revision sort ahead of those which do not."""
  # the sort direction, as a multiplier for comparison results
  sign = -1 if sortdir == "down" else 1

  # cvs revision numbers aren't meaningfully comparable from one file
  # to the next, so dates stand in for them
  if roottype == "cvs" and sortby == "rev":
    sortby = "date"

  def _by_criterion(a, b):
    # compare two entries according to the chosen criterion
    if sortby == 'rev':
      outcome = revcmp(a.rev, b.rev)
    elif sortby == 'date':
      outcome = cmp(b.date, a.date)        # latest date is first
    elif sortby == 'log':
      outcome = cmp(a.log, b.log)
    elif sortby == 'author':
      outcome = cmp(a.author, b.author)
    else:
      outcome = cmp(a.name, b.name)
    return sign * outcome

  def _compare(a, b):
    # with directory grouping, a directory beats a non-directory
    # outright, while two directories compare by the usual criterion.
    if group_dirs:
      a_is_dir = a.kind == vclib.DIR
      b_is_dir = b.kind == vclib.DIR
      if a_is_dir and b_is_dir:
        return _by_criterion(a, b)
      if a_is_dir:
        return -1
      if b_is_dir:
        return 1

    # entries may lack a revision (when a specific tag was requested
    # and that tag is not present on the file); those sort last.
    a_has_rev = a.rev is not None
    b_has_rev = b.rev is not None
    if a_has_rev and b_has_rev:
      return _by_criterion(a, b)
    if a_has_rev:
      return -1
    if b_has_rev:
      return 1

    # neither has a revision: fall back to the file name
    return sign * cmp(a.name, b.name)

  file_data.sort(key=functools.cmp_to_key(_compare))
2241
def icmp(x, y):
  """case insensitive comparison

  Return -1, 0, or 1 according to whether the lower-cased X sorts
  before, the same as, or after the lower-cased Y."""
  a, b = x.lower(), y.lower()
  # Python 3 has no builtin cmp(), so spell out the three-way
  # comparison.
  return (a > b) - (a < b)
2245
def view_roots(request):
  """Generate the "roots" page, listing (in case-insensitive name
  order) the roots reported by list_roots().  Raise a '403 Forbidden'
  ViewVCException if the 'roots' view is not allowed."""
  cfg = request.cfg
  if not ('roots' in cfg.options.allowed_views):
    raise ViewVCException('Root listing view is disabled',
                          '403 Forbidden')

  # gather the roots available for selection
  expand_root_parents(cfg)
  allroots = list_roots(request)
  roots = []
  if len(allroots):
    for rootname in sorted(allroots.keys(),
                           key=functools.cmp_to_key(icmp)):
      root_path, root_type, lastmod = allroots[rootname]
      root_params = {'root': rootname}
      href = request.get_url(view_func=view_directory,
                             where='', pathtype=vclib.DIR,
                             params=root_params, escape=1)

      # only Subversion roots get a log link
      log_href = None
      if root_type == vclib.SVN:
        log_href = request.get_url(view_func=view_log,
                                   where='', pathtype=vclib.DIR,
                                   params={'root': rootname}, escape=1)

      def _lastmod_field(attr, lastmod=lastmod):
        # a missing LASTMOD, or a false field value, yields None
        return (lastmod and getattr(lastmod, attr)) or None

      roots.append(_item(name=request.server.escape(rootname),
                         type=root_type,
                         path=root_path,
                         author=_lastmod_field('author'),
                         ago=_lastmod_field('ago'),
                         date=_lastmod_field('date'),
                         log=_lastmod_field('log'),
                         short_log=_lastmod_field('short_log'),
                         rev=_lastmod_field('rev'),
                         href=href,
                         log_href=log_href))

  data = common_template_data(request)
  roots_data = TemplateData({
    'roots' : roots,
    'roots_shown' : len(roots),
    })
  data.merge(roots_data)
  generate_page(request, "roots", data)
2286
2287def view_directory(request):
2288  cfg = request.cfg
2289
2290  # For Subversion repositories, the revision acts as a weak validator for
2291  # the directory listing (to take into account template changes or
2292  # revision property changes).
2293  if request.roottype == 'svn':
2294    try:
2295      rev = request.repos._getrev(request.pathrev)
2296    except vclib.InvalidRevision:
2297      raise ViewVCException('Invalid revision', '404 Not Found')
2298    tree_rev = request.repos.created_rev(request.where, rev)
2299    if check_freshness(request, None, str(tree_rev), weak=1):
2300      return
2301
2302  # List current directory
2303  options = {}
2304  if request.roottype == 'cvs':
2305    hideattic = int(request.query_dict.get('hideattic',
2306                                           cfg.options.hide_attic))
2307    options["cvs_subdirs"] = (cfg.options.show_subdir_lastmod and
2308                              cfg.options.show_logs)
2309  file_data = request.repos.listdir(request.path_parts, request.pathrev,
2310                                    options)
2311
2312  # sort with directories first, and using the "sortby" criteria
2313  sortby = request.query_dict.get('sortby', cfg.options.sort_by) or 'file'
2314  sortdir = request.query_dict.get('sortdir', 'up')
2315
2316  # when paging and sorting by filename, we can greatly improve
2317  # performance by "cheating" -- first, we sort (we already have the
2318  # names), then we just fetch dirlogs for the needed entries.
2319  # however, when sorting by other properties or not paging, we've no
2320  # choice but to fetch dirlogs for everything.
2321  if cfg.options.dir_pagesize and sortby == 'file':
2322    dirlogs_first = int(request.query_dict.get('dir_pagestart', 0))
2323    if dirlogs_first > len(file_data):
2324      dirlogs_first = 0
2325    dirlogs_last = dirlogs_first + cfg.options.dir_pagesize
2326    for file in file_data:
2327      file.rev = None
2328      file.date = None
2329      file.log = None
2330      file.author = None
2331      file.size = None
2332      file.lockinfo = None
2333      file.dead = None
2334    sort_file_data(file_data, request.roottype, sortdir, sortby,
2335                   cfg.options.sort_group_dirs)
2336    # request dirlogs only for the slice of files in "this page"
2337    request.repos.dirlogs(request.path_parts, request.pathrev,
2338                          file_data[dirlogs_first:dirlogs_last], options)
2339  else:
2340    request.repos.dirlogs(request.path_parts, request.pathrev,
2341                          file_data, options)
2342    sort_file_data(file_data, request.roottype, sortdir, sortby,
2343                   cfg.options.sort_group_dirs)
2344
2345  # If a regex is specified, build a compiled form thereof for filtering
2346  searchstr = None
2347  search_re = request.query_dict.get('search', '')
2348  if cfg.options.use_re_search and search_re:
2349    searchstr = re.compile(search_re)
2350
2351  # loop through entries creating rows and changing these values
2352  rows = [ ]
2353  dirs_displayed = files_displayed = 0
2354  num_dead = 0
2355
2356  # set some values to be used inside loop
2357  where = request.where
2358  where_prefix = where and where + '/'
2359
2360  for file in file_data:
2361    if is_dir_ignored_file(file.name, cfg):
2362      continue
2363    row = _item(author=None, log=None, short_log=None, state=None, size=None,
2364                log_file=None, log_rev=None, graph_href=None, mime_type=None,
2365                date=None, ago=None, view_href=None, log_href=None,
2366                revision_href=None, annotate_href=None, download_href=None,
2367                download_text_href=None, prefer_markup=ezt.boolean(0),
2368                is_viewable_image=ezt.boolean(0), is_binary=ezt.boolean(0))
2369    if request.roottype == 'cvs':
2370      if file.absent:
2371        continue
2372      disp_name = transcode_path_for_display(file.name, request.repos.encoding)
2373    else:
2374      disp_name = file.name
2375    if cfg.options.hide_errorful_entries and file.errors:
2376      continue
2377    row.rev = file.rev
2378    row.author = file.author
2379    row.state = (request.roottype == 'cvs' and file.dead) and 'dead' or ''
2380    if file.date is not None:
2381      row.date = make_time_string(file.date, cfg)
2382      row.ago = html_time(request, file.date)
2383    if cfg.options.show_logs:
2384      lf = LogFormatter(request, file.log)
2385      row.log = lf.get(maxlen=0, htmlize=1)
2386      row.short_log = lf.get(maxlen=cfg.options.short_log_len, htmlize=1)
2387    row.lockinfo = file.lockinfo
2388    row.anchor = request.server.escape(file.name)
2389    row.name = request.server.escape(disp_name)
2390    row.pathtype = (file.kind == vclib.FILE and 'file') or \
2391                   (file.kind == vclib.DIR and 'dir')
2392    row.errors = file.errors
2393
2394    if file.kind == vclib.DIR:
2395      if cfg.options.hide_cvsroot \
2396         and is_cvsroot_path(request.roottype,
2397                             request.path_parts + [file.name]):
2398        continue
2399
2400      dirs_displayed += 1
2401
2402      row.view_href = request.get_url(view_func=view_directory,
2403                                      where=where_prefix+file.name,
2404                                      pathtype=vclib.DIR,
2405                                      params={},
2406                                      escape=1)
2407
2408      if request.roottype == 'svn':
2409        row.revision_href = request.get_url(view_func=view_revision,
2410                                            params={'revision': file.rev},
2411                                            escape=1)
2412
2413      if request.roottype == 'cvs' and file.rev is not None:
2414        row.rev = None
2415        if cfg.options.show_logs:
2416          row.log_file = request.server.escape(file.newest_file)
2417          row.log_rev = file.rev
2418
2419      if request.roottype == 'svn':
2420        row.log_href = request.get_url(view_func=view_log,
2421                                       where=where_prefix + file.name,
2422                                       pathtype=vclib.DIR,
2423                                       params={},
2424                                       escape=1)
2425
2426    elif file.kind == vclib.FILE:
2427      if searchstr is not None:
2428        if request.roottype == 'cvs' and (file.errors or file.dead):
2429          continue
2430        if not search_file(request.repos, request.path_parts + [file.name],
2431                           request.pathrev, searchstr):
2432          continue
2433      if request.roottype == 'cvs' and file.dead:
2434        num_dead = num_dead + 1
2435        if hideattic:
2436          continue
2437
2438      files_displayed += 1
2439
2440      file_where = where_prefix + file.name
2441      if request.roottype == 'svn':
2442        row.size = file.size
2443
2444      row.mime_type, encoding = calculate_mime_type(request,
2445                                                    _path_parts(file_where),
2446                                                    file.rev)
2447      fvi = get_file_view_info(request, file_where, file.rev, row.mime_type)
2448      row.view_href = fvi.view_href
2449      row.download_href = fvi.download_href
2450      row.download_text_href = fvi.download_text_href
2451      row.annotate_href = fvi.annotate_href
2452      row.revision_href = fvi.revision_href
2453      row.prefer_markup = fvi.prefer_markup
2454      row.is_viewable_image = fvi.is_viewable_image
2455      row.is_binary = fvi.is_binary
2456      row.log_href = request.get_url(view_func=view_log,
2457                                     where=file_where,
2458                                     pathtype=vclib.FILE,
2459                                     params={},
2460                                     escape=1)
2461      if cfg.options.use_cvsgraph and request.roottype == 'cvs':
2462         row.graph_href = request.get_url(view_func=view_cvsgraph,
2463                                          where=file_where,
2464                                          pathtype=vclib.FILE,
2465                                          params={},
2466                                          escape=1)
2467
2468    rows.append(row)
2469
2470  # Prepare the data that will be passed to the template, based on the
2471  # common template data.
2472  data = common_template_data(request)
2473  data.merge(TemplateData({
2474    'entries' : rows,
2475    'sortby' : sortby,
2476    'sortdir' : sortdir,
2477    'search_re' : request.server.escape(search_re),
2478    'dir_pagestart' : None,
2479    'sortby_file_href' :   request.get_url(params={'sortby': 'file',
2480                                                   'sortdir': None},
2481                                           escape=1),
2482    'sortby_rev_href' :    request.get_url(params={'sortby': 'rev',
2483                                                   'sortdir': None},
2484                                           escape=1),
2485    'sortby_date_href' :   request.get_url(params={'sortby': 'date',
2486                                                   'sortdir': None},
2487                                           escape=1),
2488    'sortby_author_href' : request.get_url(params={'sortby': 'author',
2489                                                   'sortdir': None},
2490                                           escape=1),
2491    'sortby_log_href' :    request.get_url(params={'sortby': 'log',
2492                                                   'sortdir': None},
2493                                           escape=1),
2494    'files_shown' : files_displayed,
2495    'dirs_shown' : dirs_displayed,
2496    'num_dead' : num_dead,
2497    'youngest_rev' : None,
2498    'youngest_rev_href' : None,
2499    'selection_form' : None,
2500    'attic_showing' : None,
2501    'show_attic_href' : None,
2502    'hide_attic_href' : None,
2503    'branch_tags': None,
2504    'plain_tags': None,
2505    'properties': get_itemprops(request, request.path_parts, request.pathrev),
2506    'tree_rev' : None,
2507    'tree_rev_href' : None,
2508    'dir_paging_action' : None,
2509    'dir_paging_hidden_values' : [],
2510    'search_re_action' : None,
2511    'search_re_hidden_values' : [],
2512
2513    # Populated by paging()/paging_sws()
2514    'picklist' : [],
2515    'picklist_len' : 0,
2516
2517    # Populated by pathrev_form()
2518    'pathrev_action' : None,
2519    'pathrev_hidden_values' : [],
2520    'pathrev_clear_action' : None,
2521    'pathrev_clear_hidden_values' : [],
2522    'pathrev' : None,
2523    'lastrev' : None,
2524  }))
2525
2526  # clicking on sort column reverses sort order
2527  if sortdir == 'down':
2528    revsortdir = None # 'up'
2529  else:
2530    revsortdir = 'down'
2531  if sortby in ['file', 'rev', 'date', 'log', 'author']:
2532    data['sortby_%s_href' % sortby] = request.get_url(params={'sortdir':
2533                                                              revsortdir},
2534                                                      escape=1)
2535  # CVS doesn't support sorting by rev
2536  if request.roottype == "cvs":
2537    data['sortby_rev_href'] = None
2538
2539  # set cvs-specific fields
2540  if request.roottype == 'cvs':
2541    plain_tags = options['cvs_tags']
2542    plain_tags.sort(key=functools.cmp_to_key(icmp), reverse=True)
2543    data['plain_tags'] = []
2544    for plain_tag in plain_tags:
2545      data['plain_tags'].append(_item(name=plain_tag, revision=None))
2546
2547    branch_tags = options['cvs_branches']
2548    branch_tags.sort(key=functools.cmp_to_key(icmp), reverse=True)
2549    data['branch_tags'] = []
2550    for branch_tag in branch_tags:
2551      data['branch_tags'].append(_item(name=branch_tag, revision=None))
2552
2553    data['attic_showing'] = ezt.boolean(not hideattic)
2554    data['show_attic_href'] = request.get_url(params={'hideattic': 0},
2555                                              escape=1)
2556    data['hide_attic_href'] = request.get_url(params={'hideattic': 1},
2557                                              escape=1)
2558
2559  # set svn-specific fields
2560  elif request.roottype == 'svn':
2561    data['tree_rev'] = tree_rev
2562    data['tree_rev_href'] = request.get_url(view_func=view_revision,
2563                                            params={'revision': tree_rev},
2564                                            escape=1)
2565    data['youngest_rev'] = request.repos.get_youngest_revision()
2566    data['youngest_rev_href'] = request.get_url(view_func=view_revision,
2567                                                params={},
2568                                                escape=1)
2569
2570  if cfg.options.dir_pagesize:
2571    data['dir_paging_action'], data['dir_paging_hidden_values'] = \
2572      request.get_form(params={'dir_pagestart': None})
2573
2574  pathrev_form(request, data)
2575
2576  if cfg.options.use_re_search:
2577    data['search_re_action'], data['search_re_hidden_values'] = \
2578      request.get_form(params={'search': None})
2579
2580  if cfg.options.dir_pagesize:
2581    data['dir_pagestart'] = int(request.query_dict.get('dir_pagestart',0))
2582    data['entries'] = paging(data, 'entries', data['dir_pagestart'], 'name',
2583                             cfg.options.dir_pagesize)
2584
2585  generate_page(request, "directory", data)
2586
def paging(data, key, pagestart, local_name, pagesize):
  """Slice DATA[KEY] down to a single page and build the page picklist.

  DATA is the template data mapping and DATA[KEY] the complete
  sequence of items being paged.  PAGESTART is the index of the first
  item of the requested page, PAGESIZE the number of items per page,
  and LOCAL_NAME the name of the item attribute used to label the
  first and last item of each picklist entry.

  As a side effect, DATA['picklist'] is replaced with one entry per
  page and DATA['picklist_len'] is set to the number of pages.

  Return the list of items making up the requested page.  A PAGESTART
  which does not index an existing item selects the first page; this
  happens when a tag selection or search shrinks the listing while the
  user is on a page other than the first.
  """
  items = data[key]
  total = len(items)

  # Create the picklist: one entry per page.
  picklist = data['picklist'] = []
  for i in range(0, total, pagesize):
    pick = _item(start=None, end=None, count=None, more=ezt.boolean(0))
    pick.start = getattr(items[i], local_name)
    pick.count = i
    pick.page = (i // pagesize) + 1
    # The final page may be short, so clamp to the last available item.
    pick.end = getattr(items[min(i + pagesize, total) - 1], local_name)
    picklist.append(pick)
  data['picklist_len'] = len(picklist)

  # Fall back to the first page for any out-of-range start.  The test
  # must be ">=" rather than ">": PAGESTART == TOTAL already points one
  # past the last item and would otherwise yield an empty page.  A
  # negative start would be treated by the slice as an offset from the
  # end of the list, which is never what was asked for.
  if pagestart < 0 or pagestart >= total:
    pagestart = 0

  # Slice
  return items[pagestart:pagestart + pagesize]
2614
def paging_sws(data, key, pagestart, local_name, pagesize,
               extra_pages, offset):
  """Implement sliding window-style paging.

  DATA[KEY] holds a window of items whose first element sits at index
  OFFSET of the complete listing.  PAGESTART is the index (in the
  complete listing) of the first item of the requested page, PAGESIZE
  the number of items per page, and LOCAL_NAME the name of the item
  attribute used to label the first and last item of each picklist
  entry.  Picklist entries are generated until one starts at or beyond
  PAGESTART + EXTRA_PAGES * PAGESIZE; that entry is flagged with a
  true 'more' value and ends the picklist.

  As a side effect, DATA['picklist'] is replaced with the generated
  entries and DATA['picklist_len'] is set to their count.

  Return the list of items making up the requested page.  If the
  requested page starts outside of the window, the first page of the
  window is returned instead.
  """
  items = data[key]
  total = len(items)

  # Create the picklist
  last_requested = pagestart + (extra_pages * pagesize)
  picklist = data['picklist'] = []
  for i in range(0, total, pagesize):
    pick = _item(start=None, end=None, count=None, more=ezt.boolean(0))
    pick.start = getattr(items[i], local_name)
    pick.count = offset + i
    pick.page = (pick.count // pagesize) + 1
    # The final page may be short, so clamp to the last available item.
    pick.end = getattr(items[min(i + pagesize, total) - 1], local_name)
    picklist.append(pick)
    if pick.count >= last_requested:
      pick.more = ezt.boolean(1)
      break
  data['picklist_len'] = len(picklist)

  # Translate the absolute page start into an index into the window.
  first = pagestart - offset

  # The start can fall outside of the window if you select a tag or
  # search while on a page other than the first; reset to the first
  # page in that case.  (This used to reset PAGESTART, which is not
  # consulted again, so the reset never took effect.)  A negative
  # index is rejected as well because the slice would otherwise count
  # from the end of the list.
  if first < 0 or first >= total:
    first = 0

  # Slice
  return items[first:first + pagesize]
2647
def pathrev_form(request, data):
  """Fill DATA with the values needed by the sticky revision forms.

  Sets DATA['pathrev'], DATA['lastrev'] and the action/hidden-value
  pairs 'pathrev_action'/'pathrev_hidden_values' and
  'pathrev_clear_action'/'pathrev_clear_hidden_values'.

  For Subversion roots the 'pathrev' form is routed through
  redirect_pathrev() and, when a path revision is in effect, the
  repository is asked for the last revision of the path; that value is
  kept unless it equals the youngest revision.  For other root types
  the 'pathrev' form shares the action and hidden values of the
  'clear' form.

  Return the value stored in DATA['lastrev'] (possibly None).
  """
  is_svn = request.roottype == 'svn'
  sticky_rev = None

  if is_svn:
    svn_action, svn_hidden = request.get_form(
      view_func=redirect_pathrev,
      params={'pathrev': None,
              'orig_path': request.where,
              'orig_pathtype': request.pathtype,
              'orig_pathrev': request.pathrev,
              'orig_view': _view_codes.get(request.view_func)})
    data['pathrev_action'] = svn_action
    data['pathrev_hidden_values'] = svn_hidden

    if request.pathrev:
      youngest = request.repos.get_youngest_revision()
      found = request.repos.last_rev(request.where, request.pathrev,
                                     youngest)[0]
      # A path that is still alive at the youngest revision has no
      # "last" revision worth reporting.
      sticky_rev = None if found == youngest else found

  data['pathrev'] = request.pathrev
  data['lastrev'] = sticky_rev

  clear_action, clear_hidden = request.get_form(params={'pathrev': sticky_rev})
  if not is_svn:
    data['pathrev_action'] = clear_action
    data['pathrev_hidden_values'] = clear_hidden
  data['pathrev_clear_action'] = clear_action
  data['pathrev_clear_hidden_values'] = clear_hidden

  return sticky_rev
2679
def redirect_pathrev(request):
  """Redirect to the originating view with a new sticky revision.

  Reads the requested 'pathrev' plus the 'orig_path', 'orig_pathtype',
  'orig_pathrev' and 'orig_view' query parameters, works out the path
  and revision to use, and issues a redirect to the resulting URL.
  Only valid for Subversion roots.
  """
  assert request.roottype == 'svn'
  query = request.query_dict
  wanted_rev = query.get('pathrev') or None
  where = query.get('orig_path', '')
  kind = query.get('orig_pathtype')
  sticky_rev = query.get('orig_pathrev')
  target_view = _views.get(query.get('orig_view'))

  youngest = request.repos.get_youngest_revision()

  # Go out of the way to allow revision numbers higher than youngest:
  # anything unparsable or too large is treated as the youngest
  # revision.  A missing value (None) is left alone.
  try:
    numeric_rev = int(wanted_rev)
  except ValueError:
    wanted_rev = youngest
  except TypeError:
    pass
  else:
    wanted_rev = youngest if numeric_rev > youngest else numeric_rev

  if _repos_pathtype(request.repos, _path_parts(where), wanted_rev):
    sticky_rev = wanted_rev
  else:
    sticky_rev, where = request.repos.last_rev(where, sticky_rev, wanted_rev)
    # Allow clearing the sticky revision by submitting an empty string.
    if wanted_rev is None and sticky_rev == youngest:
      sticky_rev = None

  target_url = request.get_url(view_func=target_view,
                               where=where,
                               pathtype=kind,
                               params={'pathrev': sticky_rev})
  request.server.redirect(target_url)
2713
def view_log(request):
  """Generate the "log" page for the item addressed by REQUEST.

  Fetches revision log entries from the repository via itemlog(),
  converts each one into a template item (metadata plus view,
  download, annotate, revision and diff links), merges the result with
  the common template data and hands everything to generate_page().

  Query parameters consulted: 'diff_format', 'logsort',
  'log_pagestart' and 'r1' (the revision selected for diffs).

  When cfg.options.log_pagesize is set, only a window of revisions
  around 'log_pagestart' is requested from the repository and the
  entries are paged with paging_sws().

  Raises ViewVCException ('400 Bad Request') when asked for the log of
  a directory in a CVS repository.
  """
  cfg = request.cfg
  diff_format = request.query_dict.get('diff_format', cfg.options.diff_format)
  pathtype = request.pathtype

  if pathtype is vclib.DIR:
    if request.roottype == 'cvs':
      raise ViewVCException('Unsupported feature: log view on CVS directory',
                            '400 Bad Request')
    mime_type = encoding = None
  else:
    mime_type, encoding = calculate_mime_type(request,
                                              request.path_parts,
                                              request.pathrev)

  # Options handed to (and possibly added to by) repos.itemlog().
  options = {}
  options['svn_show_all_dir_logs'] = 1 ### someday make this optional?
  options['svn_cross_copies'] = cfg.options.cross_copies

  # Sort order is only selectable for non-Subversion roots; for
  # Subversion the default order is used and 'logsort' is cleared.
  logsort = request.query_dict.get('logsort', cfg.options.log_sort)
  if request.roottype == "svn":
    sortby = vclib.SORTBY_DEFAULT
    logsort = None
  else:
    if logsort == 'date':
      sortby = vclib.SORTBY_DATE
    elif logsort == 'rev':
      sortby = vclib.SORTBY_REV
    else:
      sortby = vclib.SORTBY_DEFAULT

  # Work out which window of revisions to request.  With paging
  # enabled the window reaches log_pagesextra pages back from the
  # requested page start (clamped at zero) and log_pagesextra + 1 pages
  # plus one revision forward.  Without paging, FIRST and the count
  # passed to itemlog() are both 0.
  # NOTE(review): presumably a count of 0 means "no limit" -- confirm
  # against the vclib itemlog() implementations.
  first = last = 0
  log_pagestart = None
  if cfg.options.log_pagesize:
    log_pagestart = int(request.query_dict.get('log_pagestart', 0))
    total = cfg.options.log_pagesextra * cfg.options.log_pagesize
    first = log_pagestart - min(log_pagestart, total)
    last = log_pagestart + (total + cfg.options.log_pagesize) + 1
  show_revs = request.repos.itemlog(request.path_parts, request.pathrev,
                                    sortby, first, last - first, options)

  # selected revision
  selected_rev = request.query_dict.get('r1')

  entries = [ ]
  # Branch numbers whose branch names have already been attached to an
  # entry (CVS only), so that the names are listed once per branch.
  name_printed = { }
  cvs = request.roottype == 'cvs'
  for rev in show_revs:
    entry = _item()
    entry.rev = rev.string
    entry.state = (cvs and rev.dead and 'dead')
    entry.author = rev.author
    entry.changed = rev.changed
    entry.date = make_time_string(rev.date, cfg)
    entry.ago = None
    if rev.date is not None:
      entry.ago = html_time(request, rev.date, 1)
    entry.size = rev.size
    entry.lockinfo = rev.lockinfo
    entry.branch_point = None
    entry.next_main = None
    entry.orig_path = None
    entry.copy_path = None

    lf = LogFormatter(request, rev.log or '')
    entry.log = lf.get(maxlen=0, htmlize=1)

    # Default every link to None; the applicable ones are filled in
    # below.
    entry.view_href = None
    entry.download_href = None
    entry.download_text_href = None
    entry.annotate_href = None
    entry.revision_href = None
    entry.sel_for_diff_href = None
    entry.diff_to_sel_href = None
    entry.diff_to_prev_href = None
    entry.diff_to_branch_href = None
    entry.diff_to_main_href = None

    if request.roottype == 'cvs':
      prev = rev.prev or rev.parent
      entry.prev = prev and prev.string

      branch = rev.branch_number
      entry.vendor_branch = ezt.boolean(branch and branch[2] % 2 == 1)

      entry.branches = prep_tags(request, rev.branches)
      entry.tags = prep_tags(request, rev.tags)
      entry.branch_points = prep_tags(request, rev.branch_points)

      entry.tag_names = [x.name for x in rev.tags]
      if branch and branch not in name_printed:
        entry.branch_names = [x.name for x in rev.branches]
        name_printed[branch] = 1
      else:
        entry.branch_names = [ ]

      if rev.parent and rev.parent is not prev and not entry.vendor_branch:
        entry.branch_point = rev.parent.string

      # if it's the last revision on a branch then diff against the
      # last revision on the higher branch (e.g. change is committed and
      # brought over to -stable)
      if not rev.next and rev.parent and rev.parent.next:
        r = rev.parent.next
        while r.next:
          r = r.next
        entry.next_main = r.string

    elif request.roottype == 'svn':
      entry.prev = rev.prev and rev.prev.string
      entry.branches = entry.tags = entry.branch_points = [ ]
      entry.tag_names = entry.branch_names = [ ]
      entry.vendor_branch = None
      # Record the path this revision was logged under only when it
      # differs from the requested location.
      if rev.filename != request.where:
        entry.orig_path = rev.filename
      entry.copy_path = rev.copy_path
      entry.copy_rev = rev.copy_rev

      if entry.orig_path:
        entry.orig_href = request.get_url(view_func=view_log,
                                          where=entry.orig_path,
                                          pathtype=vclib.FILE,
                                          params={'pathrev': rev.string},
                                          escape=1)

      if rev.copy_path:
        entry.copy_href = request.get_url(view_func=view_log,
                                          where=rev.copy_path,
                                          pathtype=vclib.FILE,
                                          params={'pathrev': rev.copy_rev},
                                          escape=1)


    # view/download links
    if pathtype is vclib.FILE:
      fvi = get_file_view_info(request, request.where, rev.string, mime_type)
      entry.view_href = fvi.view_href
      entry.download_href = fvi.download_href
      entry.download_text_href = fvi.download_text_href
      entry.annotate_href = fvi.annotate_href
      entry.revision_href = fvi.revision_href
      entry.prefer_markup = fvi.prefer_markup
    else:
      entry.revision_href = request.get_url(view_func=view_revision,
                                            params={'revision': rev.string},
                                            escape=1)
      entry.view_href = request.get_url(view_func=view_directory,
                                        where=rev.filename,
                                        pathtype=vclib.DIR,
                                        params={'pathrev': rev.string},
                                        escape=1)

    # calculate diff links
    if selected_rev != entry.rev:
      entry.sel_for_diff_href = \
        request.get_url(view_func=view_log,
                        params={'r1': entry.rev,
                                'log_pagestart': log_pagestart},
                        escape=1)
    if entry.prev is not None:
      entry.diff_to_prev_href = \
        request.get_url(view_func=view_diff,
                        params={'r1': entry.prev,
                                'r2': entry.rev,
                                'diff_format': None},
                        escape=1)
    # Offer a diff against the selected revision only when it is not
    # already covered by one of the other diff links for this entry.
    if selected_rev and \
           selected_rev != str(entry.rev) and \
           selected_rev != str(entry.prev) and \
           selected_rev != str(entry.branch_point) and \
           selected_rev != str(entry.next_main):
      entry.diff_to_sel_href = \
        request.get_url(view_func=view_diff,
                        params={'r1': selected_rev,
                                'r2': entry.rev,
                                'diff_format': None},
                        escape=1)

    if entry.next_main:
      entry.diff_to_main_href = \
        request.get_url(view_func=view_diff,
                        params={'r1': entry.next_main,
                                'r2': entry.rev,
                                'diff_format': None},
                        escape=1)
    if entry.branch_point:
      entry.diff_to_branch_href = \
        request.get_url(view_func=view_diff,
                        params={'r1': entry.branch_point,
                                'r2': entry.rev,
                                'diff_format': None},
                        escape=1)

    # Save our escaping until the end so stuff above works
    if entry.orig_path:
      entry.orig_path = request.server.escape(entry.orig_path)
    if entry.copy_path:
      entry.copy_path = request.server.escape(entry.copy_path)
    entries.append(entry)

  diff_select_action, diff_select_hidden_values = \
    request.get_form(view_func=view_diff,
                     params={'r1': None, 'r2': None, 'tr1': None,
                             'tr2': None, 'diff_format': None})
  logsort_action, logsort_hidden_values = \
    request.get_form(params={'logsort': None})


  # Prepare the data that will be passed to the template, based on the
  # common template data.  Entries set to None or empty here are
  # filled in further down where applicable.
  data = common_template_data(request)
  data.merge(TemplateData({
    'default_branch' : None,
    'mime_type' : mime_type,
    'rev_selected' : selected_rev,
    'diff_format' : diff_format,
    'logsort' : logsort,
    'human_readable' : ezt.boolean(diff_format in ('f', 'h', 'l')),
    'log_pagestart' : None,
    'log_paging_action' : None,
    'log_paging_hidden_values' : [],
    'entries': entries,
    'head_prefer_markup' : ezt.boolean(0),
    'head_view_href' : None,
    'head_download_href': None,
    'head_download_text_href': None,
    'head_annotate_href': None,
    'tag_prefer_markup' : ezt.boolean(0),
    'tag_view_href' : None,
    'tag_download_href': None,
    'tag_download_text_href': None,
    'tag_annotate_href': None,
    'diff_select_action' : diff_select_action,
    'diff_select_hidden_values' : diff_select_hidden_values,
    'logsort_action' : logsort_action,
    'logsort_hidden_values' : logsort_hidden_values,
    'tags' : [],
    'branch_tags' : [],
    'plain_tags' : [],

    # Populated by paging()/paging_sws()
    'picklist' : [],
    'picklist_len' : 0,

    # Populated by pathrev_form()
    'pathrev_action' : None,
    'pathrev_hidden_values' : [],
    'pathrev_clear_action' : None,
    'pathrev_clear_hidden_values' : [],
    'pathrev' : None,
    'lastrev' : None,
  }))

  lastrev = pathrev_form(request, data)

  if pathtype is vclib.FILE:
    # "head_*" links are built with both the revision and the path
    # revision arguments explicitly passed as None.
    if not request.pathrev or lastrev is None:
      fvi = get_file_view_info(request, request.where, None, mime_type, None)
      data['head_view_href']= fvi.view_href
      data['head_download_href']= fvi.download_href
      data['head_download_text_href']= fvi.download_text_href
      data['head_annotate_href']= fvi.annotate_href
      data['head_prefer_markup']= fvi.prefer_markup

    # "tag_*" links leave the final argument of get_file_view_info()
    # at its default.
    # NOTE(review): presumably that default makes the links honour the
    # request's sticky tag -- confirm against get_file_view_info().
    if request.pathrev and request.roottype == 'cvs':
      fvi = get_file_view_info(request, request.where, None, mime_type)
      data['tag_view_href']= fvi.view_href
      data['tag_download_href']= fvi.download_href
      data['tag_download_text_href']= fvi.download_text_href
      data['tag_annotate_href']= fvi.annotate_href
      data['tag_prefer_markup']= fvi.prefer_markup
  else:
    data['head_view_href'] = request.get_url(view_func=view_directory,
                                             params={}, escape=1)

  # NOTE(review): 'cvs_tags' is never stored into OPTIONS by this
  # function; presumably itemlog() adds it for CVS roots -- confirm
  # against vclib.ccvs.  When absent, no tag data is produced.
  taginfo = options.get('cvs_tags', {})
  tagitems = sorted(taginfo.items(), reverse=True)

  main = taginfo.get('MAIN')
  if main:
    # Default branch may have multiple names so we list them
    branches = []
    for branch in main.aliases:
      # Don't list MAIN
      if branch is not main:
        branches.append(branch)
    data['default_branch'] = prep_tags(request, branches)

  for tag, rev in tagitems:
    rev_str = None
    if rev.number:
      rev_str = '.'.join(map(str, rev.number))

    if rev.co_rev:
      data['tags'].append(_item(rev=rev.co_rev.string, name=tag))
    if rev.is_branch:
      data['branch_tags'].append(_item(name=tag, revision=rev_str))
    else:
      data['plain_tags'].append(_item(name=tag, revision=rev_str))

  if cfg.options.log_pagesize:
    data['log_paging_action'], data['log_paging_hidden_values'] = \
      request.get_form(params={'log_pagestart': None,
                               'r1': selected_rev,
                               })
    data['log_pagestart'] = int(request.query_dict.get('log_pagestart',0))
    # FIRST is the offset of the fetched window within the full log.
    data['entries'] = paging_sws(data, 'entries', data['log_pagestart'],
                                 'rev', cfg.options.log_pagesize,
                                 cfg.options.log_pagesextra, first)

  generate_page(request, "log", data)
3023
def view_checkout(request):
  """Send the contents of the requested file revision to the client.

  Raises ViewVCException with '403 Forbidden' when the 'co' view is
  not listed in cfg.options.allowed_views, and with '400 Bad Request'
  when the request does not address a file.

  The content type is taken from the 'content-type' query parameter
  if present, else from the calculated MIME type, else 'text/plain'.
  Nothing is copied when check_freshness() returns a true value.
  """

  cfg = request.cfg

  if 'co' not in cfg.options.allowed_views:
    raise ViewVCException('Checkout view is disabled',
                          '403 Forbidden')
  if request.pathtype != vclib.FILE:
    raise ViewVCException('Unsupported feature: checkout view on directory',
                          '400 Bad Request')

  path, rev = _orig_path(request)
  fp, revision = request.repos.openfile(path, rev, {})

  # Make sure the repository file handle is released even if sending
  # the response fails part way through.
  try:
    # The revision number acts as a strong validator.
    if not check_freshness(request, None, revision):
      mime_type, encoding = calculate_mime_type(request, path, rev)
      mime_type = request.query_dict.get('content-type') \
                  or mime_type \
                  or 'text/plain'
      server_fp = get_writeready_server_file(request, mime_type, encoding)
      copy_stream(fp, server_fp)
  finally:
    fp.close()
3047
def cvsgraph_make_reqopt(request, cfgname, queryparam, optvalue):
  """Return OPTVALUE if the request may, and does, turn the option on.

  OPTVALUE (a cvsgraph custom option substring) is returned only when
  CFGNAME appears in the configured list of user-overridable cvsgraph
  options and the QUERYPARAM query parameter is present with a value
  whose integer interpretation is non-zero.  In every other case the
  empty string is returned.
  """
  # Options the user isn't allowed to override are ignored outright;
  # the query parameter is not even looked at.
  if cfgname not in request.cfg.options.allowed_cvsgraph_useropts:
    return ''
  if int(request.query_dict.get(queryparam, 0)):
    return optvalue
  return ''
3059
def cvsgraph_normalize_gshow(request):
  """Return the effective value of the 'gshow' query parameter.

  The result is always one of 'all', 'inittagged' or 'tagged'.  A
  missing parameter and an unrecognized value are both treated as
  'all'.
  """
  requested = request.query_dict.get('gshow', 'all')
  if requested in ('all', 'inittagged', 'tagged'):
    return requested
  return 'all'
3068
def cvsgraph_extraopts(request):
  """Return the '-O...;' custom option argument to pass to cvsgraph.

  The argument is assembled from what the user has requested through
  query parameters, filtered against the options the configuration
  allows users to override.
  """
  allowed = request.cfg.options.allowed_cvsgraph_useropts
  pieces = ['-O']

  # Simple mappings of boolean flags:
  # (config option name, query parameter, cvsgraph option text)
  boolean_flags = (
    ('invert', 'gflip', ';upside_down=true'),
    ('branchbox', 'gbbox', ';branch_dupbox=true'),
    ('rotate', 'gleft', ';left_right=true'),
  )
  for cfgname, queryparam, optvalue in boolean_flags:
    pieces.append(cvsgraph_make_reqopt(request, cfgname, queryparam,
                                       optvalue))

  # Stripping is a little more complex: it depends on which of the
  # recognized 'gshow' values is in effect.
  if 'show' in allowed:
    gshow = cvsgraph_normalize_gshow(request)
    if gshow == 'inittagged':
      pieces.append(';strip_untagged=true')
    elif gshow == 'tagged':
      pieces.append(';strip_untagged=true;strip_first_rev=true')

  # And tag limitation has a user-supplied value to mess with.
  if 'limittags' in allowed and 'gmaxtag' in request.query_dict:
    pieces.append(';rev_maxtags=' + request.query_dict['gmaxtag'])

  pieces.append(';')
  return ''.join(pieces)
3100
def view_cvsgraph_image(request):
  """Output the image rendered by cvsgraph.

  Runs cvsgraph (cfg.utilities.cvsgraph, or 'cvsgraph' if that is
  unset) on the RCS file for the requested path and copies its output
  to the client with the 'image/png' content type.

  Raises ViewVCException ('403 Forbidden') when cfg.options.use_cvsgraph
  is not enabled.
  """
  # this function is derived from cgi/cvsgraphmkimg.cgi

  cfg = request.cfg

  if not cfg.options.use_cvsgraph:
    raise ViewVCException('Graph view is disabled', '403 Forbidden')

  # If cvsgraph can't find its supporting libraries, uncomment and set
  # accordingly.  Do the same in view_cvsgraph().
  #os.environ['LD_LIBRARY_PATH'] = '/usr/lib:/usr/local/lib:/path/to/cvsgraph'

  rcsfile = request.repos.rcsfile(request.path_parts)
  fp = popen.popen(cfg.utilities.cvsgraph or 'cvsgraph',
                   ("-c", cfg.path(cfg.options.cvsgraph_conf),
                    "-r", request.repos.rootpath,
                    cvsgraph_extraopts(request),
                    rcsfile))
  # Make sure the stream from cvsgraph is closed even if sending the
  # response fails part way through.
  try:
    copy_stream(fp, get_writeready_server_file(request, 'image/png'))
  finally:
    fp.close()
3122
def view_cvsgraph(request):
  """Generate the "graph" page: a cvsgraph image plus its image map.

  cvsgraph is run in image-map mode and its output stream is handed to
  the template as 'imagemap'; the image itself is served separately by
  view_cvsgraph_image().  Raises ViewVCException ('403 Forbidden') when
  the use_cvsgraph option is disabled.
  """
  cfg = request.cfg

  if not cfg.options.use_cvsgraph:
    raise ViewVCException('Graph view is disabled', '403 Forbidden')

  # If cvsgraph can't find its supporting libraries, uncomment and set
  # accordingly.  Do the same in view_cvsgraph_image().
  #os.environ['LD_LIBRARY_PATH'] = '/usr/lib:/usr/local/lib:/path/to/cvsgraph'

  imagesrc = request.get_url(view_func=view_cvsgraph_image, escape=1)
  file_view = default_view(guess_mime(request.where), cfg)
  parent_where = _path_join(request.path_parts[:-1])
  rcsfile = request.repos.rcsfile(request.path_parts)

  # Gather the pieces of the cvsgraph command line for the image map.
  graph_cmd = cfg.utilities.cvsgraph or 'cvsgraph'
  conf_path = cfg.path(cfg.options.cvsgraph_conf)
  root_path = request.repos.rootpath
  log_href = request.get_url(view_func=view_log, params={}, escape=1)
  file_href = request.get_url(view_func=file_view,
                              params={'revision': None},
                              escape=1, partial=1)
  diff_href = request.get_url(view_func=view_diff,
                              params={'r1': None, 'r2': None},
                              escape=1, partial=1)
  dir_href = request.get_url(view_func=view_directory,
                             where=parent_where,
                             pathtype=vclib.DIR,
                             params={'pathrev': None},
                             escape=1, partial=1)
  extra_opts = cvsgraph_extraopts(request)
  fp = popen.popen(graph_cmd,
                   ("-i",
                    "-c", conf_path,
                    "-r", root_path,
                    "-x", "x",
                    "-3", log_href,
                    "-4", file_href,
                    "-5", diff_href,
                    "-6", dir_href,
                    extra_opts,
                    rcsfile))

  graph_action, graph_hidden_values = \
    request.get_form(view_func=view_cvsgraph, params={})

  data = common_template_data(request)

  useropts = cfg.options.allowed_cvsgraph_useropts
  query = request.query_dict
  template_vars = {
    'imagemap' : fp,
    'imagesrc' : imagesrc,
    'graph_action' : graph_action,
    'graph_hidden_values' : graph_hidden_values,
    }
  # Which of the user-tunable options the configuration permits.
  for key, optname in (('opt_gflip', 'invert'),
                       ('opt_gbbox', 'branchbox'),
                       ('opt_gshow', 'show'),
                       ('opt_gleft', 'rotate'),
                       ('opt_gmaxtag', 'limittags')):
    template_vars[key] = ezt.boolean(optname in useropts)
  # Current values of the options, as given in the query string.
  for key in ('gflip', 'gbbox', 'gleft'):
    template_vars[key] = ezt.boolean(int(query.get(key, 0)))
  template_vars['gmaxtag'] = query.get('gmaxtag', 0)
  template_vars['gshow'] = cvsgraph_normalize_gshow(request)

  data.merge(TemplateData(template_vars))
  generate_page(request, "graph", data)
3184
def search_file(repos, path_parts, rev, search_re):
  """Return 1 iff the contents of the file at PATH_PARTS in REPOS as
  of revision REV matches regular expression SEARCH_RE.

  The file is read line by line; each line is decoded as UTF-8 (with
  'surrogateescape') and tested with SEARCH_RE.search().  The file
  handle is closed before returning, whether or not a match was found
  and also if reading or matching raises.
  """
  fp = repos.openfile(path_parts, rev, {})[0]
  try:
    while 1:
      line = fp.readline()
      if not line:
        # Reached end of file without a match.
        return 0
      # FIXME: Is there anything we can do about the file's encoding?
      if search_re.search(line.decode('utf-8', 'surrogateescape')):
        return 1
  finally:
    fp.close()
3203
def view_doc(request):
  """Serve ViewVC static content locally.

  Using this avoids the need for modifying the setup of the web server.

  The file is looked up under the "docroot" subdirectory of the
  configured template directory, using request.where as the relative
  name.  Raises ViewVCException ('404 Not Found') if the file cannot be
  stat'ed or opened.  Returns without sending a body when
  check_freshness() reports a true value.
  """
  cfg = request.cfg
  document = request.where
  filename = cfg.path(os.path.join(cfg.options.template_dir,
                                   "docroot", document))

  # Stat the file to get content length and last-modified date.
  try:
    info = os.stat(filename)
  except OSError as v:
    raise ViewVCException('Static file "%s" not available (%s)'
                          % (document, str(v)), '404 Not Found')
  # Index access (rather than .st_mtime) yields integer seconds.
  content_length = str(info[stat.ST_SIZE])
  last_modified = info[stat.ST_MTIME]

  # content_length + mtime makes a pretty good etag.
  if check_freshness(request, last_modified,
                     "%s-%s" % (content_length, last_modified)):
    return

  try:
    fp = open(filename, "rb")
  except IOError as v:
    raise ViewVCException('Static file "%s" not available (%s)'
                          % (document, str(v)), '404 Not Found')

  try:
    # Pick the MIME type from the last three characters of the name;
    # anything unrecognized gets None (assume HTML).
    mime_type = {
      'png': 'image/png',
      'jpg': 'image/jpeg',
      'gif': 'image/gif',
      'css': 'text/css',
    }.get(document[-3:])
    copy_stream(fp, get_writeready_server_file(request, mime_type,
                                               content_length=content_length))
  finally:
    # Close the file even if sending the response failed.
    fp.close()
3247
def rcsdiff_date_reformat(date_str, cfg):
  """Reformat a date string taken from a diff header for display.

  Returns None when DATE_STR is None, DATE_STR itself when
  vclib.ccvs.cvs_strptime() rejects it with ValueError, and otherwise
  the result of make_time_string() for the parsed time.
  """
  if date_str is not None:
    try:
      parsed = vclib.ccvs.cvs_strptime(date_str)
    except ValueError:
      # Not a date we understand; hand it back untouched.
      return date_str
    return make_time_string(calendar.timegm(parsed), cfg)
  return None
3256
# Matches a diff file-header line: three of '-', '+' or '*', a space, a
# tab-free path, a tab, a tab-free field (group 1; used as the date by
# diff_parse_headers()), a tab, and a dotted-decimal revision (group 2).
_re_extract_rev = re.compile(r'^[-+*]{3} [^\t]+\t([^\t]+)\t((\d+\.)*\d+)$')
# Matches a unified diff hunk header ("@@ -L... +R... @@ extra"): group 1
# is the left starting line, group 2 the right starting line, and
# group 3 whatever follows the closing "@@".
_re_extract_info = re.compile(r'@@ \-([0-9]+).*\+([0-9]+).*@@(.*)')
3259
class DiffSource:
  """Sequence-like reader that turns unified diff text into display rows.

  Rows are produced lazily from FP (anything with a readline() method)
  and must be requested in order through indexing: asking again for the
  current index returns the cached row, asking for the next index reads
  on, and anything else raises DiffSequencingError.  IndexError signals
  the end of the data.

  Removed and added lines are buffered in left_col/right_col and
  emitted later as 'remove' or 'change' rows while in a 'flush-*'
  state.
  """

  def __init__(self, fp, cfg):
    self.fp = fp
    self.cfg = cfg
    # A line that was read but must be processed again after a flush.
    self.save_line = None
    # Current line numbers on the right (new) and left (old) side.
    self.line_number = None
    self.prev_line_number = None

    # keep track of where we are during an iteration
    self.idx = -1
    self.last = None

    # these will be set once we start reading
    self.state = 'no-changes'
    self.left_col = [ ]
    self.right_col = [ ]

  def __getitem__(self, idx):
    """Return row IDX; only the current or the next index is allowed."""
    if idx == self.idx:
      return self.last
    if idx != self.idx + 1:
      raise DiffSequencingError()

    # keep calling _get_row until it gives us something. sometimes, it
    # doesn't return a row immediately because it is accumulating changes.
    # when it is out of data, _get_row will raise IndexError.
    while 1:
      item = self._get_row()
      if item:
        self.idx = idx
        self.last = item
        return item

  def _format_text(self, text):
    """Return TEXT as escaped markup for one cell of the diff table.

    Strips the line ending, expands tabs when the tabsize option is
    positive, inserts break markers according to the hr_breakable
    option, converts spaces to non-breaking spaces and escapes the
    result with sapi.escape().
    """
    text = text.rstrip('\r\n')
    if self.cfg.options.tabsize > 0:
      text = text.expandtabs(self.cfg.options.tabsize)
    hr_breakable = self.cfg.options.hr_breakable

    # in the code below, "\x01" will be our stand-in for "&". We don't want
    # to insert "&" because it would get escaped by sapi.escape().  Similarly,
    # we use "\x02" as a stand-in for "<br>"

    if hr_breakable > 1 and len(text) > hr_breakable:
      text = re.sub('(' + ('.' * hr_breakable) + ')', '\\1\x02', text)
    if hr_breakable:
      # make every other space "breakable"
      text = text.replace('  ', ' \x01nbsp;')
    else:
      text = text.replace(' ', '\x01nbsp;')
    text = sapi.escape(text)
    text = text.replace('\x01', '&')
    # The backslash is doubled so that it is a valid escape sequence; the
    # resulting text (a literal backslash inside the span) is unchanged.
    text = text.replace('\x02', '<span style="color:red">\\</span><br />')
    return text

  def _get_row(self):
    """Read one more line and return a row, or None if none is ready yet.

    Raises IndexError when the input is exhausted and nothing is left
    to flush.
    """
    if self.state[:5] == 'flush':
      item = self._flush_row()
      if item:
        return item
      self.state = 'dump'

    if self.save_line:
      line = self.save_line
      self.save_line = None
    else:
      line = self.fp.readline()

    if not line:
      if self.state == 'no-changes':
        self.state = 'done'
        return _item(type=_RCSDIFF_NO_CHANGES)

      # see if there are lines to flush
      if self.left_col or self.right_col:
        # move into the flushing state
        self.state = 'flush-' + self.state
        return None

      # nothing more to return
      raise IndexError

    if line[:2] == '@@':
      # Hunk header: reset the buffers and pick up the starting lines.
      self.state = 'dump'
      self.left_col = [ ]
      self.right_col = [ ]

      match = _re_extract_info.match(line)
      self.line_number = int(match.group(2)) - 1
      self.prev_line_number = int(match.group(1)) - 1
      return _item(type='header',
                   line_info_left=match.group(1),
                   line_info_right=match.group(2),
                   line_info_extra=self._format_text(match.group(3)))

    if line[0] == '\\':
      # \ No newline at end of file
      # Just skip. This code used to move to flush state, but that resulted in
      # changes being displayed as removals-and-readditions.
      return None

    diff_code = line[0]
    output = self._format_text(line[1:])

    if diff_code == '+':
      if self.state == 'dump':
        # An addition with no pending removals is emitted right away.
        self.line_number = self.line_number + 1
        return _item(type='add', right=output, line_number=self.line_number)

      self.state = 'pre-change-add'
      self.right_col.append(output)
      return None

    if diff_code == '-':
      self.state = 'pre-change-remove'
      self.left_col.append(output)
      return None  # early exit to avoid line in

    if self.left_col or self.right_col:
      # save the line for processing again later, and move into the
      # flushing state
      self.save_line = line
      self.state = 'flush-' + self.state
      return None

    self.line_number = self.line_number + 1
    self.prev_line_number = self.prev_line_number + 1
    return _item(type='context', left=output, right=output,
                 line_number=self.line_number)

  def _flush_row(self):
    """Return the next buffered 'remove'/'change' row, or None if empty."""
    if not self.left_col and not self.right_col:
      # nothing more to flush
      return None

    # Only emit a pure removal while removed lines are actually pending.
    # Without the left_col check, a buffer holding only added lines in
    # this state would make pop(0) raise IndexError, which callers read
    # as "end of diff" and silently truncate the output.
    if self.state == 'flush-pre-change-remove' and self.left_col:
      self.prev_line_number = self.prev_line_number + 1
      return _item(type='remove', left=self.left_col.pop(0),
                   line_number=self.prev_line_number)

    # state == flush-pre-change-add (or only added lines remain)
    item = _item(type='change',
                 have_left=ezt.boolean(0),
                 have_right=ezt.boolean(0))
    if self.left_col:
      self.prev_line_number = self.prev_line_number + 1
      item.have_left = ezt.boolean(1)
      item.left = self.left_col.pop(0)
      item.line_number = self.prev_line_number
    if self.right_col:
      self.line_number = self.line_number + 1
      item.have_right = ezt.boolean(1)
      item.right = self.right_col.pop(0)
      item.line_number = self.line_number
    return item
3415
class DiffSequencingError(Exception):
  """Raised by DiffSource.__getitem__ when rows are requested out of order."""
3418
def diff_parse_headers(fp, diff_type, path1, path2, rev1, rev2,
                       sym1=None, sym2=None):
  """Consume and rewrite the header lines at the start of diff output.

  For unified and context diffs, lines are read from FP up to and
  including the second file-header line (or a line reporting a binary
  file or an error).  File-header lines that match _re_extract_rev are
  rewritten to show PATH1/PATH2 and, when given, SYM1/SYM2.  Other diff
  types are not read at all.

  Returns (date1, date2, flag, headers): the date fields found in the
  two headers (or None), a flag that is None or one of
  _RCSDIFF_NO_CHANGES (no input at all), _RCSDIFF_IS_BINARY or
  _RCSDIFF_ERROR, and the collected header lines joined into one
  string.

  Raises ViewVCException ('500 Internal Server Error') if a revision
  found in a header differs from REV1/REV2.
  """
  date1 = date2 = log_rev1 = log_rev2 = flag = None
  header_lines = []

  # Prefixes of the first and second file-header lines, per diff type.
  if diff_type == vclib.UNIFIED:
    f1, f2 = '--- ', '+++ '
  elif diff_type == vclib.CONTEXT:
    f1, f2 = '*** ', '--- '
  else:
    f1 = f2 = None

  if f1 and f2:
    # Until a line has been seen, the diff counts as "no changes".
    flag = _RCSDIFF_NO_CHANGES
    while True:
      line = fp.readline()
      if not line:
        break

      # Saw at least one line in the stream
      flag = None
      finished = False

      if line.startswith(f1):
        found = _re_extract_rev.match(line)
        if found:
          date1, log_rev1 = found.group(1), found.group(2)
          suffix = (' ' + sym1) if sym1 else ''
          line = '%s%s\t%s\t%s%s\n' % (f1, path1, date1, log_rev1, suffix)
      elif line.startswith(f2):
        found = _re_extract_rev.match(line)
        if found:
          date2, log_rev2 = found.group(1), found.group(2)
          suffix = (' ' + sym2) if sym2 else ''
          line = '%s%s\t%s\t%s%s\n' % (f2, path2, date2, log_rev2, suffix)
        # The second header line ends the header section.
        finished = True
      elif line.startswith('Bin'):
        flag = _RCSDIFF_IS_BINARY
        finished = True
      elif 'not found' in line or 'illegal option' in line:
        flag = _RCSDIFF_ERROR
        finished = True

      header_lines.append(line)
      if finished:
        break

  # Sanity-check the revisions the diff reported against what was asked.
  for found_rev, wanted_rev in ((log_rev1, rev1), (log_rev2, rev2)):
    if found_rev and found_rev != wanted_rev:
      raise ViewVCException('rcsdiff found revision %s, but expected '
                            'revision %s' % (found_rev, wanted_rev),
                            '500 Internal Server Error')

  return date1, date2, flag, ''.join(header_lines)
3482
3483
3484def _get_diff_path_parts(request, query_key, rev, base_rev):
3485  repos = request.repos
3486  if query_key in request.query_dict:
3487    parts = _path_parts(request.query_dict[query_key])
3488  elif request.roottype == 'svn':
3489    try:
3490      parts = _path_parts(repos.get_location(request.where,
3491                                             repos._getrev(base_rev),
3492                                             repos._getrev(rev)))
3493    except vclib.InvalidRevision:
3494      raise ViewVCException('Invalid path(s) or revision(s) passed to diff',
3495                            '400 Bad Request')
3496    except vclib.ItemNotFound:
3497      raise ViewVCException('Invalid path(s) or revision(s) passed to diff',
3498                            '400 Bad Request')
3499  else:
3500    parts = request.path_parts
3501  return parts
3502
3503
def setup_diff(request):
  """Work out which paths and revisions a diff request refers to.

  Reads the 'r1'/'r2' query parameters.  The value 'text' means "take
  the revision from 'tr1'/'tr2'"; otherwise the value is a revision,
  optionally followed by ':' and a symbolic name.  For Subversion roots
  the revisions are normalized through the repository.  The two sides
  are swapped when revcmp() says the first revision is newer than the
  second.

  Returns (p1, p2, rev1, rev2, sym1, sym2).

  Raises ViewVCException ('400 Bad Request') when a revision parameter
  is missing or invalid.
  """
  query_dict = request.query_dict

  # A request without r1/r2 is a client error, not a server failure, so
  # report it as 400 rather than letting a KeyError escape.
  r1 = query_dict.get('r1')
  r2 = query_dict.get('r2')
  if r1 is None or r2 is None:
    raise ViewVCException('Missing revision(s) passed to diff',
                          '400 Bad Request')
  sym1 = sym2 = None

  # hack on the diff revisions
  if r1 == 'text':
    rev1 = query_dict.get('tr1', None)
    if not rev1:
      raise ViewVCException('Missing revision from the diff form text field',
                            '400 Bad Request')
  else:
    # "REV" or "REV:SYMBOL"; split at the first colon only.
    rev1, sep, sym = r1.partition(':')
    if sep:
      sym1 = sym

  if r2 == 'text':
    rev2 = query_dict.get('tr2', None)
    if not rev2:
      raise ViewVCException('Missing revision from the diff form text field',
                            '400 Bad Request')
    sym2 = ''
  else:
    rev2, sep, sym = r2.partition(':')
    if sep:
      sym2 = sym

  if request.roottype == 'svn':
    try:
      rev1 = str(request.repos._getrev(rev1))
      rev2 = str(request.repos._getrev(rev2))
    except vclib.InvalidRevision:
      raise ViewVCException('Invalid revision(s) passed to diff',
                            '400 Bad Request')

  p1 = _get_diff_path_parts(request, 'p1', rev1, request.pathrev)
  p2 = _get_diff_path_parts(request, 'p2', rev2, request.pathrev)

  try:
    # Keep the older revision on the left-hand side.
    if revcmp(rev1, rev2) > 0:
      rev1, rev2 = rev2, rev1
      sym1, sym2 = sym2, sym1
      p1, p2 = p2, p1
  except ValueError:
    raise ViewVCException('Invalid revision(s) passed to diff',
                          '400 Bad Request')
  return p1, p2, rev1, rev2, sym1, sym2
3559
3560
def view_patch(request):
  """Send a plain-text patch (unified or context diff) to the client.

  Raises ViewVCException with '403 Forbidden' when the diff view is not
  allowed or either file has a binary MIME type, and with
  '400 Bad Request' for an unknown diff format or invalid revisions.
  """
  if 'diff' not in request.cfg.options.allowed_views:
    raise ViewVCException('Diff generation is disabled',
                          '403 Forbidden')

  cfg = request.cfg
  query_dict = request.query_dict
  p1, p2, rev1, rev2, sym1, sym2 = setup_diff(request)

  mime_type1, _encoding1 = calculate_mime_type(request, p1, rev1)
  mime_type2, _encoding2 = calculate_mime_type(request, p2, rev2)
  if is_binary_file_mime_type(mime_type1, cfg) or \
     is_binary_file_mime_type(mime_type2, cfg):
    raise ViewVCException('Display of binary file content disabled '
                          'by configuration', '403 Forbidden')

  # In the absence of a format dictation in the CGI params, we'll
  # use the configured diff format, allowing 'c' to mean 'c' and
  # anything else to mean 'u'.
  diff_format = query_dict.get(
    'diff_format', 'c' if cfg.options.diff_format == 'c' else 'u')
  if diff_format == 'c':
    diff_type = vclib.CONTEXT
  elif diff_format == 'u':
    diff_type = vclib.UNIFIED
  else:
    raise ViewVCException('Diff format %s not understood' % (diff_format),
                          '400 Bad Request')

  # Set some diff options.  (Are there other options folks might want?
  # Maybe not.  For a patch, perhaps the precise change is ideal.)
  diff_options = {}
  diff_options['funout'] = cfg.options.hr_funout

  try:
    fp = request.repos.rawdiff(p1, rev1, p2, rev2, diff_type, diff_options)
  except vclib.InvalidRevision:
    raise ViewVCException('Invalid path(s) or revision(s) passed to diff',
                          '400 Bad Request')

  try:
    path_left = _path_join(p1)
    path_right = _path_join(p2)
    _date1, _date2, _flag, headers = diff_parse_headers(fp, diff_type,
                                                        path_left, path_right,
                                                        rev1, rev2, sym1, sym2)

    server_fp = get_writeready_server_file(request, 'text/plain',
                                           is_text=True)
    server_fp.write(headers)
    copy_stream(fp, server_fp)
  finally:
    # Release the diff stream even if header parsing or output failed.
    fp.close()
3611
3612
def diff_side_item(request, path_comp, rev, sym):
  '''Build the template items describing one side of a diff.

  Returns (i_content, i_prop).  Both carry the log entry, date, author,
  log message, size, path, revision and tag of PATH_COMP at REV.  The
  content item additionally has the view/download/annotate/revision
  links and no properties; the property item has no links but carries
  the properties returned by the repository.'''

  repos = request.repos

  # TODO: Is the slice necessary, or is limit enough?
  history = repos.itemlog(path_comp, rev, vclib.SORTBY_REV, 0, 1,
                          {'svn_show_all_dir_logs': 1})
  log_entry = history[-1]

  if log_entry.date is not None:
    ago = html_time(request, log_entry.date, 1) or None
  else:
    ago = None
  path_joined = _path_join(path_comp)

  formatter = LogFormatter(request, log_entry.log)

  # Item for property diff: no hrefs, there's no view
  # to download/annotate property
  i_prop = _item(log_entry=log_entry,
                 date=make_time_string(log_entry.date, request.cfg),
                 author=log_entry.author,
                 log=formatter.get(maxlen=0, htmlize=1),
                 size=log_entry.size,
                 ago=ago,
                 path=path_joined,
                 path_comp=path_comp,
                 rev=rev,
                 tag=sym,
                 view_href=None,
                 download_href=None,
                 download_text_href=None,
                 annotate_href=None,
                 revision_href=None,
                 prefer_markup=ezt.boolean(0))

  # Content diff item is a shallow copy of the property item with the
  # link attributes taken from the file view info.
  fvi = get_file_view_info(request, path_joined, rev)
  i_content = copy.copy(i_prop)
  for attr in ('view_href', 'download_href', 'download_text_href',
               'annotate_href', 'revision_href', 'prefer_markup'):
    setattr(i_content, attr, getattr(fvi, attr))

  # Property diff item has properties hash, naturally. Content item doesn't.
  i_content.properties = None
  i_prop.properties = repos.itemprops(path_comp, rev)
  return i_content, i_prop
3660
3661
class DiffDescription:
  """Collect the diff blocks (content and properties) for the diff page.

  The constructor interprets the requested diff format and chooses how
  differences are computed: either line-based through the idiff module
  (self.line_differ) or stream-based through the repository / external
  diff (self.fp_differ).  Results accumulate in self.changes.
  """

  def __init__(self, request):
    """Set up from REQUEST's 'diff_format' parameter and configuration.

    Raises ViewVCException ('400 Bad Request') for an unknown format.
    """
    cfg = request.cfg
    query_dict = request.query_dict

    self.diff_format = query_dict.get('diff_format', cfg.options.diff_format)
    self.human_readable = 0
    self.hide_legend = 0
    self.line_differ = None
    self.fp_differ = None
    self.request = request
    # -1 means "do not pass a context option at all".
    self.context = -1
    self.changes = []

    if self.diff_format == 'c':
      self.diff_type = vclib.CONTEXT
      self.hide_legend = 1
    elif self.diff_format == 's':
      self.diff_type = vclib.SIDE_BY_SIDE
      self.hide_legend = 1
    elif self.diff_format == 'l':
      self.diff_type = vclib.UNIFIED
      self.context = 15
      self.human_readable = 1
    elif self.diff_format == 'f':
      self.diff_type = vclib.UNIFIED
      self.context = None
      self.human_readable = 1
    elif self.diff_format == 'h':
      self.diff_type = vclib.UNIFIED
      self.human_readable = 1
    elif self.diff_format == 'u':
      self.diff_type = vclib.UNIFIED
      self.hide_legend = 1
    else:
      raise ViewVCException('Diff format %s not understood' % self.diff_format,
                            '400 Bad Request')

    # Determine whether idiff is available and whether it could be used.
    # idiff only supports side-by-side (conditionally) and unified formats,
    # and is only used if intra-line diffs are requested.
    if (cfg.options.hr_intraline and idiff
        and ((self.human_readable and idiff.sidebyside)
             or (not self.human_readable and self.diff_type == vclib.UNIFIED))):
      # Override hiding legend for unified format. It is not marked 'human
      # readable', and it is displayed differently depending on whether
      # hr_intraline is disabled (displayed as raw diff) or enabled
      # (displayed as colored). What a royal mess... Issue #301 should
      # at some time address it; at that time, human_readable and hide_legend
      # controls should both be merged into one, 'is_colored' or something.
      self.hide_legend = 0
      if self.human_readable:
        self.line_differ = self._line_idiff_sidebyside
        self.diff_block_format = 'sidebyside-2'
      else:
        self.line_differ = self._line_idiff_unified
        self.diff_block_format = 'unified'
    else:
      if self.human_readable:
        self.diff_block_format = 'sidebyside-1'
        self.fp_differ = self._fp_vclib_hr
      else:
        self.diff_block_format = 'raw'
        self.fp_differ = self._fp_vclib_raw

  def anchor(self, anchor_name):
    """Append an 'anchor' pseudo-block named ANCHOR_NAME to the changes."""
    self.changes.append(_item(diff_block_format='anchor', anchor=anchor_name))

  def get_content_diff(self, left, right):
    """Append the file content diff between items LEFT and RIGHT."""
    cfg = self.request.cfg
    diff_options = {}
    if self.context != -1:
      diff_options['context'] = self.context
    if self.human_readable or self.diff_format == 'u':
      diff_options['funout'] = cfg.options.hr_funout
    if self.human_readable:
      diff_options['ignore_white'] = cfg.options.hr_ignore_white
      diff_options['ignore_keyword_subst'] = \
                      cfg.options.hr_ignore_keyword_subst
    self._get_diff(left, right, self._content_lines, self._content_fp,
                   diff_options, None)

  def get_prop_diff(self, left, right):
    """Append one diff block per property that differs between the sides.

    Undisplayable property names are skipped; properties with
    undisplayable values get a block holding a single
    _RCSDIFF_IS_BINARY item instead of a diff.
    """
    diff_options = {}
    if self.context != -1:
      diff_options['context'] = self.context
    if self.human_readable:
      cfg = self.request.cfg
      diff_options['ignore_white'] = cfg.options.hr_ignore_white
    for name in self._uniq(list(left.properties.keys())
                           + list(right.properties.keys())):
      # Skip non-utf8 property names
      if is_undisplayable(name):
        continue
      val_left = left.properties.get(name, '')
      val_right = right.properties.get(name, '')
      # Skip non-changed properties
      if val_left == val_right:
        continue
      # Check for binary properties
      if is_undisplayable(val_left) or is_undisplayable(val_right):
        self.changes.append(_item(left=left,
                                  right=right,
                                  diff_block_format=self.diff_block_format,
                                  changes=[ _item(type=_RCSDIFF_IS_BINARY) ],
                                  propname=name))
        continue
      self._get_diff(left, right, self._prop_lines, self._prop_fp,
                     diff_options, name)

  def _get_diff(self, left, right, get_lines, get_fp, diff_options, propname):
    """Compute one diff block with the selected differ and record it.

    GET_FP / GET_LINES supply the input for the stream-based and the
    line-based differ respectively; only the one matching the selected
    differ is called.
    """
    if self.fp_differ is not None:
      fp = get_fp(left, right, propname, diff_options)
      changes = self.fp_differ(left, right, fp, propname)
    else:
      lines_left = get_lines(left, propname)
      lines_right = get_lines(right, propname)
      changes = self.line_differ(lines_left, lines_right, diff_options)
    self.changes.append(_item(left=left,
                              right=right,
                              changes=changes,
                              diff_block_format=self.diff_block_format,
                              propname=propname))

  def _line_idiff_sidebyside(self, lines_left, lines_right, diff_options):
    """Side-by-side intraline diff via idiff (default context: 5)."""
    return idiff.sidebyside(lines_left, lines_right,
                            diff_options.get("context", 5))

  def _line_idiff_unified(self, lines_left, lines_right, diff_options):
    """Unified intraline diff via idiff (default context: 2)."""
    return idiff.unified(lines_left, lines_right,
                         diff_options.get("context", 2))

  def _fp_vclib_hr(self, left, right, fp, propname):
    """Return the rows for a human-readable diff read from FP.

    The result is a one-element list holding a flag item when
    diff_parse_headers() reports a flag, otherwise a DiffSource.
    """
    date1, date2, flag, headers = \
                    diff_parse_headers(fp, self.diff_type,
                                       self._property_path(left, propname),
                                       self._property_path(right, propname),
                                       left.rev, right.rev, left.tag, right.tag)
    if flag is not None:
      return [ _item(type=flag) ]
    else:
      return DiffSource(fp, self.request.cfg)

  def _fp_vclib_raw(self, left, right, fp, propname):
    """Return a single item for a raw diff read from FP.

    Unlike _fp_vclib_hr(), the result is one item, not a list: either a
    flag item or a 'raw' item wrapping the stream and escaped headers.
    """
    date1, date2, flag, headers = \
                    diff_parse_headers(fp, self.diff_type,
                                       self._property_path(left, propname),
                                       self._property_path(right, propname),
                                       left.rev, right.rev, left.tag, right.tag)
    if flag is not None:
      return _item(type=flag)
    else:
      return _item(type='raw', raw=MarkupPipeWrapper(fp,
              self.request.server.escape(headers), None, 1))

  def _content_lines(self, side, propname):
    """Return the lines of SIDE's file content (PROPNAME is ignored)."""
    f = self.request.repos.openfile(side.path_comp, side.rev, {})[0]
    try:
      lines = f.readlines()
    finally:
      f.close()
    return lines

  def _content_fp(self, left, right, propname, diff_options):
    """Return the repository's raw diff stream for the file contents."""
    return self.request.repos.rawdiff(left.path_comp, left.rev,
                                      right.path_comp, right.rev,
                                      self.diff_type, diff_options)

  def _prop_lines(self, side, propname):
    """Return the value of property PROPNAME on SIDE as a list of bytes
    lines ('' is used when the property is absent)."""
    val = side.properties.get(propname, '')
    # FIXME: dirty hack for Python 3: we need bytes as return value
    return val.encode('utf-8','surrogateescape').splitlines()

  def _prop_fp(self, left, right, propname, diff_options):
    """Return a diff stream comparing property PROPNAME on both sides.

    The two values are written to temporary files which are handed,
    together with display info, to vclib._diff_fp().
    """
    fn_left = self._temp_file(left.properties.get(propname))
    fn_right = self._temp_file(right.properties.get(propname))
    diff_args = vclib._diff_args(self.diff_type, diff_options)
    info_left = self._property_path(left, propname), \
                left.log_entry.date, left.rev
    info_right = self._property_path(right, propname), \
                 right.log_entry.date, right.rev
    return vclib._diff_fp(fn_left, fn_right, info_left, info_right,
                          self.request.cfg.utilities.diff or 'diff', diff_args)

  def _temp_file(self, val):
    '''Create a temporary file with content from val and return its name.

    A false VAL (None, empty) yields an empty file.  Text is written as
    UTF-8 with 'surrogateescape'; bytes are written unchanged.  If
    writing fails, the file is closed and removed before the error
    propagates.'''
    fd, fn = tempfile.mkstemp()
    try:
      with os.fdopen(fd, "wb") as fp:
        if val:
          if not isinstance(val, bytes):
            val = val.encode('utf-8', 'surrogateescape')
          fp.write(val)
    except Exception:
      # Don't leave a stray temporary file behind on failure.
      os.remove(fn)
      raise
    return fn

  def _uniq(self, lst):
    '''Return the distinct elements of LST as a sorted list'''
    return sorted(set(lst))

  def _property_path(self, side, propname):
    '''Return path to be displayed in raw diff - possibly augmented with
    property name'''
    if propname is None:
      return side.path
    else:
      return "%s:property(%s)" % (side.path, propname)
3872
3873
def view_diff(request):
  """Generate the "diff" page comparing two revisions.

  Raises ViewVCException with '403 Forbidden' when the diff view is not
  allowed or either side has a binary MIME type, and with
  '400 Bad Request' when the repository reports an invalid revision.
  Returns early, without generating a page, when check_freshness()
  reports a true value.
  """
  cfg = request.cfg
  if 'diff' not in cfg.options.allowed_views:
    raise ViewVCException('Diff generation is disabled',
                          '403 Forbidden')

  p1, p2, rev1, rev2, sym1, sym2 = setup_diff(request)

  mime_left = calculate_mime_type(request, p1, rev1)[0]
  mime_right = calculate_mime_type(request, p2, rev2)[0]
  if any(is_binary_file_mime_type(mime, cfg)
         for mime in (mime_left, mime_right)):
    raise ViewVCException('Display of binary file content disabled '
                          'by configuration', '403 Forbidden')

  # since templates are in use and subversion allows changes to the dates,
  # we can't provide a strong etag
  etag = '%s-%s' % (rev1, rev2)
  if check_freshness(request, None, etag, weak=1):
    return

  left_content, left_props = diff_side_item(request, p1, rev1, sym1)
  right_content, right_props = diff_side_item(request, p2, rev2, sym2)

  desc = DiffDescription(request)

  try:
    # Only files have content to compare.
    if request.pathtype == vclib.FILE:
      desc.anchor("content")
      desc.get_content_diff(left_content, right_content)

    # Properties are compared for every path type.
    desc.anchor("properties")
    desc.get_prop_diff(left_props, right_props)
  except vclib.InvalidRevision:
    raise ViewVCException('Invalid path(s) or revision(s) passed to diff',
                          '400 Bad Request')

  # Same query parameters, but with diff_format set to None.
  no_format_params = request.query_dict.copy()
  no_format_params['diff_format'] = None
  diff_format_action, diff_format_hidden_values = \
    request.get_form(params=no_format_params)

  data = common_template_data(request)
  patch_href = request.get_url(view_func=view_patch,
                               params=no_format_params,
                               escape=1)
  data.merge(TemplateData({
    'diffs' : desc.changes,
    'diff_format' : desc.diff_format,
    'hide_legend' : ezt.boolean(desc.hide_legend),
    'patch_href' : patch_href,
    'diff_format_action' : diff_format_action,
    'diff_format_hidden_values' : diff_format_hidden_values,
    }))
  generate_page(request, "diff", data)
3930
3931
def generate_tarball_header(out, name, size=0, mode=None, mtime=0,
                            uid=0, gid=0, typeflag=None, linkname=b'',
                            uname=b'viewvc', gname=b'viewvc',
                            devmajor=1, devminor=0, prefix=None,
                            magic=b'ustar', version=b'00', chksum=None):
  """Write a 512-byte tar header block describing NAME to OUT.

  OUT is any object with a write() method accepting bytes.  NAME,
  LINKNAME and PREFIX may be str or bytes; str values are encoded as
  UTF-8 with 'surrogateescape'.

  Defaults filled in when the caller passes a false value:
    MODE      0755 if NAME ends with '/', otherwise 0644
    TYPEFLAG  b'2' if LINKNAME is non-empty, b'5' if NAME ends with
              '/', otherwise b'0'
    PREFIX    b''
    CHKSUM    sum of all header bytes, with the checksum field itself
              counted as eight spaces

  When NAME (or LINKNAME) is 100 bytes or longer, an extra
  '././@LongLink' header of type b'L' (or b'K') followed by the full
  value, NUL-padded to a multiple of 512 bytes, is written first; the
  regular header then carries the value truncated to 100 bytes."""
  if not isinstance(name, bytes):
    name = name.encode('utf-8', 'surrogateescape')
  if not isinstance(linkname, bytes):
    linkname = linkname.encode('utf-8', 'surrogateescape')

  is_directory = name.endswith(b'/')

  if not mode:
    mode = 0o0755 if is_directory else 0o0644

  if not typeflag:
    if linkname:
      typeflag = b'2' # symbolic link
    elif is_directory:
      typeflag = b'5' # directory
    else:
      typeflag = b'0' # regular file

  if not prefix:
    prefix = b''
  elif not isinstance(prefix, bytes):
    prefix = prefix.encode('utf-8', 'surrogateescape')

  # generate GNU tar extension headers for a long name (type 'L') and
  # for a long symlink target (type 'K'), in that order.
  for long_value, long_type in ((name, b'L'), (linkname, b'K')):
    if len(long_value) >= 100:
      generate_tarball_header(out, b'././@LongLink', size=len(long_value),
                              typeflag=long_type)
      out.write(long_value)
      # pad the payload up to the next 512-byte boundary
      out.write(b'\0' * (-len(long_value) % 512))

  # Header fields that precede the checksum field ...
  block1 = struct.pack('100s 8s 8s 8s 12s 12s',
                       name,
                       b'%07o' % mode,
                       b'%07o' % uid,
                       b'%07o' % gid,
                       b'%011o' % size,
                       b'%011o' % mtime)

  # ... and those that follow it.
  block2 = struct.pack('c 100s 6s 2s 32s 32s 8s 8s 155s',
                       typeflag,
                       linkname,
                       magic,
                       version,
                       uname,
                       gname,
                       b'%07o' % devmajor,
                       b'%07o' % devminor,
                       prefix)

  if not chksum:
    # The checksum is calculated as if its own field held eight spaces.
    chksum = sum(block1) + sum(b'        ') + sum(block2)

  block = block1 + struct.pack('8s', b'%07o' % chksum) + block2
  block = block + b'\0' * (512 - len(block))

  out.write(block)
4005
def generate_tarball(out, request, reldir, stack, dir_mtime=None):
  """Write tar entries for one repository directory, recursively, to OUT.

  OUT is the writable binary stream receiving the tar data.  RELDIR is
  the list of path components, relative to REQUEST.path_parts, of the
  directory to archive.  STACK is a list of tar directory names; for
  CVS roots it holds the directories whose headers have not been
  written yet (they are only written once a live file is found beneath
  them).  DIR_MTIME is the timestamp to use for this directory's
  header; when None, the youngest date found among the directory's
  entries is used."""
  # get directory info from repository
  rep_path = request.path_parts + reldir
  entries = request.repos.listdir(rep_path, request.pathrev, {})
  request.repos.dirlogs(rep_path, request.pathrev, entries, {})
  entries.sort(key=lambda entry: entry.name)

  # figure out corresponding path in tar file. everything gets put underneath
  # a single top level directory named after the repository directory being
  # tarred
  if request.path_parts:
    tar_dir = request.path_parts[-1] + '/'
  else:
    # Don't handle context as a directory in the tar ball.
    root_path_parts = _path_parts(request.rootname)
    tar_dir = root_path_parts[-1] + '/'
  if reldir:
    tar_dir = tar_dir + _path_join(reldir) + '/'

  cvs = request.roottype == 'cvs'

  # If our caller doesn't dictate a datestamp to use for the current
  # directory, its datestamps will be the youngest of the datestamps
  # of versioned items in that subdirectory.  We'll be ignoring dead
  # or busted items and, in CVS, subdirs.
  if dir_mtime is None:
    dir_mtime = 0
    for entry in entries:
      if cvs and (entry.kind != vclib.FILE
                  or entry.rev is None or entry.dead):
        continue
      if (entry.date is not None) and (entry.date > dir_mtime):
        dir_mtime = entry.date

  # Push current directory onto the stack.
  stack.append(tar_dir)

  # If this is Subversion, we generate a header for this directory
  # regardless of its contents.  For CVS it will only get into the
  # tarball if it has files underneath it, which we determine later.
  if not cvs:
    generate_tarball_header(out, tar_dir, mtime=dir_mtime)

  # Run through the files in this directory, skipping busted and
  # unauthorized ones.
  for entry in entries:
    if entry.kind != vclib.FILE:
      continue
    if cvs and (entry.rev is None or entry.dead):
      continue

    # If we get here, we've seen at least one valid file in the
    # current directory.  For CVS, we need to make sure there are
    # directory parents to contain it, so we flush the stack.
    if cvs:
      for pending_dir in stack:
        generate_tarball_header(out, pending_dir, mtime=dir_mtime)
      del stack[:]

    file_path = rep_path + [entry.name]
    tar_name = tar_dir + entry.name
    file_mtime = entry.date or 0

    # Calculate the mode for the file.  Sure, we could look directly
    # at the ,v file in CVS, but that's a layering violation we'd like
    # to avoid as much as possible.
    if request.repos.isexecutable(file_path, request.pathrev):
      mode = 0o0755
    else:
      mode = 0o0644

    # Is this thing a symlink?
    #
    ### FIXME: A better solution would be to have vclib returning
    ### symlinks with a new vclib.SYMLINK path type.
    symlink_target = None
    if hasattr(request.repos, 'get_symlink_target'):
      symlink_target = request.repos.get_symlink_target(file_path,
                                                        request.pathrev)

    # If the object is a symlink, generate the appropriate header.
    # Otherwise, we're dealing with a regular file.
    if symlink_target:
      generate_tarball_header(out, tar_name, 0, mode, file_mtime,
                              typeflag=b'2', linkname=symlink_target)
    else:
      filesize = request.repos.filesize(file_path, request.pathrev)

      if filesize == -1:
        # Bummer.  We have to calculate the filesize manually.
        fp = request.repos.openfile(file_path, request.pathrev, {})[0]
        try:
          filesize = 0
          while 1:
            chunk = retry_read(fp)
            if not chunk:
              break
            filesize = filesize + len(chunk)
        finally:
          # Don't leak the handle if reading fails part way through.
          fp.close()

      # Write the tarball header...
      generate_tarball_header(out, tar_name, filesize, mode, file_mtime)

      # ...the file's contents ...
      fp = request.repos.openfile(file_path, request.pathrev, {})[0]
      try:
        while 1:
          chunk = retry_read(fp)
          if not chunk:
            break
          out.write(chunk)
      finally:
        # Don't leak the handle if reading or writing fails.
        fp.close()

      # ... and then add the block padding.
      out.write(b'\0' * (511 - (filesize + 511) % 512))

  # Recurse into subdirectories, skipping busted and unauthorized (or
  # configured-to-be-hidden) ones.
  for entry in entries:
    if entry.errors or entry.kind != vclib.DIR:
      continue
    if request.cfg.options.hide_cvsroot \
       and is_cvsroot_path(request.roottype, rep_path + [entry.name]):
      continue

    # Only Subversion directories carry a usable date of their own; for
    # anything else the callee derives the datestamp from its contents.
    mtime = request.roottype == 'svn' and entry.date or None
    generate_tarball(out, request, reldir + [entry.name], stack, mtime)

  # Pop the current directory from the stack.
  del stack[-1:]
4131
def download_tarball(request):
  """Stream a gzip-compressed tarball of the requested directory.

  Raises ViewVCException with '403 Forbidden' when the 'tar' view is
  not among the allowed views."""
  cfg = request.cfg

  if 'tar' not in cfg.options.allowed_views:
    raise ViewVCException('Tarball generation is disabled',
                          '403 Forbidden')

  # Set DEBUG_TARFILE_PATH to a server-local path to enable tarball
  # generation debugging and cause ViewVC to write the generated
  # tarball (minus the compression layer) to that server filesystem
  # location.  This is *NOT* suitable for production environments!
  #
  # Otherwise, we do tarball generation as usual by getting a
  # writeable server output stream -- disabling any default
  # compression thereupon -- and wrapping that in our own gzip stream
  # wrapper.
  DEBUG_TARFILE_PATH = None
  if DEBUG_TARFILE_PATH is None:
    # The download is named after the root, plus the last component of
    # the requested path when there is one.
    archive_name = request.rootname
    if request.path_parts:
      archive_name = "%s-%s" % (archive_name, request.path_parts[-1])
    disposition = 'attachment; filename="%s.tar.gz"' % (archive_name)
    request.server.add_header('Content-Disposition', disposition)
    server_fp = get_writeready_server_file(request, 'application/x-gzip',
                                           allow_compress=False)
    fp = gzip.GzipFile(filename='', mode='wb', compresslevel=9,
                       fileobj=server_fp)
  else:
    fp = open(DEBUG_TARFILE_PATH, 'wb')

  ### FIXME: For Subversion repositories, we can get the real mtime of the
  ### top-level directory here.
  generate_tarball(fp, request, [], [])

  # Finish the archive with 1024 NUL bytes.
  fp.write(b'\0' * 1024)
  fp.close()

  if DEBUG_TARFILE_PATH:
    server_fp = get_writeready_server_file(request, is_text=True)
    server_fp.write("""
<html>
<body>
<p>Tarball '%s' successfully generated!</p>
</body>
</html>""" % (DEBUG_TARFILE_PATH))
4176
4177
def view_revision(request):
  """Generate the "revision" page for one revision of a Subversion root.

  The revision comes from the 'revision' query parameter.  Raises
  ViewVCException with '400 Bad Request' for non-Subversion roots and
  with '404 Not Found' when the revision is invalid.  Returns without
  generating a page when the revision is not the youngest one and
  check_freshness() returns a true value."""
  if request.roottype != "svn":
    raise ViewVCException("Revision view not supported for CVS repositories "
                          "at this time.",
                          "400 Bad Request")

  cfg = request.cfg
  query_dict = request.query_dict
  try:
    rev = request.repos._getrev(query_dict.get('revision'))
  except vclib.InvalidRevision:
    raise ViewVCException('Invalid revision', '404 Not Found')
  youngest_rev = request.repos.get_youngest_revision()

  # The revision number acts as a weak validator (but we tell browsers
  # not to cache the youngest revision).
  if rev != youngest_rev and check_freshness(request, None, str(rev), weak=1):
    return

  # Fetch the revision information.
  date, author, msg, revprops, changes = request.repos.revinfo(rev)
  date_str = make_time_string(date, cfg)

  # Fix up the revprops list (rather like get_itemprops()).
  props = []
  for name in sorted(revprops):
    # skip non-utf8 property names
    if is_undisplayable(name, 'utf-8'):
      continue
    undisplayable = is_undisplayable(revprops[name])
    if not undisplayable:
      lf = LogFormatter(request, revprops[name].decode(request.repos.encoding,
                                                       'backslashreplace'))
      value = lf.get(maxlen=0, htmlize=1)
    else:
      # note non-utf8 property values
      value = None
    props.append(_item(name=name, value=value,
                       undisplayable=ezt.boolean(undisplayable)))

  # Sort the changes list by path.
  changes.sort(key=lambda change: change.path_parts)

  # Handle limit_changes parameter.  The request may override the
  # configured limit; a value of 0 means "no limit".
  cfg_limit_changes = cfg.options.limit_changes
  limit_changes = int(query_dict.get('limit_changes', cfg_limit_changes))
  more_changes = None
  more_changes_href = None
  first_changes = None
  first_changes_href = None
  num_changes = len(changes)
  if limit_changes and len(changes) > limit_changes:
    # Truncate the list and offer a link that shows everything.
    more_changes = len(changes) - limit_changes
    params = query_dict.copy()
    params['limit_changes'] = 0
    more_changes_href = request.get_url(params=params, escape=1)
    changes = changes[:limit_changes]
  elif cfg_limit_changes and len(changes) > cfg_limit_changes:
    # Everything is shown although the configured limit is exceeded;
    # offer a link back to the limited view.
    first_changes = cfg_limit_changes
    params = query_dict.copy()
    params['limit_changes'] = None
    first_changes_href = request.get_url(params=params, escape=1)

  # Add the hrefs, types, and prev info
  for change in changes:
    change.view_href = change.diff_href = change.type = change.log_href = None

    # If the path is newly added, don't claim text or property
    # modifications.
    if (change.action == vclib.ADDED or change.action == vclib.REPLACED) \
       and not change.copied:
      change.text_changed = 0
      change.props_changed = 0

    # Calculate the view link URLs (for which we must have a pathtype).
    if change.pathtype:
      view_func = None
      if change.pathtype is vclib.FILE \
         and 'markup' in cfg.options.allowed_views:
        view_func = view_markup
      elif change.pathtype is vclib.DIR:
        view_func = view_directory

      path = _path_join(change.path_parts)
      base_path = _path_join(change.base_path_parts)
      # Deleted paths are linked at their base path and base revision.
      if change.action == vclib.DELETED:
        link_rev = str(change.base_rev)
        link_where = base_path
      else:
        link_rev = str(rev)
        link_where = path

      change.view_href = request.get_url(view_func=view_func,
                                         where=link_where,
                                         pathtype=change.pathtype,
                                         params={'pathrev' : link_rev},
                                         escape=1)
      change.log_href = request.get_url(view_func=view_log,
                                        where=link_where,
                                        pathtype=change.pathtype,
                                        params={'pathrev' : link_rev},
                                        escape=1)

      if (change.pathtype is vclib.FILE and change.text_changed) \
          or change.props_changed:
        change.diff_href = request.get_url(view_func=view_diff,
                                           where=path,
                                           pathtype=change.pathtype,
                                           params={'pathrev' : str(rev),
                                                   'r1' : str(rev),
                                                   'r2' : str(change.base_rev),
                                                   },
                                           escape=1)


    # use same variable names as the log template
    change.path = _path_join(change.path_parts)
    change.copy_path = _path_join(change.base_path_parts)
    change.copy_rev = change.base_rev
    change.text_mods = ezt.boolean(change.text_changed)
    change.prop_mods = ezt.boolean(change.props_changed)
    change.is_copy = ezt.boolean(change.copied)
    change.pathtype = (change.pathtype == vclib.FILE and 'file') \
                      or (change.pathtype == vclib.DIR and 'dir') \
                      or None
    del change.path_parts
    del change.base_path_parts
    del change.base_rev
    del change.text_changed
    del change.props_changed
    del change.copied

  # Links to the neighbouring revisions.  The youngest revision fetched
  # above is reused so that the "next" link agrees with the caching
  # decision made at the end of this function.
  prev_rev_href = next_rev_href = None
  if rev > 0:
    prev_rev_href = request.get_url(view_func=view_revision,
                                    where=None,
                                    pathtype=None,
                                    params={'revision': str(rev - 1)},
                                    escape=1)
  if rev < youngest_rev:
    next_rev_href = request.get_url(view_func=view_revision,
                                    where=None,
                                    pathtype=None,
                                    params={'revision': str(rev + 1)},
                                    escape=1)
  jump_rev_action, jump_rev_hidden_values = \
    request.get_form(params={'revision': None})

  lf = LogFormatter(request, msg)
  data = common_template_data(request)
  data.merge(TemplateData({
    'rev' : str(rev),
    'author' : author,
    'date' : date_str,
    'log' : lf.get(maxlen=0, htmlize=1),
    'properties' : props,
    'ago' : date is not None and html_time(request, date, 1) or None,
    'changes' : changes,
    'prev_href' : prev_rev_href,
    'next_href' : next_rev_href,
    'num_changes' : num_changes,
    'limit_changes': limit_changes,
    'more_changes': more_changes,
    'more_changes_href': more_changes_href,
    'first_changes': first_changes,
    'first_changes_href': first_changes_href,
    'jump_rev_action' : jump_rev_action,
    'jump_rev_hidden_values' : jump_rev_hidden_values,
    'revision_href' : request.get_url(view_func=view_revision,
                                      where=None,
                                      pathtype=None,
                                      params={'revision': str(rev)},
                                      escape=1),
  }))
  if rev == youngest_rev:
    request.server.add_header("Cache-control", "no-store")
  generate_page(request, "revision", data)
4358
def is_query_supported(request):
  """Returns true if querying is supported for the given path.

  That is the case when the commits database is enabled in the
  configuration, the request addresses a directory, and the root is a
  CVS or Subversion one."""
  querying_enabled = request.cfg.cvsdb.enabled
  return (querying_enabled
          and request.pathtype == vclib.DIR
          and request.roottype in ('cvs', 'svn'))
4364
def is_querydb_nonempty_for_root(request):
  """Return 1 iff commits database integration is supported *and* the
  current root is found in that database, otherwise 0.  The database
  is only consulted if the check_database_for_root option is set;
  when it is not set, a supported root is assumed to be present."""
  global cvsdb

  cvsdb_cfg = request.cfg.cvsdb
  if not (cvsdb_cfg.enabled and request.roottype in ('cvs', 'svn')):
    return 0
  if not cvsdb_cfg.check_database_for_root:
    return 1

  # Import lazily so the database module is only loaded when needed.
  import cvsdb
  db = cvsdb.ConnectDatabaseReadOnly(request.cfg)
  repos_root, repos_dir = cvsdb.FindRepository(db, request.rootpath)
  if repos_root:
    return 1
  return 0
4380
def validate_query_args(request):
  """Validate the query form arguments found in REQUEST.query_dict.

  This is additional input validation beyond what is offered by the
  CGI param validation loop in Request.run_viewvc().  For each of the
  'branch', 'file', 'comment' and 'who' arguments, the companion
  XXX_match argument must name a known match type, and when that type
  is 'regex' or 'notregex' a non-empty argument value must compile as
  a regular expression.

  Returns None.  Raises ViewVCException with '400 Bad Request' when a
  check fails."""
  for arg_base in ['branch', 'file', 'comment', 'who']:
    # First, make sure the XXX_match args have valid values:
    arg_match = arg_base + '_match'
    arg_match_value = request.query_dict.get(arg_match, 'exact')
    if arg_match_value not in ('exact', 'like', 'glob', 'regex', 'notregex'):
      raise ViewVCException(
        'An illegal value was provided for the "%s" parameter.'
        % (arg_match),
        '400 Bad Request')

    # Now, for those args which are supposed to be regular expressions (per
    # their corresponding XXX_match values), make sure they are.
    if arg_match_value in ('regex', 'notregex'):
      arg_base_value = request.query_dict.get(arg_base)
      if arg_base_value:
        try:
          re.compile(arg_base_value)
        except Exception:
          # Any failure to compile counts as an illegal value; the
          # compiler may raise more than just re.error for bad
          # patterns.  Unlike a bare "except:", this lets
          # KeyboardInterrupt and SystemExit propagate.
          raise ViewVCException(
            'An illegal value was provided for the "%s" parameter.'
            % (arg_base),
            '400 Bad Request')
4407
def view_queryform(request):
  """Generate the "query_form" page, pre-filled from the current query.

  Raises ViewVCException with '403 Forbidden' when
  is_query_supported() is false for REQUEST; validate_query_args() may
  raise for illegal argument values."""
  if not is_query_supported(request):
    raise ViewVCException('Can not query project root "%s" at "%s".'
                                 % (request.rootname, request.where),
                                 '403 Forbidden')

  # Do some more precise input validation.
  validate_query_args(request)

  query_action, query_hidden_values = \
    request.get_form(view_func=view_query, params={'limit_changes': None})
  limit_changes = \
    int(request.query_dict.get('limit_changes',
                               request.cfg.options.limit_changes))

  # Form fields echoed back to the template, each paired with the
  # value used when the query does not supply one.
  form_defaults = (
    ('branch', ''),
    ('branch_match', 'exact'),
    ('dir', ''),
    ('file', ''),
    ('file_match', 'exact'),
    ('who', ''),
    ('who_match', 'exact'),
    ('comment', ''),
    ('comment_match', 'exact'),
    ('querysort', 'date'),
    ('date', 'hours'),
    ('hours', '2'),
    ('mindate', ''),
    ('maxdate', ''),
    )

  data = common_template_data(request)

  # Every echoed value is escaped by the server before reaching the page.
  template_values = {}
  for field, default in form_defaults:
    raw_value = request.query_dict.get(field, default)
    template_values[field] = request.server.escape(raw_value)

  template_values['query_action'] = query_action
  template_values['query_hidden_values'] = query_hidden_values
  template_values['limit_changes'] = limit_changes
  template_values['dir_href'] = request.get_url(view_func=view_directory,
                                                params={}, escape=1)

  data.merge(TemplateData(template_values))
  generate_page(request, "query_form", data)
4449
def parse_date(datestr):
  """Parse a date string from the query form.

  DATESTR must look like "YYYY-MM-DD", optionally followed by one or
  more spaces and "HH:MM" or "HH:MM:SS"; omitted time fields count as
  zero.  The date is taken to be in UTC.

  Returns the corresponding "seconds since epoch" value, or None when
  DATESTR does not have the expected form or names a month or year
  that calendar.timegm() rejects."""
  match = re.match(r'^(\d\d\d\d)-(\d\d)-(\d\d)(?:\ +'
                   r'(\d\d):(\d\d)(?::(\d\d))?)?$', datestr)
  if not match:
    return None

  # Groups 4-6 (hour, minute, second) are None when not given.
  year, month, day, hour, minute, second = [
    int(field) if field is not None else 0 for field in match.groups()]

  # return a "seconds since epoch" value assuming date given in UTC
  try:
    return calendar.timegm((year, month, day, hour, minute, second, 0, 0, 0))
  except ValueError:
    # e.g. month 13 or year 0: well-formed, but not a real date.
    return None
4479
def english_query(request):
  """Generate a sentence describing the query.

  The sentence is assembled from the 'dir', 'file', 'branch',
  'comment', 'who' and date-related entries of REQUEST.query_dict.
  User-supplied values are passed through REQUEST.server.escape() and
  wrapped in HTML emphasis markup."""
  cfg = request.cfg
  query = request.query_dict
  escape = request.server.escape
  pieces = [ 'Checkins ' ]

  dirs = query.get('dir', '')
  if dirs:
    # A comma in the value means more than one directory was named.
    noun = 'subdirectories' if ',' in dirs else 'subdirectory'
    pieces.extend(['to ', noun, ' <em>%s</em> ' % escape(dirs)])

  filename = query.get('file', '')
  if filename:
    if dirs:
      pieces.append('and ')
    pieces.append('to file <em>%s</em> ' % escape(filename))

  who = query.get('who', '')

  branch = query.get('branch', '')
  if branch:
    pieces.append('on branch <em>%s</em> ' % escape(branch))
  else:
    pieces.append('on all branches ')

  comment = query.get('comment', '')
  if comment:
    pieces.append('with comment <i>%s</i> ' % escape(comment))

  if who:
    pieces.append('by <em>%s</em> ' % escape(who))

  # Date selectors whose description needs no further input.
  fixed_phrases = {
    'day': 'in the last day',
    'week': 'in the last week',
    'month': 'in the last month',
    'all': 'since the beginning of time',
    }

  when = query.get('date', 'hours')
  if when == 'hours':
    pieces.append('in the last %s hours' % escape(query.get('hours', '2')))
  elif when in fixed_phrases:
    pieces.append(fixed_phrases[when])
  elif when == 'explicit':
    mindate = query.get('mindate', '')
    maxdate = query.get('maxdate', '')
    # With both bounds given the range reads "between X and Y".
    if mindate and maxdate:
      lower_word, upper_word = 'between', 'and'
    else:
      lower_word, upper_word = 'since', 'before'
    if mindate:
      mindate = make_time_string(parse_date(mindate), cfg)
      pieces.append('%s <em>%s</em> ' % (lower_word, mindate))
    if maxdate:
      maxdate = make_time_string(parse_date(maxdate), cfg)
      pieces.append('%s <em>%s</em> ' % (upper_word, maxdate))

  return ''.join(pieces)
4534
def prev_rev(rev):
  """Returns a string representing the previous revision of the argument.

  The last dotted component of REV is decremented.  If that yields 0
  on a revision with more than two components, the last two components
  are dropped instead (we have passed the beginning of the branch)."""
  components = rev.split('.')
  last = int(components.pop()) - 1
  if last == 0 and len(components) > 1:
    # prune the branch number along with the exhausted final component
    return '.'.join(components[:-1])
  return '.'.join(components + [str(last)])
4544
def build_commit(request, files, max_files, dir_strip, format):
  """Return a commit object built from the information in FILES, or
  None if no allowed files are present in the set.  DIR_STRIP is the
  path prefix to remove from the commit object's set of files.  If
  MAX_FILES is non-zero, it is used to limit the number of files
  returned in the commit object.  FORMAT is the requested output
  format of the query request.

  Author, date, description and revision of the commit are taken from
  the first item of FILES."""

  cfg = request.cfg
  first_file = files[0]
  author = first_file.GetAuthor()
  date = first_file.GetTime()
  desc = first_file.GetDescription()
  commit_rev = first_file.GetRevision()
  len_strip = len(dir_strip)
  commit_files = []
  num_allowed = 0
  plus_count = 0
  minus_count = 0
  found_unreadable = 0
  is_svn = request.roottype == 'svn'

  for f in files:
    dirname = f.GetDirectory()
    filename = f.GetFile()
    if dir_strip:
      assert dirname[:len_strip] == dir_strip
      assert len(dirname) == len_strip or dirname[len_strip] == '/'
      dirname = dirname[len_strip+1:]
    where = ("%s/%s" % (dirname, filename)) if dirname else filename
    rev = f.GetRevision()
    rev_prev = prev_rev(rev)
    commit_time = f.GetTime()
    if commit_time:
      commit_time = make_time_string(commit_time, cfg)
    change_type = f.GetTypeString()

    # In CVS, we can actually look at deleted revisions; in Subversion
    # we can't -- we'll look at the previous revision instead.
    if is_svn and change_type == 'Remove':
      exam_rev = rev_prev
    else:
      exam_rev = rev

    # Check path access (since the commits database logic bypasses the
    # vclib layer and, thus, the vcauth stuff that layer uses).
    path_parts = _path_parts(where)
    if path_parts:
      # Skip files in CVSROOT if asked to hide such.
      if cfg.options.hide_cvsroot \
         and is_cvsroot_path(request.roottype, path_parts):
        found_unreadable = 1
        continue

      # We have to do a rare authz check here because this data comes
      # from the CVSdb, not from the vclib providers.
      #
      # WARNING: The Subversion CVSdb integration logic is weak, weak,
      # weak.  It has no ability to track copies, so complex
      # situations like a copied directory with a deleted subfile (all
      # in the same revision) are very ... difficult.  We've no choice
      # but to omit as unauthorized paths the authorization logic
      # can't find.
      try:
        readable = vclib.check_path_access(request.repos, path_parts,
                                           None, exam_rev)
      except vclib.ItemNotFound:
        readable = 0
      if not readable:
        found_unreadable = 1
        continue

    # Subversion links pin the revision via 'pathrev'; CVS links name
    # the revision and carry the branch (if any) as 'pathrev'.
    if is_svn:
      params = { 'pathrev': exam_rev }
    else:
      params = { 'revision': exam_rev, 'pathrev': f.GetBranch() or None }

    dir_href = request.get_url(view_func=view_directory,
                               where=dirname, pathtype=vclib.DIR,
                               params=params, escape=1)
    log_href = request.get_url(view_func=view_log,
                               where=where, pathtype=vclib.FILE,
                               params=params, escape=1)

    # The remaining links are only offered when applicable.
    view_href = None
    if 'markup' in cfg.options.allowed_views:
      view_href = request.get_url(view_func=view_markup,
                                  where=where, pathtype=vclib.FILE,
                                  params=params, escape=1)
    download_href = None
    if 'co' in cfg.options.allowed_views:
      download_href = request.get_url(view_func=view_checkout,
                                      where=where, pathtype=vclib.FILE,
                                      params=params, escape=1)
    diff_href = None
    if change_type == 'Change':
      diff_href_params = params.copy()
      diff_href_params.update({
        'r1': rev_prev,
        'r2': rev,
        'diff_format': None
        })
      diff_href = request.get_url(view_func=view_diff,
                                  where=where, pathtype=vclib.FILE,
                                  params=diff_href_params, escape=1)
    mime_type, encoding = calculate_mime_type(request, path_parts, exam_rev)
    prefer_markup = ezt.boolean(default_view(mime_type, cfg) == view_markup)

    # Update plus/minus line change count.
    plus = int(f.GetPlusCount())
    minus = int(f.GetMinusCount())
    plus_count += plus
    minus_count += minus

    # Files beyond the limit still count towards the totals above, but
    # are not listed individually.
    num_allowed += 1
    if max_files and num_allowed > max_files:
      continue

    commit_files.append(_item(date=commit_time,
                              dir=request.server.escape(dirname),
                              file=request.server.escape(filename),
                              author=request.server.escape(f.GetAuthor()),
                              rev=rev,
                              branch=f.GetBranch(),
                              plus=plus,
                              minus=minus,
                              type=change_type,
                              dir_href=dir_href,
                              log_href=log_href,
                              view_href=view_href,
                              download_href=download_href,
                              prefer_markup=prefer_markup,
                              diff_href=diff_href))

  # No files survived authz checks?  Let's just pretend this
  # little commit didn't happen, shall we?
  if not commit_files:
    return None

  commit = _item(num_files=len(commit_files), files=commit_files,
                 plus=plus_count, minus=minus_count)
  commit.limited_files = ezt.boolean(num_allowed > len(commit_files))

  # We'll mask log messages in commits which contain unreadable paths,
  # but even that is kinda iffy.  If a person searches for
  # '/some/hidden/path' across log messages, then gets a response set
  # that shows commits lacking log message, said person can reasonably
  # assume that the log messages contained the hidden path, and that
  # this is likely because they are referencing a real path in the
  # repository -- a path the user isn't supposed to even know about.
  if found_unreadable:
    commit.log = None
    commit.short_log = None
  else:
    lf = LogFormatter(request, desc)
    htmlize = (format != 'rss')
    commit.log = lf.get(maxlen=0, htmlize=htmlize)
    commit.short_log = lf.get(maxlen=cfg.options.short_log_len, htmlize=htmlize)
  commit.author = request.server.escape(author)
  commit.rss_date = make_rss_time_string(date, request.cfg)

  # Only Subversion commits have a single revision to link to.
  if is_svn:
    commit.rev = commit_rev
    scheme = "https" if request.server.getenv("HTTPS") == "on" else "http"
    host = request.server.getenv("HTTP_HOST")
    revision_url = request.get_url(view_func=view_revision,
                                   params={'revision': commit.rev},
                                   escape=1)
    commit.rss_url = '%s://%s%s' % (scheme, host, revision_url)
  else:
    commit.rev = None
    commit.rss_url = None
  return commit
4711
def query_backout(request, commits):
  """Send a plain-text shell script that backs out COMMITS.

  COMMITS is a sequence of commit items whose `files` attribute lists
  per-file items carrying `rev`, `dir` and `file`.  One command per
  file is written: a `cvs update -j` for 'cvs' roots, or an
  `svn merge -r` for 'svn' roots.  Any other root type produces only
  the header comment.  Returns None."""
  out = get_writeready_server_file(request, 'text/plain', is_text=True)

  # Nothing selected: explain that instead of emitting an empty script.
  if not commits:
    out.write("""\
# No changes were selected by the query.
# There is nothing to back out.
""")
    return

  out.write("""\
# This page can be saved as a shell script and executed.
# It should be run at the top of your work area.  It will update
# your working copy to back out the changes selected by the
# query.
""")

  # Pick the command template for this kind of root.  Both templates
  # take (rev, previous rev, dir, file), in that order.
  roottype = request.roottype
  if roottype == 'cvs':
    command = 'cvs update -j %s -j %s %s/%s\n'
  elif roottype == 'svn':
    command = 'svn merge -r %s:%s %s/%s\n'
  else:
    command = None

  for commit in commits:
    for fileinfo in commit.files:
      if command is None:
        continue
      out.write(command % (fileinfo.rev, prev_rev(fileinfo.rev),
                           fileinfo.dir, fileinfo.file))
4736
def view_query(request):
  """Query the commit database using the request's query parameters
  and send the matching commits to the client.

  The output depends on the 'format' query parameter: 'backout' is
  handed to query_backout(), 'rss' renders the "rss" template as
  application/rss+xml, and anything else renders the "query_results"
  template.  Nothing is generated when check_freshness() returns a
  true value for the newest commit time.

  Raises ViewVCException ('403 Forbidden') when is_query_supported()
  rejects the request, and ViewVCException when the request's root is
  not found in the commit database."""
  if not is_query_supported(request):
    raise ViewVCException('Can not query project root "%s" at "%s".'
                          % (request.rootname, request.where),
                          '403 Forbidden')

  cfg = request.cfg

  # Do some more precise input validation.
  validate_query_args(request)

  # get form data
  branch = request.query_dict.get('branch', '')
  branch_match = request.query_dict.get('branch_match', 'exact')
  dir = request.query_dict.get('dir', '')
  file = request.query_dict.get('file', '')
  file_match = request.query_dict.get('file_match', 'exact')
  who = request.query_dict.get('who', '')
  who_match = request.query_dict.get('who_match', 'exact')
  comment = request.query_dict.get('comment', '')
  comment_match = request.query_dict.get('comment_match', 'exact')
  querysort = request.query_dict.get('querysort', 'date')
  date = request.query_dict.get('date', 'hours')
  hours = request.query_dict.get('hours', '2')
  mindate = request.query_dict.get('mindate', '')
  maxdate = request.query_dict.get('maxdate', '')
  format = request.query_dict.get('format')
  limit_changes = int(request.query_dict.get('limit_changes',
                                             cfg.options.limit_changes))

  # Allowed values for the *_match, querysort and date parameters.
  # Only the keys matter; the dictionaries are used for membership tests.
  match_types = { 'exact':1, 'like':1, 'glob':1, 'regex':1, 'notregex':1 }
  sort_types = { 'date':1, 'author':1, 'file':1 }
  date_types = { 'hours':1, 'day':1, 'week':1, 'month':1,
                 'all':1, 'explicit':1 }

  # parse various fields, validating or converting them
  # (unrecognized values silently fall back to the defaults)
  if branch_match not in match_types: branch_match = 'exact'
  if file_match not in match_types: file_match = 'exact'
  if who_match not in match_types: who_match = 'exact'
  if comment_match not in match_types: comment_match = 'exact'
  if querysort not in sort_types: querysort = 'date'
  if date not in date_types: date = 'hours'
  mindate = parse_date(mindate)
  maxdate = parse_date(maxdate)

  # Import cvsdb only when a query is actually run; the 'global'
  # declaration binds the module as a module-level name, not a local.
  global cvsdb
  import cvsdb

  db = cvsdb.ConnectDatabaseReadOnly(cfg)
  repos_root, repos_dir = cvsdb.FindRepository(db, request.rootpath)
  if not repos_root:
    raise ViewVCException(
      "The root '%s' was not found in the commit database "
      % request.rootname)

  # create the database query from the form data
  query = cvsdb.CreateCheckinQuery()
  query.SetRepository(repos_root)
  # treat "HEAD" specially ...
  if branch_match == 'exact' and branch == 'HEAD':
    query.SetBranch('')
  elif branch:
    query.SetBranch(branch, branch_match)
  if dir:
    # 'dir' is a comma-separated list of subdirectories, relative to
    # the current location.  Match each directory itself and, via the
    # LIKE pattern, everything beneath it.
    for subdir in dir.split(','):
      path = (_path_join(repos_dir + request.path_parts
                         + _path_parts(subdir.strip())))
      query.SetDirectory(path, 'exact')
      query.SetDirectory('%s/%%' % cvsdb.EscapeLike(path), 'like')
  else:
    where = _path_join(repos_dir + request.path_parts)
    if where: # if we are in a subdirectory ...
      query.SetDirectory(where, 'exact')
      query.SetDirectory('%s/%%' % cvsdb.EscapeLike(where), 'like')
  if file:
    query.SetFile(file, file_match)
  if who:
    query.SetAuthor(who, who_match)
  if comment:
    query.SetComment(comment, comment_match)
  query.SetSortMethod(querysort)
  if date == 'hours':
    query.SetFromDateHoursAgo(int(hours))
  elif date == 'day':
    query.SetFromDateDaysAgo(1)
  elif date == 'week':
    query.SetFromDateDaysAgo(7)
  elif date == 'month':
    query.SetFromDateDaysAgo(31)
  elif date == 'all':
    pass
  elif date == 'explicit':
    if mindate is not None:
      query.SetFromDateObject(mindate)
    if maxdate is not None:
      query.SetToDateObject(maxdate)

  # Set the admin-defined (via configuration) row limits.  This is to avoid
  # slamming the database server with a monster query.
  if format == 'rss':
    query.SetLimit(cfg.cvsdb.rss_row_limit)
  else:
    query.SetLimit(cfg.cvsdb.row_limit)

  # run the query
  db.RunQuery(query)
  commit_list = query.GetCommitList()
  row_limit_reached = query.GetLimitReached()

  # gather commits
  commits = []
  plus_count = 0
  minus_count = 0
  mod_time = -1
  if commit_list:
    files = []
    # NOTE(review): limited_files is assigned here and again in the
    # loop below, but is never read anywhere in this function.
    limited_files = 0
    current_desc = commit_list[0].GetDescriptionID()
    current_rev = commit_list[0].GetRevision()
    dir_strip = _path_join(repos_dir)

    for commit in commit_list:
      commit_desc = commit.GetDescriptionID()
      commit_rev = commit.GetRevision()

      # base modification time on the newest commit
      if commit.GetTime() > mod_time:
        mod_time = commit.GetTime()

      # For CVS, group commits with the same commit message.
      # For Subversion, group them only if they have the same revision number
      if request.roottype == 'cvs':
        if current_desc == commit_desc:
          files.append(commit)
          continue
      else:
        if current_rev == commit_rev:
          files.append(commit)
          continue

      # This row starts a new group, so flush the group collected so far.
      # append this grouping
      commit_item = build_commit(request, files, limit_changes,
                                 dir_strip, format)
      if commit_item:
        # update running plus/minus totals
        plus_count = plus_count + commit_item.plus
        minus_count = minus_count + commit_item.minus
        commits.append(commit_item)

      # Start the next group with the current row.
      files = [ commit ]
      limited_files = 0
      current_desc = commit_desc
      current_rev = commit_rev

    # we need to tack on our last commit grouping, if any
    commit_item = build_commit(request, files, limit_changes,
                               dir_strip, format)
    if commit_item:
      # update running plus/minus totals
      plus_count = plus_count + commit_item.plus
      minus_count = minus_count + commit_item.minus
      commits.append(commit_item)

  # only show the branch column if we are querying all branches
  # or doing a non-exact branch match on a CVS repository.
  show_branch = ezt.boolean(request.roottype == 'cvs' and
                            (branch == '' or branch_match != 'exact'))

  # backout link
  params = request.query_dict.copy()
  params['format'] = 'backout'
  backout_href = request.get_url(params=params,
                                 escape=1)

  # link to zero limit_changes value
  params = request.query_dict.copy()
  params['limit_changes'] = 0
  limit_changes_href = request.get_url(params=params, escape=1)

  # if we got any results, use the newest commit as the modification time
  if mod_time >= 0:
    if check_freshness(request, mod_time):
      return

  if format == 'backout':
    query_backout(request, commits)
    return

  data = common_template_data(request)
  data.merge(TemplateData({
    'sql': request.server.escape(db.CreateSQLQueryString(query)),
    'english_query': english_query(request),
    'queryform_href': request.get_url(view_func=view_queryform, escape=1),
    'backout_href': backout_href,
    'plus_count': plus_count,
    'minus_count': minus_count,
    'show_branch': show_branch,
    'querysort': querysort,
    'commits': commits,
    'row_limit_reached' : ezt.boolean(row_limit_reached),
    'limit_changes': limit_changes,
    'limit_changes_href': limit_changes_href,
    'rss_link_href': request.get_url(view_func=view_query,
                                     params={'date': 'month'},
                                     escape=1,
                                     prefix=1),
    }))
  if format == 'rss':
    generate_page(request, "rss", data, "application/rss+xml")
  else:
    generate_page(request, "query_results", data)
4948
# Maps each short view code to the function implementing that view.
_views = {
  'annotate':  view_annotate,
  'co':        view_checkout,
  'diff':      view_diff,
  'dir':       view_directory,
  'graph':     view_cvsgraph,
  'graphimg':  view_cvsgraph_image,
  'log':       view_log,
  'markup':    view_markup,
  'patch':     view_patch,
  'query':     view_query,
  'queryform': view_queryform,
  'revision':  view_revision,
  'roots':     view_roots,
  'tar':       download_tarball,
  'redirect_pathrev': redirect_pathrev,
}

# Reverse lookup: view function -> view code.  Built with a
# comprehension so that no loop variables are left behind as
# module-level names.
_view_codes = {view_func: view_code
               for view_code, view_func in _views.items()}
4970
def list_roots(request):
  """Return a dictionary of the configured roots that can be
  instantiated, keyed by root name.

  Each value is a three-item list [ROOTPATH, ROOTTYPE, LASTMOD].
  ROOTTYPE is 'svn' or 'cvs'.  LASTMOD is None for CVS roots; for
  Subversion roots it is an item describing the youngest revision
  (ago, author, date, log, short_log, rev) when the
  'show_roots_lastmod' option is enabled and that information could
  be collected, and None otherwise.

  Roots whose repository constructor raises vclib.ReposNotFound are
  left out of the result."""
  cfg = request.cfg
  allroots = { }

  # Add the viewable Subversion roots
  for root, rootpath in cfg.general.svn_roots.items():
    auth = setup_authorizer(cfg, request.username, root)
    try:
      repos = vclib.svn.SubversionRepository(root, rootpath,
                                             auth, cfg.utilities,
                                             cfg.options.svn_config_dir,
                                             cfg.options.default_encoding)
    except vclib.ReposNotFound:
      continue

    lastmod = None
    if cfg.options.show_roots_lastmod:
      # Last-modified data is a nicety: any ordinary failure while
      # collecting it just leaves LASTMOD unset.  Only Exception is
      # caught, so SystemExit and KeyboardInterrupt still propagate.
      try:
        repos.open()
        youngest_rev = repos.youngest
        date, author, msg, _revprops, _changes = repos.revinfo(youngest_rev)
        date_str = make_time_string(date, cfg)
        ago = html_time(request, date)
        lf = LogFormatter(request, msg)
        log = lf.get(maxlen=0, htmlize=1)
        short_log = lf.get(maxlen=cfg.options.short_log_len, htmlize=1)
        # NOTE(review): author is passed along as returned by
        # revinfo(), without server.escape() -- confirm the consumer
        # escapes it.
        lastmod = _item(ago=ago, author=author, date=date_str, log=log,
                        short_log=short_log, rev=str(youngest_rev))
      except Exception:
        lastmod = None
    allroots[root] = [rootpath, 'svn', lastmod]

  # Add the viewable CVS roots
  for root, rootpath in cfg.general.cvs_roots.items():
    auth = setup_authorizer(cfg, request.username, root)
    try:
      # Constructed only to find out whether the root exists.
      vclib.ccvs.CVSRepository(root, rootpath, auth,
                               cfg.utilities, cfg.options.use_rcsparse,
                               cfg.options.default_encoding)
    except vclib.ReposNotFound:
      continue
    allroots[root] = [rootpath, 'cvs', None]

  return allroots
5014
def _parse_root_parent(pp):
  """Split one "directory [= context] : repo_type" root parent string
  into a (PATH, CONTEXT, REPO_TYPE) tuple.

  REPO_TYPE is the stripped text after the last ':', or None when
  there is no ':' past the first character.  CONTEXT is the result of
  _path_parts() on the stripped text after the last '=' of what
  remains, or None when there is no '=' past the first character.
  PATH is what is left, normalized with os.path.normpath()."""

  # A separator only counts when something precedes it, hence the
  # checks on both the separator and the text before it.
  before, colon, after = pp.rpartition(':')
  if colon and before:
    repo_type = after.strip()
    pp = before.strip()
  else:
    repo_type = None

  before, equals, after = pp.rpartition('=')
  if equals and before:
    context = _path_parts(after.strip())
    pp = before.strip()
  else:
    context = None

  return os.path.normpath(pp), context, repo_type
5035
def expand_root_parents(cfg):
  """Add the roots found under every configured root parent to
  cfg.general.cvs_roots and cfg.general.svn_roots.

  Each item in cfg.general.root_parents is a
  "directory [= context ] : repo_type" string.  When a context is
  given, each root name is prefixed with it.  For CVS parents, a
  root named 'CVSROOT' is dropped when the 'hide_cvsroot' option is
  set.

  Raises ViewVCException when an item has no repository type, or one
  other than "cvs" or "svn"."""

  def _in_context(context, roots):
    # Re-key ROOTS so every name carries the CONTEXT prefix, if any.
    if not context:
      return roots
    return {_path_join(context + [name]): location
            for name, location in roots.items()}

  for pp in cfg.general.root_parents:
    path, context, repo_type = _parse_root_parent(pp)

    if repo_type is None:
      raise ViewVCException(
        'The path "%s" in "root_parents" does not include a '
        'repository type.  Expected "cvs" or "svn".' % (pp))

    if repo_type == 'cvs':
      roots = vclib.ccvs.expand_root_parent(path)
      if cfg.options.hide_cvsroot and 'CVSROOT' in roots:
        del roots['CVSROOT']
      cfg.general.cvs_roots.update(_in_context(context, roots))
    elif repo_type == 'svn':
      roots = vclib.svn.expand_root_parent(path)
      cfg.general.svn_roots.update(_in_context(context, roots))
    else:
      raise ViewVCException(
        'The path "%s" in "root_parents" has an unrecognized '
        'repository type ("%s").  Expected "cvs" or "svn".'
        % (pp, repo_type))
5072
def find_root_in_parents(cfg, path_parts, roottype):
  """Search the configured root parents of type ROOTTYPE for the root
  addressed by PATH_PARTS.

  Return a 3-tuple (FULLROOT, ROOTPATH, REMAIN): FULLROOT is the root
  name including any context prefix, ROOTPATH is the value returned by
  the vclib find_root_in_parent() lookup, and REMAIN holds the path
  components that follow the root name.  Return (None, None, None)
  when nothing matches, so callers can always unpack three values."""
  not_found = (None, None, None)

  # An empty path cannot name a root.
  if not path_parts:
    return not_found

  # Easy out:  caller wants rootname "CVSROOT", and we're hiding those.
  if path_parts[-1] == 'CVSROOT' and cfg.options.hide_cvsroot:
    return not_found

  for pp in cfg.general.root_parents:
    path, context, repo_type = _parse_root_parent(pp)

    if repo_type != roottype:
      continue

    # With a context, the root name follows the context components.
    if context is not None:
      if not _path_starts_with(path_parts, context):
        continue
      rootidx = len(context)
    else:
      rootidx = 0

    if len(path_parts) <= rootidx:
      continue

    rootname = path_parts[rootidx]
    fullroot = _path_join(path_parts[0:rootidx+1])
    remain = path_parts[rootidx+1:]

    rootpath = None
    if roottype == 'cvs':
      rootpath = vclib.ccvs.find_root_in_parent(path, rootname)
    elif roottype == 'svn':
      rootpath = vclib.svn.find_root_in_parent(path, rootname)

    if rootpath is not None:
      return fullroot, rootpath, remain
  return not_found
5108
def locate_root_from_path(cfg, path_parts):
  """Return a 4-tuple ROOTTYPE, ROOTPATH, ROOTNAME, REMAIN for path_parts.

  The explicitly configured CVS roots are tried first, then the
  configured Subversion roots, then the CVS root parents, then the
  Subversion root parents.  A root found through a root parent is
  also recorded in the matching cfg.general.*_roots dictionary.
  Returns (None, None, None, None) when nothing matches."""

  def _roots_of(roottype):
    # The configured-roots dictionary for ROOTTYPE.
    if roottype == 'cvs':
      return cfg.general.cvs_roots
    return cfg.general.svn_roots

  # First pass: roots that are already known by name.
  for roottype in ('cvs', 'svn'):
    for rootname, rootpath in _roots_of(roottype).items():
      prefix = _path_parts(rootname)
      if _path_starts_with(path_parts, prefix):
        return roottype, rootpath, rootname, path_parts[len(prefix):]

  # Second pass: look inside the configured root parents.
  for roottype in ('cvs', 'svn'):
    rootname, path_in_parent, remain = \
            find_root_in_parents(cfg, path_parts, roottype)
    if path_in_parent:
      _roots_of(roottype)[rootname] = path_in_parent
      return roottype, path_in_parent, rootname, remain

  return None, None, None, None
5130
def locate_root(cfg, rootname):
  """Return a 2-tuple ROOTTYPE, ROOTPATH for configured ROOTNAME.

  Exact matches in cfg.general.cvs_roots, then cfg.general.svn_roots,
  win.  Otherwise ROOTNAME is split into path components and handed
  to locate_root_from_path(); its (possibly None, None) answer is
  returned.

  Raises ViewVCException ('500 Internal Server Error') when that
  lookup finds a root under a different name, or leaves path
  components unconsumed."""
  # First try a direct match
  cvs_roots = cfg.general.cvs_roots
  if rootname in cvs_roots:
    return 'cvs', cvs_roots[rootname]
  svn_roots = cfg.general.svn_roots
  if rootname in svn_roots:
    return 'svn', svn_roots[rootname]

  roottype, rootpath, found_name, leftover = \
          locate_root_from_path(cfg, _path_parts(rootname))

  # Nothing found: hand back whatever the path lookup reported.
  if roottype is None:
    return roottype, rootpath

  # Sanity checks: the whole of ROOTNAME must have named the root.
  if found_name != rootname:
    raise ViewVCException(
      'Found root name "%s" doesn\'t match "%s"'
      % (found_name, rootname),
      '500 Internal Server Error')
  if len(leftover) > 0:
    raise ViewVCException(
      'Have remaining path "%s"' % leftover,
      '500 Internal Server Error')
  return roottype, rootpath
5154
def load_config(pathname=None, server=None):
  """Load the ViewVC configuration file and return the Config object.

  SERVER is the server object that will be using this configuration.
  The configuration path is taken from the first of these that yields
  a value: the environment variable VIEWVC_CONF_PATHNAME, the
  environment variable VIEWCVS_CONF_PATHNAME (its legacy name), the
  PATHNAME argument, and finally a hardcoded "viewvc.conf" located one
  directory above the directory containing this file.

  As side effects, sys.tracebacklimit is set from the 'stacktraces'
  option and, when mime type files are configured, the 'mimetypes'
  package is initialized with them."""

  # Environment lookups go through the SERVER object when we have one
  # (and it offers getenv); the OS environment is used otherwise.
  env_get = server and server.getenv
  if not env_get:
    env_get = os.environ.get

  env_pathname = env_get("VIEWVC_CONF_PATHNAME")
  if not env_pathname:
    env_pathname = env_get("VIEWCVS_CONF_PATHNAME")

  # Precedence: the environment, the passed-in PATHNAME, the
  # hard-coded default.
  if env_pathname:
    pathname = env_pathname
  elif not pathname:
    install_dir = os.path.dirname(os.path.dirname(__file__))
    pathname = os.path.join(install_dir, "viewvc.conf")

  # Load the configuration!
  cfg = config.Config()
  cfg.set_defaults()
  cfg.load_config(pathname, env_get("HTTP_HOST"))

  # Apply the stacktrace configuration immediately.
  if cfg.options.stacktraces:
    sys.tracebacklimit = 1000
  else:
    sys.tracebacklimit = 0

  # Load mime types file(s), but reverse the order -- our
  # configuration uses a most-to-least preferred approach, but the
  # 'mimetypes' package wants things the other way around.  The file
  # names are resolved against the configuration file's directory.
  if cfg.general.mime_types_files:
    conf_dir = os.path.dirname(pathname)
    mime_files = [os.path.join(conf_dir, name)
                  for name in reversed(cfg.general.mime_types_files)]
    mimetypes.init(mime_files)

  return cfg
5196
5197
def view_error(server, cfg):
  """Report the exception described by get_exception_data() to the
  client.

  The message and stack trace are escaped with SERVER.escape().  When
  CFG is available and no response has been started, the configured
  "error" template is rendered with the exception's status.  If that
  is not possible, or rendering fails, print_exception_data() is used
  instead."""
  exc_dict = get_exception_data()
  status = exc_dict['status']
  if exc_dict['msg']:
    exc_dict['msg'] = server.escape(exc_dict['msg'])
  if exc_dict['stacktrace']:
    exc_dict['stacktrace'] = server.escape(exc_dict['stacktrace'])

  # Use the configured error template if possible.
  try:
    if cfg and not server.response_started():
      server.start_response(status=status)
      template = get_view_template(cfg, "error")
      template.generate(server.file(), exc_dict)
      return
  except Exception:
    # Deliberately best-effort: a broken template must not hide the
    # original error, so fall through to the plain printer below.
    # SystemExit and KeyboardInterrupt are not swallowed.
    pass

  # Fallback to the old exception printer if no configuration is
  # available, or if something went wrong.
  print_exception_data(server, exc_dict)
5219
def main(server, cfg):
  """Handle one request: build a Request from SERVER and CFG and run it.

  SystemExit ends the request quietly; any other exception is handed
  to view_error() for reporting."""
  try:
    # The Request object contains info about the HTTP request.
    req = Request(server, cfg)
    req.run_viewvc()
  except SystemExit:
    return
  except BaseException:
    view_error(server, cfg)
5229