1"""A client for ClearCase."""
2
3from __future__ import unicode_literals
4
5import datetime
6import itertools
7import logging
8import os
9import sys
10import threading
11from collections import deque
12
13import six
14from pkg_resources import parse_version
15
16from rbtools.api.errors import APIError
17from rbtools.clients import SCMClient, RepositoryInfo
18from rbtools.clients.errors import InvalidRevisionSpecError, SCMError
19from rbtools.utils.checks import check_gnu_diff, check_install
20from rbtools.utils.filesystem import make_tempfile
21from rbtools.utils.process import execute
22
23# This specific import is necessary to handle the paths when running on cygwin.
24if sys.platform.startswith(('cygwin', 'win')):
25    import ntpath as cpath
26else:
27    import os.path as cpath
28
29
30class _get_elements_from_label_thread(threading.Thread):
31    def __init__(self, threadID, dir_name, label, elements):
32        self.threadID = threadID
33        self.dir_name = dir_name
34        self.elements = elements
35
36        # Remove any trailing vobstag not supported by cleartool find.
37        try:
38            label, vobstag = label.rsplit('@', 1)
39        except Exception:
40            pass
41        self.label = label
42
43        if sys.platform.startswith('win'):
44            self.cc_xpn = '%CLEARCASE_XPN%'
45        else:
46            self.cc_xpn = '$CLEARCASE_XPN'
47
48        threading.Thread.__init__(self)
49
50    def run(self):
51        """Run the thread.
52
53        This will store a dictionary of ClearCase elements (oid + version)
54        belonging to a label and identified by path.
55        """
56        output = execute(
57            ['cleartool', 'find', self.dir_name, '-version',
58             'lbtype(%s)' % self.label, '-exec',
59             r'cleartool describe -fmt "%On\t%En\t%Vn\n" ' + self.cc_xpn],
60            extra_ignore_errors=(1,), with_errors=False)
61
62        for line in output.split('\n'):
63            # Does not process empty lines.
64            if not line:
65                continue
66
67            oid, path, version = line.split('\t', 2)
68            self.elements[path] = {
69                'oid': oid,
70                'version': version,
71            }
72
73
74class ClearCaseClient(SCMClient):
75    """A client for ClearCase.
76
77    This is a wrapper around the clearcase tool that fetches repository
78    information and generates compatible diffs. This client assumes that cygwin
79    is installed on Windows.
80    """
81
82    name = 'ClearCase'
83    viewtype = None
84    supports_patch_revert = True
85
86    REVISION_ACTIVITY_BASE = '--rbtools-activity-base'
87    REVISION_ACTIVITY_PREFIX = 'activity:'
88    REVISION_BRANCH_BASE = '--rbtools-branch-base'
89    REVISION_BRANCH_PREFIX = 'brtype:'
90    REVISION_CHECKEDOUT_BASE = '--rbtools-checkedout-base'
91    REVISION_CHECKEDOUT_CHANGESET = '--rbtools-checkedout-changeset'
92    REVISION_FILES = '--rbtools-files'
93    REVISION_LABEL_BASE = '--rbtools-label-base'
94    REVISION_LABEL_PREFIX = 'lbtype:'
95
96    def get_repository_info(self):
97        """Return information on the ClearCase repository.
98
99        This will first check if the cleartool command is installed and in the
100        path, and that the current working directory is inside of the view.
101
102        Returns:
103            ClearCaseRepositoryInfo:
104            The repository info structure.
105        """
106        if not check_install(['cleartool', 'help']):
107            logging.debug('Unable to execute "cleartool help": skipping '
108                          'ClearCase')
109            return None
110
111        viewname = execute(['cleartool', 'pwv', '-short']).strip()
112        if viewname.startswith('** NONE'):
113            return None
114
115        # Now that we know it's ClearCase, make sure we have GNU diff
116        # installed, and error out if we don't.
117        check_gnu_diff()
118
119        property_lines = execute(
120            ['cleartool', 'lsview', '-full', '-properties', '-cview'],
121            split_lines=True)
122        for line in property_lines:
123            properties = line.split(' ')
124            if properties[0] == 'Properties:':
125                # Determine the view type and check if it's supported.
126                #
127                # Specifically check if webview was listed in properties
128                # because webview types also list the 'snapshot'
129                # entry in properties.
130                if 'webview' in properties:
131                    raise SCMError('Webviews are not supported. You can use '
132                                   'rbt commands only in dynamic or snapshot '
133                                   'views.')
134                if 'dynamic' in properties:
135                    self.viewtype = 'dynamic'
136                else:
137                    self.viewtype = 'snapshot'
138
139                break
140
141        # Find current VOB's tag
142        vobstag = execute(['cleartool', 'describe', '-short', 'vob:.'],
143                          ignore_errors=True).strip()
144        if 'Error: ' in vobstag:
145            raise SCMError('Failed to generate diff run rbt inside vob.')
146
147        root_path = execute(['cleartool', 'pwv', '-root'],
148                            ignore_errors=True).strip()
149        if 'Error: ' in root_path:
150            raise SCMError('Failed to generate diff run rbt inside view.')
151
152        # From current working directory cut path to VOB. On Windows
153        # and under cygwin, the VOB tag contains the VOB's path including
154        # name, e.g. `\new_proj` for a VOB `new_proj` mounted at the root
155        # of a drive. On Unix, the VOB tag is similar, but with a different
156        # path separator, e.g. `/vobs/new_proj` for our new_proj VOB mounted
157        # at `/vobs`.
158        cwd = os.getcwd()
159        base_path = cwd[:len(root_path) + len(vobstag)]
160
161        return ClearCaseRepositoryInfo(path=base_path,
162                                       base_path=base_path,
163                                       vobstag=vobstag)
164
165    def _determine_branch_path(self, version_path):
166        """Determine the branch path of a version path.
167
168        Args:
169            version_path (unicode):
170                A version path consisting of a branch path and a version
171                number.
172
173        Returns:
174            unicode:
175            The branch path.
176        """
177        branch_path, number = cpath.split(version_path)
178        return branch_path
179
180    def _list_checkedout(self, path):
181        """List all checked out elements in current view below path.
182
183        Run the :command:`cleartool` command twice because ``recurse`` finds
184        checked out elements under path except path, and the directory is
185        detected only if the path directory is checked out.
186
187        Args:
188            path (unicode):
189                The path of the directory to find checked-out files in.
190
191        Returns:
192            list of unicode:
193            A list of the checked out files.
194        """
195        checkedout_elements = []
196
197        for option in ['-recurse', '-directory']:
198            # We ignore return code 1 in order to omit files that ClearCase
199            # cannot read.
200            output = execute(['cleartool', 'lscheckout', option, '-cview',
201                              '-fmt', r'%En@@%Vn\n', path],
202                             split_lines=True,
203                             extra_ignore_errors=(1,),
204                             with_errors=False)
205
206            if output:
207                checkedout_elements.extend(output)
208                logging.debug(output)
209
210        return checkedout_elements
211
212    def _is_a_label(self, label, vobstag=None):
213        """Return whether a given label is a valid ClearCase lbtype.
214
215        Args:
216            label (unicode):
217                The label to check.
218
219            vobstag (unicode, optional):
220                An optional vobstag to limit the label to.
221
222        Raises:
223            Exception:
224                The vobstag did not match.
225
226        Returns:
227            bool:
228            Whether the label was valid.
229        """
230        label_vobstag = None
231        # Try to find any vobstag.
232        try:
233            label, label_vobstag = label.rsplit('@', 1)
234        except Exception:
235            pass
236
237        # Be sure label is prefix by lbtype, required by cleartool describe.
238        if not label.startswith(self.REVISION_LABEL_PREFIX):
239            label = '%s%s' % (self.REVISION_LABEL_PREFIX, label)
240
241        # If vobstag defined, check if it matches with the one extracted from
242        # label, otherwise raise an exception.
243        if vobstag and label_vobstag and label_vobstag != vobstag:
244            raise Exception('label vobstag %s does not match expected vobstag '
245                            '%s' % (label_vobstag, vobstag))
246
247        # Finally check if label exists in database, otherwise quit. Ignore
248        # return code 1, it means label does not exist.
249        output = execute(['cleartool', 'describe', '-short', label],
250                         extra_ignore_errors=(1,),
251                         with_errors=False)
252        return bool(output)
253
254    def _get_tmp_label(self):
255        """Return a string that will be used to set a ClearCase label.
256
257        Returns:
258            unicode:
259            A string suitable for using as a temporary label.
260        """
261        now = datetime.datetime.now()
262        temporary_label = 'Current_%d_%d_%d_%d_%d_%d_%d' % (
263            now.year, now.month, now.day, now.hour, now.minute, now.second,
264            now.microsecond)
265        return temporary_label
266
267    def _set_label(self, label, path):
268        """Set a ClearCase label on elements seen under path.
269
270        Args:
271            label (unicode):
272                The label to set.
273
274            path (unicode):
275                The filesystem path to set the label on.
276        """
277        checkedout_elements = self._list_checkedout(path)
278        if checkedout_elements:
279            raise Exception(
280                'ClearCase backend cannot set label when some elements are '
281                'checked out:\n%s' % ''.join(checkedout_elements))
282
283        # First create label in vob database.
284        execute(['cleartool', 'mklbtype', '-c', 'label created for rbtools',
285                 label],
286                with_errors=True)
287
288        # We ignore return code 1 in order to omit files that ClearCase cannot
289        # read.
290        recursive_option = ''
291        if cpath.isdir(path):
292            recursive_option = '-recurse'
293
294        # Apply label to path.
295        execute(['cleartool', 'mklabel', '-nc', recursive_option, label, path],
296                extra_ignore_errors=(1,),
297                with_errors=False)
298
299    def _remove_label(self, label):
300        """Remove a ClearCase label from vob database.
301
302        It will remove all references of this label on elements.
303
304        Args:
305            label (unicode):
306                The ClearCase label to remove.
307        """
308        # Be sure label is prefix by lbtype.
309        if not label.startswith(self.REVISION_LABEL_PREFIX):
310            label = '%s%s' % (self.REVISION_LABEL_PREFIX, label)
311
312        # Label exists so remove it.
313        execute(['cleartool', 'rmtype', '-rmall', '-force', label],
314                with_errors=True)
315
316    def _determine_version(self, version_path):
317        """Determine the numeric version of a version path.
318
319        This will split a version path, pulling out the branch and version. A
320        special version value of ``CHECKEDOUT`` represents the latest version
321        of a file, similar to ``HEAD`` in many other types of repositories.
322
323        Args:
324            version_path (unicode):
325                A version path consisting of a branch path and a version
326                number.
327
328        Returns:
329            int:
330            The numeric portion of the version path.
331        """
332        branch, number = cpath.split(version_path)
333
334        if number == 'CHECKEDOUT':
335            return sys.maxint
336
337        return int(number)
338
339    def _construct_extended_path(self, path, version):
340        """Construct an extended path from a file path and version identifier.
341
342        This will construct a path in the form of ``path@version``. If the
343        version is the special value ``CHECKEDOUT``, only the path will be
344        returned.
345
346        Args:
347            path (unicode):
348                A file path.
349
350            version (unicode):
351                The version of the file.
352
353        Returns:
354            unicode:
355            The combined extended path.
356        """
357        if not version or version.endswith('CHECKEDOUT'):
358            return path
359
360        return '%s@@%s' % (path, version)
361
362    def _construct_revision(self, branch_path, version_number):
363        """Construct a revisioned path from a branch path and version ID.
364
365        Args:
366            branch_path (unicode):
367                The path of a branch.
368
369            version_number (unicode):
370                The version number of the revision.
371
372        Returns:
373            unicode:
374            The combined revision.
375        """
376        return cpath.join(branch_path, version_number)
377
378    def parse_revision_spec(self, revisions):
379        """Parse the given revision spec.
380
381        Args:
382            revisions (list of unicode, optional):
383                A list of revisions as specified by the user. Items in the list
384                do not necessarily represent a single revision, since the user
385                can use SCM-native syntaxes such as ``r1..r2`` or ``r1:r2``.
386                SCMTool-specific overrides of this method are expected to deal
387                with such syntaxes.
388
389        Raises:
390            rbtools.clients.errors.InvalidRevisionSpecError:
391                The given revisions could not be parsed.
392
393            rbtools.clients.errors.TooManyRevisionsError:
394                The specified revisions list contained too many revisions.
395
396        Returns:
397            dict:
398            A dictionary with the following keys:
399
400            ``base`` (:py:class:`unicode`):
401                A revision to use as the base of the resulting diff.
402
403            ``tip`` (:py:class:`unicode`):
404                A revision to use as the tip of the resulting diff.
405
406            These will be used to generate the diffs to upload to Review Board
407            (or print).
408
409            There are many different ways to generate diffs for clearcase,
410            because there are so many different workflows. This method serves
411            more as a way to validate the passed-in arguments than actually
412            parsing them in the way that other clients do.
413        """
414        n_revs = len(revisions)
415
416        if n_revs == 0:
417            return {
418                'base': self.REVISION_CHECKEDOUT_BASE,
419                'tip': self.REVISION_CHECKEDOUT_CHANGESET,
420            }
421        elif n_revs == 1:
422            if revisions[0].startswith(self.REVISION_ACTIVITY_PREFIX):
423                return {
424                    'base': self.REVISION_ACTIVITY_BASE,
425                    'tip': revisions[0][len(self.REVISION_ACTIVITY_PREFIX):],
426                }
427            if revisions[0].startswith(self.REVISION_BRANCH_PREFIX):
428                return {
429                    'base': self.REVISION_BRANCH_BASE,
430                    'tip': revisions[0][len(self.REVISION_BRANCH_PREFIX):],
431                }
432            if revisions[0].startswith(self.REVISION_LABEL_PREFIX):
433                return {
434                    'base': self.REVISION_LABEL_BASE,
435                    'tip': [revisions[0][len(self.REVISION_BRANCH_PREFIX):]],
436                }
437            # TODO:
438            # stream:streamname[@pvob] => review changes in this UCM stream
439            #                             (UCM "branch")
440            # baseline:baseline[@pvob] => review changes between this baseline
441            #                             and the working directory
442        elif n_revs == 2:
443            if self.viewtype != 'dynamic':
444                raise SCMError('To generate a diff using multiple revisions, '
445                               'you must use a dynamic view.')
446
447            if (revisions[0].startswith(self.REVISION_LABEL_PREFIX) and
448                revisions[1].startswith(self.REVISION_LABEL_PREFIX)):
449                return {
450                    'base': self.REVISION_LABEL_BASE,
451                    'tip': [x[len(self.REVISION_BRANCH_PREFIX):]
452                            for x in revisions],
453                }
454            # TODO:
455            # baseline:baseline1[@pvob] baseline:baseline2[@pvob]
456            #                             => review changes between these two
457            #                                baselines
458            pass
459
460        pairs = []
461        for r in revisions:
462            p = r.split(':')
463            if len(p) != 2:
464                raise InvalidRevisionSpecError(
465                    '"%s" is not a valid file@revision pair' % r)
466            pairs.append(p)
467
468        return {
469            'base': self.REVISION_FILES,
470            'tip': pairs,
471        }
472
473    def _sanitize_activity_changeset(self, changeset):
474        """Return changeset containing non-binary, branched file versions.
475
476        A UCM activity changeset contains all file revisions created/touched
477        during this activity. File revisions are ordered earlier versions first
478        in the format:
479        changelist = [
480        <path>@@<branch_path>/<version_number>, ...,
481        <path>@@<branch_path>/<version_number>
482        ]
483
484        <path> is relative path to file
485        <branch_path> is clearcase specific branch path to file revision
486        <version number> is the version number of the file in <branch_path>.
487
488        A UCM activity changeset can contain changes from different vobs,
489        however reviewboard supports only changes from a single repo at the
490        same time, so changes made outside of the current vobstag will be
491        ignored.
492
493        Args:
494            changeset (unicode):
495                The changeset to fetch.
496
497        Returns:
498            list:
499            The list of file versions.
500        """
501        changelist = {}
502        # Maybe we should be able to access repository_info without calling
503        # cleartool again.
504        repository_info = self.get_repository_info()
505
506        for change in changeset:
507            path, current = change.split('@@')
508
509            # If a file isn't in the correct vob, then ignore it.
510            if path.find('%s/' % (repository_info.vobstag,)) == -1:
511                logging.debug('Vobstag does not match, ignoring changes on %s',
512                              path)
513                continue
514
515            version_number = self._determine_version(current)
516            if path not in changelist:
517                changelist[path] = {
518                    'highest': version_number,
519                    'lowest': version_number,
520                    'current': current,
521                }
522
523            if version_number == 0:
524                raise SCMError('Unexepected version_number=0 in activity '
525                               'changeset')
526            elif version_number > changelist[path]['highest']:
527                changelist[path]['highest'] = version_number
528                changelist[path]['current'] = current
529            elif version_number < changelist[path]['lowest']:
530                changelist[path]['lowest'] = version_number
531
532        # Convert to list
533        changeranges = []
534        for path, version in six.iteritems(changelist):
535            # Previous version is predecessor of lowest ie its version number
536            # decreased by 1.
537            branch_path = self._determine_branch_path(version['current'])
538            prev_version_number = str(int(version['lowest']) - 1)
539            version['previous'] = self._construct_revision(branch_path,
540                                                           prev_version_number)
541            changeranges.append(
542                (self._construct_extended_path(path, version['previous']),
543                 self._construct_extended_path(path, version['current']))
544            )
545
546        return changeranges
547
548    def _sanitize_branch_changeset(self, changeset):
549        """Return changeset containing non-binary, branched file versions.
550
551        Changeset contain only first and last version of file made on branch.
552
553        Args:
554            changeset (unicode):
555                The changeset to fetch.
556
557        Returns:
558            list:
559            The list of file versions.
560        """
561        changelist = {}
562
563        for path, previous, current in changeset:
564            version_number = self._determine_version(current)
565
566            if path not in changelist:
567                changelist[path] = {
568                    'highest': version_number,
569                    'current': current,
570                    'previous': previous
571                }
572
573            if version_number == 0:
574                # Previous version of 0 version on branch is base
575                changelist[path]['previous'] = previous
576            elif version_number > changelist[path]['highest']:
577                changelist[path]['highest'] = version_number
578                changelist[path]['current'] = current
579
580        # Convert to list
581        changeranges = []
582        for path, version in six.iteritems(changelist):
583            changeranges.append(
584                (self._construct_extended_path(path, version['previous']),
585                 self._construct_extended_path(path, version['current']))
586            )
587
588        return changeranges
589
590    def _sanitize_checkedout_changeset(self, changeset):
591        """Return extended paths for all modifications in a changeset.
592
593        Args:
594            changeset (unicode):
595                The changeset to fetch.
596
597        Returns:
598            list:
599            The list of file versions.
600        """
601        changeranges = []
602
603        for path, previous, current in changeset:
604            changeranges.append(
605                (self._construct_extended_path(path, previous),
606                 self._construct_extended_path(path, current))
607            )
608
609        return changeranges
610
611    def _sanitize_version_0_file(self, file_revision):
612        """Sanitize a version 0 file.
613
614        This fixes up a revision identifier to use the correct predecessor
615        revision when the version is 0. ``/main/0`` is a special case which is
616        left as-is.
617
618        Args:
619            file_revision (unicode):
620                The file revision to sanitize.
621
622        Returns:
623            unicode:
624            Thee sanitized revision.
625        """
626        # There is no predecessor for @@/main/0, so keep current revision.
627        if file_revision.endswith('@@/main/0'):
628            return file_revision
629
630        if file_revision.endswith('/0'):
631            logging.debug('Found file %s with version 0', file_revision)
632            file_revision = execute(['cleartool',
633                                     'describe',
634                                     '-fmt', '%En@@%PSn',
635                                     file_revision])
636            logging.debug('Sanitized with predecessor, new file: %s',
637                          file_revision)
638
639        return file_revision
640
641    def _sanitize_version_0_changeset(self, changeset):
642        """Return changeset sanitized of its <branch>/0 version.
643
644        Indeed this predecessor (equal to <branch>/0) should already be
645        available from previous vob synchro in multi-site context.
646
647        Args:
648            changeset (list):
649                A list of changes in the changeset.
650
651        Returns:
652            list:
653            The sanitized changeset.
654        """
655        sanitized_changeset = []
656
657        for old_file, new_file in changeset:
658            # This should not happen for new file but it is safer to sanitize
659            # both file revisions.
660            sanitized_changeset.append(
661                (self._sanitize_version_0_file(old_file),
662                 self._sanitize_version_0_file(new_file)))
663
664        return sanitized_changeset
665
666    def _directory_content(self, path):
667        """Return directory content ready for saving to tempfile.
668
669        Args:
670            path (unicode):
671                The path to list.
672
673        Returns:
674            unicode:
675            The listed files in the directory.
676        """
677        # Get the absolute path of each element located in path, but only
678        # clearcase elements => -vob_only
679        output = execute(['cleartool', 'ls', '-short', '-nxname', '-vob_only',
680                          path])
681        lines = output.splitlines(True)
682
683        content = []
684        # The previous command returns absolute file paths but only file names
685        # are required.
686        for absolute_path in lines:
687            short_path = os.path.basename(absolute_path.strip())
688            content.append(short_path)
689
690        return ''.join([
691            '%s\n' % s
692            for s in sorted(content)])
693
694    def _construct_changeset(self, output):
695        """Construct a changeset from cleartool output.
696
697        Args:
698            output (unicode):
699                The result from a :command:`cleartool lsX` operation.
700
701        Returns:
702            list:
703            A list of changes.
704        """
705        return [
706            info.split('\t')
707            for info in output.strip().split('\n')
708        ]
709
710    def _get_checkedout_changeset(self):
711        """Return information about the checked out changeset.
712
713        This function returns: kind of element, path to file, previews and
714        current file version.
715
716        Returns:
717            list:
718            A list of the changed files.
719        """
720        changeset = []
721        # We ignore return code 1 in order to omit files that ClearCase can't
722        # read.
723        output = execute(['cleartool',
724                          'lscheckout',
725                          '-all',
726                          '-cview',
727                          '-me',
728                          '-fmt',
729                          r'%En\t%PVn\t%Vn\n'],
730                         extra_ignore_errors=(1,),
731                         with_errors=False)
732
733        if output:
734            changeset = self._construct_changeset(output)
735
736        return self._sanitize_checkedout_changeset(changeset)
737
738    def _get_activity_changeset(self, activity):
739        """Return information about the versions changed on a branch.
740
741        This takes into account the changes attached to this activity
742        (including rebase changes) in all vobs of the current view.
743
744        Args:
745            activity (unicode):
746                The activity name.
747
748        Returns:
749            list:
750            A list of the changed files.
751        """
752        changeset = []
753
754        # Get list of revisions and get the diff of each one. Return code 1 is
755        # ignored in order to omit files that ClearCase can't read.
756        output = execute(['cleartool',
757                          'lsactivity',
758                          '-fmt',
759                          '%[versions]p',
760                          activity],
761                         extra_ignore_errors=(1,),
762                         with_errors=False)
763
764        if output:
765            # UCM activity changeset is split by spaces not but EOL, so we
766            # cannot reuse self._construct_changeset here.
767            changeset = output.split()
768
769        return self._sanitize_activity_changeset(changeset)
770
771    def _get_branch_changeset(self, branch):
772        """Return information about the versions changed on a branch.
773
774        This takes into account the changes on the branch owned by the
775        current user in all vobs of the current view.
776
777        Args:
778            branch (unicode):
779                The branch name.
780
781        Returns:
782            list:
783            A list of the changed files.
784        """
785        changeset = []
786
787        # We ignore return code 1 in order to omit files that ClearCase can't
788        # read.
789        if sys.platform.startswith('win'):
790            CLEARCASE_XPN = '%CLEARCASE_XPN%'
791        else:
792            CLEARCASE_XPN = '$CLEARCASE_XPN'
793
794        output = execute(
795            [
796                'cleartool',
797                'find',
798                '-all',
799                '-version',
800                'brtype(%s)' % branch,
801                '-exec',
802                'cleartool descr -fmt "%%En\t%%PVn\t%%Vn\n" %s' % CLEARCASE_XPN
803            ],
804            extra_ignore_errors=(1,),
805            with_errors=False)
806
807        if output:
808            changeset = self._construct_changeset(output)
809
810        return self._sanitize_branch_changeset(changeset)
811
812    def _get_label_changeset(self, labels):
813        """Return information about the versions changed between labels.
814
815        This takes into account the changes done between labels and restrict
816        analysis to current working directory. A ClearCase label belongs to a
817        unique vob.
818
819        Args:
820            labels (list):
821                A list of labels to compare.
822
823        Returns:
824            list:
825            A list of the changed files.
826        """
827        changeset = []
828        tmp_labels = []
829
830        # Initialize comparison_path to current working directory.
831        # TODO: support another argument to manage a different comparison path.
832        comparison_path = os.getcwd()
833
834        error_message = None
835
836        try:
837            # Unless user has provided 2 labels, set a temporary label on
838            # current version seen of comparison_path directory. It will be
839            # used to process changeset.
840            # Indeed ClearCase can identify easily each file and associated
841            # version belonging to a label.
842            if len(labels) == 1:
843                tmp_lb = self._get_tmp_label()
844                tmp_labels.append(tmp_lb)
845                self._set_label(tmp_lb, comparison_path)
846                labels.append(tmp_lb)
847
848            label_count = len(labels)
849
850            if label_count != 2:
851                raise Exception(
852                    'ClearCase label comparison does not support %d labels'
853                    % label_count)
854
855            # Now we get 2 labels for comparison, check if they are both valid.
856            repository_info = self.get_repository_info()
857            for label in labels:
858                if not self._is_a_label(label, repository_info.vobstag):
859                    raise Exception(
860                        'ClearCase label %s is not a valid label' % label)
861
862            previous_label, current_label = labels
863            logging.debug('Comparison between labels %s and %s on %s',
864                          previous_label, current_label, comparison_path)
865
866            # List ClearCase element path and version belonging to previous and
867            # current labels, element path is the key of each dict.
868            previous_elements = {}
869            current_elements = {}
870            previous_label_elements_thread = _get_elements_from_label_thread(
871                1, comparison_path, previous_label, previous_elements)
872            previous_label_elements_thread.start()
873
874            current_label_elements_thread = _get_elements_from_label_thread(
875                2, comparison_path, current_label, current_elements)
876            current_label_elements_thread.start()
877
878            previous_label_elements_thread.join()
879            current_label_elements_thread.join()
880
881            seen = []
882            changelist = {}
883            # Iterate on each ClearCase path in order to find respective
884            # previous and current version.
885            for path in itertools.chain(previous_elements.keys(),
886                                        current_elements.keys()):
887                if path in seen:
888                    continue
889
890                seen.append(path)
891
892                # Initialize previous and current version to '/main/0'
893                changelist[path] = {
894                    'previous': '/main/0',
895                    'current': '/main/0',
896                }
897
898                if path in current_elements:
899                    changelist[path]['current'] = \
900                        current_elements[path]['version']
901
902                if path in previous_elements:
903                    changelist[path]['previous'] = \
904                        previous_elements[path]['version']
905
906                logging.debug('path: %s\nprevious: %s\ncurrent:  %s\n',
907                              path,
908                              changelist[path]['previous'],
909                              changelist[path]['current'])
910
911                # Prevent adding identical version to comparison.
912                if changelist[path]['current'] == changelist[path]['previous']:
913                    continue
914
915                changeset.append(
916                    (self._construct_extended_path(
917                        path,
918                        changelist[path]['previous']),
919                     self._construct_extended_path(
920                        path,
921                        changelist[path]['current'])))
922
923        except Exception as e:
924            error_message = str(e)
925
926        finally:
927            # Delete all temporary labels.
928            for lb in tmp_labels:
929                if self._is_a_label(lb):
930                    self._remove_label(lb)
931
932            if error_message:
933                raise SCMError('Label comparison failed:\n%s' % error_message)
934
935        return changeset
936
937    def diff(self, revisions, include_files=[], exclude_patterns=[],
938             extra_args=[], **kwargs):
939        """Perform a diff using the given revisions.
940
941        Args:
942            revisions (dict):
943                A dictionary of revisions, as returned by
944                :py:meth:`parse_revision_spec`.
945
946            include_files (list of unicode, optional):
947                A list of files to whitelist during the diff generation.
948
949            exclude_patterns (list of unicode, optional):
950                A list of shell-style glob patterns to blacklist during diff
951                generation.
952
953            extra_args (list, unused):
954                Additional arguments to be passed to the diff generation.
955                Unused for ClearCase.
956
957            **kwargs (dict, optional):
958                Unused keyword arguments.
959
960        Returns:
961            dict:
962            A dictionary containing the following keys:
963
964            ``diff`` (:py:class:`bytes`):
965                The contents of the diff to upload.
966        """
967        if include_files:
968            raise Exception(
969                'The ClearCase backend does not currently support the '
970                '-I/--include parameter. To diff for specific files, pass in '
971                'file@revision1:file@revision2 pairs as arguments')
972
973        if revisions['tip'] == self.REVISION_CHECKEDOUT_CHANGESET:
974            changeset = self._get_checkedout_changeset()
975            return self._do_diff(changeset)
976        elif revisions['base'] == self.REVISION_ACTIVITY_BASE:
977            changeset = self._get_activity_changeset(revisions['tip'])
978            return self._do_diff(changeset)
979        elif revisions['base'] == self.REVISION_BRANCH_BASE:
980            changeset = self._get_branch_changeset(revisions['tip'])
981            return self._do_diff(changeset)
982        elif revisions['base'] == self.REVISION_LABEL_BASE:
983            changeset = self._get_label_changeset(revisions['tip'])
984            return self._do_diff(changeset)
985        elif revisions['base'] == self.REVISION_FILES:
986            include_files = revisions['tip']
987            return self._do_diff(include_files)
988        else:
989            assert False
990
991    def _diff_files(self, old_file, new_file):
992        """Return a unified diff for file.
993
994        Args:
995            old_file (unicode):
996                The name and version of the old file.
997
998            new_file (unicode):
999                The name and version of the new file.
1000
1001        Returns:
1002            bytes:
1003            The diff between the two files.
1004        """
1005        # In snapshot view, diff can't access history clearcase file version
1006        # so copy cc files to tempdir by 'cleartool get -to dest-pname pname',
1007        # and compare diff with the new temp ones.
1008        if self.viewtype == 'snapshot':
1009            # Create temporary file first.
1010            tmp_old_file = make_tempfile()
1011            tmp_new_file = make_tempfile()
1012
1013            # Delete so cleartool can write to them.
1014            try:
1015                os.remove(tmp_old_file)
1016            except OSError:
1017                pass
1018
1019            try:
1020                os.remove(tmp_new_file)
1021            except OSError:
1022                pass
1023
1024            execute(['cleartool', 'get', '-to', tmp_old_file, old_file])
1025            execute(['cleartool', 'get', '-to', tmp_new_file, new_file])
1026            diff_cmd = ['diff', '-uN', tmp_old_file, tmp_new_file]
1027        else:
1028            diff_cmd = ['diff', '-uN', old_file, new_file]
1029
1030        dl = execute(diff_cmd,
1031                     extra_ignore_errors=(1, 2),
1032                     results_unicode=False)
1033
1034        # Replace temporary file name in diff with the one in snapshot view.
1035        if self.viewtype == 'snapshot':
1036            dl = dl.replace(tmp_old_file.encode('utf-8'),
1037                            old_file.encode('utf-8'))
1038            dl = dl.replace(tmp_new_file.encode('utf-8'),
1039                            new_file.encode('utf-8'))
1040
1041        # If the input file has ^M characters at end of line, lets ignore them.
1042        dl = dl.replace(b'\r\r\n', b'\r\n')
1043        dl = dl.splitlines(True)
1044
1045        # Special handling for the output of the diff tool on binary files:
1046        #     diff outputs "Files a and b differ"
1047        # and the code below expects the output to start with
1048        #     "Binary files "
1049        if (len(dl) == 1 and
1050            dl[0].startswith(b'Files %s and %s differ'
1051                             % (old_file.encode('utf-8'),
1052                                new_file.encode('utf-8')))):
1053            dl = [b'Binary files %s and %s differ\n'
1054                  % (old_file.encode('utf-8'),
1055                     new_file.encode('utf-8'))]
1056
1057        # We need oids of files to translate them to paths on reviewboard
1058        # repository.
1059        old_oid = execute(['cleartool', 'describe', '-fmt', '%On', old_file],
1060                          results_unicode=False)
1061        new_oid = execute(['cleartool', 'describe', '-fmt', '%On', new_file],
1062                          results_unicode=False)
1063
1064        if dl == [] or dl[0].startswith(b'Binary files '):
1065            if dl == []:
1066                dl = [b'File %s in your changeset is unmodified\n' %
1067                      new_file.encode('utf-8')]
1068
1069            dl.insert(0, b'==== %s %s ====\n' % (old_oid, new_oid))
1070            dl.append(b'\n')
1071        else:
1072            dl.insert(2, b'==== %s %s ====\n' % (old_oid, new_oid))
1073
1074        return dl
1075
1076    def _diff_directories(self, old_dir, new_dir):
1077        """Return a unified diff between two directories' content.
1078
1079        This function saves two version's content of directory to temp
1080        files and treats them as casual diff between two files.
1081
1082        Args:
1083            old_dir (unicode):
1084                The path to a directory within a vob.
1085
1086            new_dir (unicode):
1087                The path to a directory within a vob.
1088
1089        Returns:
1090            list:
1091            The diff between the two directory trees, split into lines.
1092        """
1093        old_content = self._directory_content(old_dir)
1094        new_content = self._directory_content(new_dir)
1095
1096        old_tmp = make_tempfile(content=old_content)
1097        new_tmp = make_tempfile(content=new_content)
1098
1099        diff_cmd = ['diff', '-uN', old_tmp, new_tmp]
1100        dl = execute(diff_cmd,
1101                     extra_ignore_errors=(1, 2),
1102                     results_unicode=False,
1103                     split_lines=True)
1104
1105        # Replace temporary filenames with real directory names and add ids
1106        if dl:
1107            dl[0] = dl[0].replace(old_tmp.encode('utf-8'),
1108                                  old_dir.encode('utf-8'))
1109            dl[1] = dl[1].replace(new_tmp.encode('utf-8'),
1110                                  new_dir.encode('utf-8'))
1111            old_oid = execute(['cleartool', 'describe', '-fmt', '%On',
1112                               old_dir],
1113                              results_unicode=False)
1114            new_oid = execute(['cleartool', 'describe', '-fmt', '%On',
1115                               new_dir],
1116                              results_unicode=False)
1117            dl.insert(2, b'==== %s %s ====\n' % (old_oid, new_oid))
1118
1119        return dl
1120
1121    def _do_diff(self, changeset):
1122        """Generate a unified diff for all files in the given changeset.
1123
1124        Args:
1125            changeset (list):
1126                A list of changes.
1127
1128        Returns:
1129            dict:
1130            A dictionary containing a ``diff`` key.
1131        """
1132        # Sanitize all changesets of version 0 before processing
1133        changeset = self._sanitize_version_0_changeset(changeset)
1134
1135        diff = []
1136        for old_file, new_file in changeset:
1137            dl = []
1138
1139            # cpath.isdir does not work for snapshot views but this
1140            # information can be found using `cleartool describe`.
1141            if self.viewtype == 'snapshot':
1142                # ClearCase object path is file path + @@
1143                object_path = new_file.split('@@')[0] + '@@'
1144                output = execute(['cleartool', 'describe', '-fmt', '%m',
1145                                  object_path])
1146                object_kind = output.strip()
1147                isdir = object_kind == 'directory element'
1148            else:
1149                isdir = cpath.isdir(new_file)
1150
1151            if isdir:
1152                dl = self._diff_directories(old_file, new_file)
1153            elif cpath.exists(new_file) or self.viewtype == 'snapshot':
1154                dl = self._diff_files(old_file, new_file)
1155            else:
1156                logging.error('File %s does not exist or access is denied.',
1157                              new_file)
1158                continue
1159
1160            if dl:
1161                diff.append(b''.join(dl))
1162
1163        return {
1164            'diff': b''.join(diff),
1165        }
1166
1167
class ClearCaseRepositoryInfo(RepositoryInfo):
    """A representation of a ClearCase source code repository.

    This version knows how to find a matching repository on the server even if
    the URLs differ.
    """

    def __init__(self, path, base_path, vobstag):
        """Initialize the repository info.

        Args:
            path (unicode):
                The path of the repository.

            base_path (unicode):
                The relative path between the repository root and the working
                directory.

            vobstag (unicode):
                The vobstag for the repository.
        """
        RepositoryInfo.__init__(self, path, base_path,
                                supports_parent_diffs=False)
        self.vobstag = vobstag

    def find_server_repository_info(self, server):
        """Find a matching repository on the server.

        The point of this function is to find a repository on the server that
        matches self, even if the paths aren't the same. (For example, if self
        uses an 'http' path, but the server uses a 'file' path for the same
        repository.) It does this by comparing the VOB's name and uuid. If the
        repositories use the same path, you'll get back self, otherwise you'll
        get a different ClearCaseRepositoryInfo object (with a different path).

        Args:
            server (rbtools.api.resource.RootResource):
                The root resource for the Review Board server.

        Returns:
            ClearCaseRepositoryInfo:
            The server-side information for this repository.

        Raises:
            rbtools.clients.errors.SCMError:
                The server denied access to the repository whose name is
                exactly this VOB tag.

            rbtools.api.errors.APIError:
                The server returned any error other than "permission denied"
                while fetching a repository's information.
        """
        # Find VOB's family uuid based on VOB's tag
        uuid = self._get_vobs_uuid(self.vobstag)
        logging.debug('Repository vobstag %s uuid is %r', self.vobstag, uuid)

        # To reduce HTTP requests (one get_info() call per repository), we
        # build an ordered list of ClearCase repositories starting with the
        # ones that have a similar vobstag.
        repository_scan_order = deque()

        # Because the VOB tag is platform-specific, we split and search
        # for the remote name in any sub-part so this HTTP request
        # optimization can work for users on both Windows and Unix-like
        # platforms.
        vob_tag_parts = self.vobstag.split(cpath.sep)

        # Reduce list of repositories to only ClearCase ones and sort them by
        # repo name matching vobstag (or some part of the vobstag) first.
        for repository in server.get_repositories(tool='ClearCase').all_items:
            # Ignore non-ClearCase repositories.
            if repository['tool'] != 'ClearCase':
                continue

            repo_name = repository['name']

            # Repositories with a similar VOB tag get put at the beginning and
            # the others at the end.
            if repo_name == self.vobstag or repo_name in vob_tag_parts:
                repository_scan_order.appendleft(repository)
            else:
                repository_scan_order.append(repository)

        # Now try to find a matching uuid
        for repository in repository_scan_order:
            repo_name = repository['name']

            try:
                info = repository.get_info()
            except APIError as e:
                # If the current repository is not publicly accessible and the
                # current user has no explicit access to it, the server will
                # return error_code 101 and http_status 403.
                if e.error_code == 101 and e.http_status == 403:
                    # We can safely ignore this repository unless the VOB tag
                    # matches.
                    if repo_name == self.vobstag:
                        raise SCMError('You do not have permission to access '
                                       'this repository.')

                    continue

                # Bubble up any other errors, keeping the original traceback.
                raise

            if not info or uuid != info['uuid']:
                continue

            path = info['repopath']
            logging.debug('Matching repository uuid:%s with path:%s',
                          uuid, path)
            return ClearCaseRepositoryInfo(path=path, base_path=path,
                                           vobstag=self.vobstag)

        # We didn't find a matching uuid, but if the server version is
        # >= 1.5.3 we can try to use the VOB's name, hoping it is better than
        # the current VOB's path.
        if parse_version(server.rb_version) >= parse_version('1.5.3'):
            self.path = cpath.split(self.vobstag)[1]

        # We didn't find a matching repository on the server.
        # We'll just return self and hope for the best.
        return self

    def _get_vobs_uuid(self, vobstag):
        """Return the family uuid of a VOB.

        The uuid is read from the ``Vob family uuid:`` line printed by
        ``cleartool lsvob -long``.

        Args:
            vobstag (unicode):
                The tag of the VOB to look up.

        Returns:
            unicode:
            The family uuid, or ``None`` if the output has no such line.
        """
        property_lines = execute(['cleartool', 'lsvob', '-long', vobstag],
                                 split_lines=True)

        for line in property_lines:
            if line.startswith('Vob family uuid:'):
                # The uuid is the last space-separated field of the line.
                return line.split(' ')[-1].rstrip()

        return None

    def _get_repository_info(self, server, repository):
        """Return the server-side information for a repository.

        No other method of this class calls this helper.

        Args:
            server (rbtools.api.resource.RootResource):
                The root resource for the Review Board server.

            repository (object):
                The repository to look up; only its ``id`` item is read.

        Returns:
            object:
            Whatever ``server.get_repository_info`` returns, or ``None`` if
            the server answered with error code 210.

        Raises:
            rbtools.api.errors.APIError:
                The server returned an error other than code 210.
        """
        try:
            return server.get_repository_info(repository['id'])
        except APIError as e:
            # If the server couldn't fetch the repository info, it will return
            # code 210. Ignore those.
            # Other more serious errors should still be raised, though.
            if e.error_code == 210:
                return None

            raise
1300