1# Copyright (C) 2005-2010 Canonical Ltd
2#
3# This program is free software; you can redistribute it and/or modify
4# it under the terms of the GNU General Public License as published by
5# the Free Software Foundation; either version 2 of the License, or
6# (at your option) any later version.
7#
8# This program is distributed in the hope that it will be useful,
9# but WITHOUT ANY WARRANTY; without even the implied warranty of
10# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11# GNU General Public License for more details.
12#
13# You should have received a copy of the GNU General Public License
14# along with this program; if not, write to the Free Software
15# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
17"""File annotate based on weave storage"""
18
19# TODO: Choice of more or less verbose formats:
20#
21# interposed: show more details between blocks of modified lines
22
23# TODO: Show which revision caused a line to merge into the parent
24
25# TODO: perhaps abbreviate timescales depending on how recent they are
26# e.g. "3:12 Tue", "13 Oct", "Oct 2005", etc.
27
28import sys
29import time
30
31from .lazy_import import lazy_import
32lazy_import(globals(), """
33
34import patiencediff
35
36from breezy import (
37    tsort,
38    )
39""")
40from . import (
41    config,
42    errors,
43    osutils,
44    )
45from .repository import _strip_NULL_ghosts
46from .revision import (
47    CURRENT_REVISION,
48    Revision,
49    )
50
51
52def annotate_file_tree(tree, path, to_file, verbose=False, full=False,
53                       show_ids=False, branch=None):
54    """Annotate path in a tree.
55
56    The tree should already be read_locked() when annotate_file_tree is called.
57
58    :param tree: The tree to look for revision numbers and history from.
59    :param path: The path to annotate
60    :param to_file: The file to output the annotation to.
61    :param verbose: Show all details rather than truncating to ensure
62        reasonable text width.
63    :param full: XXXX Not sure what this does.
64    :param show_ids: Show revision ids in the annotation output.
65    :param branch: Branch to use for revision revno lookups
66    """
67    if branch is None:
68        branch = tree.branch
69    if to_file is None:
70        to_file = sys.stdout
71
72    encoding = osutils.get_terminal_encoding()
73    # Handle the show_ids case
74    annotations = list(tree.annotate_iter(path))
75    if show_ids:
76        return _show_id_annotations(annotations, to_file, full, encoding)
77
78    if not getattr(tree, "get_revision_id", False):
79        # Create a virtual revision to represent the current tree state.
80        # Should get some more pending commit attributes, like pending tags,
81        # bugfixes etc.
82        current_rev = Revision(CURRENT_REVISION)
83        current_rev.parent_ids = tree.get_parent_ids()
84        try:
85            current_rev.committer = branch.get_config_stack().get('email')
86        except errors.NoWhoami:
87            current_rev.committer = 'local user'
88        current_rev.message = "?"
89        current_rev.timestamp = round(time.time(), 3)
90        current_rev.timezone = osutils.local_time_offset()
91    else:
92        current_rev = None
93    annotation = list(_expand_annotations(
94        annotations, branch, current_rev))
95    _print_annotations(annotation, verbose, to_file, full, encoding)
96
97
98def _print_annotations(annotation, verbose, to_file, full, encoding):
99    """Print annotations to to_file.
100
101    :param to_file: The file to output the annotation to.
102    :param verbose: Show all details rather than truncating to ensure
103        reasonable text width.
104    :param full: XXXX Not sure what this does.
105    """
106    if len(annotation) == 0:
107        max_origin_len = max_revno_len = 0
108    else:
109        max_origin_len = max(len(x[1]) for x in annotation)
110        max_revno_len = max(len(x[0]) for x in annotation)
111    if not verbose:
112        max_revno_len = min(max_revno_len, 12)
113    max_revno_len = max(max_revno_len, 3)
114
115    # Output the annotations
116    prevanno = ''
117    for (revno_str, author, date_str, line_rev_id, text) in annotation:
118        if verbose:
119            anno = '%-*s %-*s %8s ' % (max_revno_len, revno_str,
120                                       max_origin_len, author, date_str)
121        else:
122            if len(revno_str) > max_revno_len:
123                revno_str = revno_str[:max_revno_len - 1] + '>'
124            anno = "%-*s %-7s " % (max_revno_len, revno_str, author[:7])
125        if anno.lstrip() == "" and full:
126            anno = prevanno
127        # GZ 2017-05-21: Writing both unicode annotation and bytes from file
128        # which the given to_file must cope with.
129        to_file.write(anno)
130        to_file.write('| %s\n' % (text.decode(encoding),))
131        prevanno = anno
132
133
134def _show_id_annotations(annotations, to_file, full, encoding):
135    if not annotations:
136        return
137    last_rev_id = None
138    max_origin_len = max(len(origin) for origin, text in annotations)
139    for origin, text in annotations:
140        if full or last_rev_id != origin:
141            this = origin
142        else:
143            this = b''
144        to_file.write('%*s | %s' % (
145            max_origin_len, this.decode('utf-8'), text.decode(encoding)))
146        last_rev_id = origin
147    return
148
149
150def _expand_annotations(annotations, branch, current_rev=None):
151    """Expand a file's annotations into command line UI ready tuples.
152
153    Each tuple includes detailed information, such as the author name, and date
154    string for the commit, rather than just the revision id.
155
156    :param annotations: The annotations to expand.
157    :param revision_id_to_revno: A map from id to revision numbers.
158    :param branch: A locked branch to query for revision details.
159    """
160    repository = branch.repository
161    revision_ids = set(o for o, t in annotations)
162    if current_rev is not None:
163        # This can probably become a function on MutableTree, get_revno_map
164        # there, or something.
165        last_revision = current_rev.revision_id
166        # XXX: Partially Cloned from branch, uses the old_get_graph, eep.
167        # XXX: The main difficulty is that we need to inject a single new node
168        #      (current_rev) into the graph before it gets numbered, etc.
169        #      Once KnownGraph gets an 'add_node()' function, we can use
170        #      VF.get_known_graph_ancestry().
171        graph = repository.get_graph()
172        revision_graph = {
173            key: value for key, value in
174            graph.iter_ancestry(current_rev.parent_ids) if value is not None}
175        revision_graph = _strip_NULL_ghosts(revision_graph)
176        revision_graph[last_revision] = current_rev.parent_ids
177        merge_sorted_revisions = tsort.merge_sort(
178            revision_graph,
179            last_revision,
180            None,
181            generate_revno=True)
182        revision_id_to_revno = {
183            rev_id: revno
184            for seq_num, rev_id, depth, revno, end_of_merge in
185            merge_sorted_revisions}
186    else:
187        # TODO(jelmer): Only look up the revision ids that we need (i.e. those
188        # in revision_ids). Possibly add a HPSS call that can look those up
189        # in bulk over HPSS.
190        revision_id_to_revno = branch.get_revision_id_to_revno_map()
191    last_origin = None
192    revisions = {}
193    if CURRENT_REVISION in revision_ids:
194        revision_id_to_revno[CURRENT_REVISION] = (
195            "%d?" % (branch.revno() + 1),)
196        revisions[CURRENT_REVISION] = current_rev
197    revisions.update(
198        entry for entry in
199        repository.iter_revisions(revision_ids)
200        if entry[1] is not None)
201    for origin, text in annotations:
202        text = text.rstrip(b'\r\n')
203        if origin == last_origin:
204            (revno_str, author, date_str) = ('', '', '')
205        else:
206            last_origin = origin
207            if origin not in revisions:
208                (revno_str, author, date_str) = ('?', '?', '?')
209            else:
210                revno_str = '.'.join(
211                    str(i) for i in revision_id_to_revno[origin])
212            rev = revisions[origin]
213            tz = rev.timezone or 0
214            date_str = time.strftime('%Y%m%d',
215                                     time.gmtime(rev.timestamp + tz))
216            # a lazy way to get something like the email address
217            # TODO: Get real email address
218            author = rev.get_apparent_authors()[0]
219            _, email = config.parse_username(author)
220            if email:
221                author = email
222        yield (revno_str, author, date_str, origin, text)
223
224
225def reannotate(parents_lines, new_lines, new_revision_id,
226               _left_matching_blocks=None,
227               heads_provider=None):
228    """Create a new annotated version from new lines and parent annotations.
229
230    :param parents_lines: List of annotated lines for all parents
231    :param new_lines: The un-annotated new lines
232    :param new_revision_id: The revision-id to associate with new lines
233        (will often be CURRENT_REVISION)
234    :param left_matching_blocks: a hint about which areas are common
235        between the text and its left-hand-parent.  The format is
236        the SequenceMatcher.get_matching_blocks format
237        (start_left, start_right, length_of_match).
238    :param heads_provider: An object which provides a .heads() call to resolve
239        if any revision ids are children of others.
240        If None, then any ancestry disputes will be resolved with
241        new_revision_id
242    """
243    if len(parents_lines) == 0:
244        lines = [(new_revision_id, line) for line in new_lines]
245    elif len(parents_lines) == 1:
246        lines = _reannotate(parents_lines[0], new_lines, new_revision_id,
247                            _left_matching_blocks)
248    elif len(parents_lines) == 2:
249        left = _reannotate(parents_lines[0], new_lines, new_revision_id,
250                           _left_matching_blocks)
251        lines = _reannotate_annotated(parents_lines[1], new_lines,
252                                      new_revision_id, left,
253                                      heads_provider)
254    else:
255        reannotations = [_reannotate(parents_lines[0], new_lines,
256                                     new_revision_id, _left_matching_blocks)]
257        reannotations.extend(_reannotate(p, new_lines, new_revision_id)
258                             for p in parents_lines[1:])
259        lines = []
260        for annos in zip(*reannotations):
261            origins = set(a for a, l in annos)
262            if len(origins) == 1:
263                # All the parents agree, so just return the first one
264                lines.append(annos[0])
265            else:
266                line = annos[0][1]
267                if len(origins) == 2 and new_revision_id in origins:
268                    origins.remove(new_revision_id)
269                if len(origins) == 1:
270                    lines.append((origins.pop(), line))
271                else:
272                    lines.append((new_revision_id, line))
273    return lines
274
275
276def _reannotate(parent_lines, new_lines, new_revision_id,
277                matching_blocks=None):
278    new_cur = 0
279    if matching_blocks is None:
280        plain_parent_lines = [l for r, l in parent_lines]
281        matcher = patiencediff.PatienceSequenceMatcher(
282            None, plain_parent_lines, new_lines)
283        matching_blocks = matcher.get_matching_blocks()
284    lines = []
285    for i, j, n in matching_blocks:
286        for line in new_lines[new_cur:j]:
287            lines.append((new_revision_id, line))
288        lines.extend(parent_lines[i:i + n])
289        new_cur = j + n
290    return lines
291
292
293def _get_matching_blocks(old, new):
294    matcher = patiencediff.PatienceSequenceMatcher(None, old, new)
295    return matcher.get_matching_blocks()
296
297
298_break_annotation_tie = None
299
300
301def _old_break_annotation_tie(annotated_lines):
302    """Chose an attribution between several possible ones.
303
304    :param annotated_lines: A list of tuples ((file_id, rev_id), line) where
305        the lines are identical but the revids different while no parent
306        relation exist between them
307
308     :return : The "winning" line. This must be one with a revid that
309         guarantees that further criss-cross merges will converge. Failing to
310         do so have performance implications.
311    """
312    # sort lexicographically so that we always get a stable result.
313
314    # TODO: while 'sort' is the easiest (and nearly the only possible solution)
315    # with the current implementation, chosing the oldest revision is known to
316    # provide better results (as in matching user expectations). The most
317    # common use case being manual cherry-pick from an already existing
318    # revision.
319    return sorted(annotated_lines)[0]
320
321
322def _find_matching_unannotated_lines(output_lines, plain_child_lines,
323                                     child_lines, start_child, end_child,
324                                     right_lines, start_right, end_right,
325                                     heads_provider, revision_id):
326    """Find lines in plain_right_lines that match the existing lines.
327
328    :param output_lines: Append final annotated lines to this list
329    :param plain_child_lines: The unannotated new lines for the child text
330    :param child_lines: Lines for the child text which have been annotated
331        for the left parent
332
333    :param start_child: Position in plain_child_lines and child_lines to start
334        the match searching
335    :param end_child: Last position in plain_child_lines and child_lines to
336        search for a match
337    :param right_lines: The annotated lines for the whole text for the right
338        parent
339    :param start_right: Position in right_lines to start the match
340    :param end_right: Last position in right_lines to search for a match
341    :param heads_provider: When parents disagree on the lineage of a line, we
342        need to check if one side supersedes the other
343    :param revision_id: The label to give if a line should be labeled 'tip'
344    """
345    output_extend = output_lines.extend
346    output_append = output_lines.append
347    # We need to see if any of the unannotated lines match
348    plain_right_subset = [l for a, l in right_lines[start_right:end_right]]
349    plain_child_subset = plain_child_lines[start_child:end_child]
350    match_blocks = _get_matching_blocks(plain_right_subset, plain_child_subset)
351
352    last_child_idx = 0
353
354    for right_idx, child_idx, match_len in match_blocks:
355        # All the lines that don't match are just passed along
356        if child_idx > last_child_idx:
357            output_extend(child_lines[start_child + last_child_idx:
358                                      start_child + child_idx])
359        for offset in range(match_len):
360            left = child_lines[start_child + child_idx + offset]
361            right = right_lines[start_right + right_idx + offset]
362            if left[0] == right[0]:
363                # The annotations match, just return the left one
364                output_append(left)
365            elif left[0] == revision_id:
366                # The left parent marked this as unmatched, so let the
367                # right parent claim it
368                output_append(right)
369            else:
370                # Left and Right both claim this line
371                if heads_provider is None:
372                    output_append((revision_id, left[1]))
373                else:
374                    heads = heads_provider.heads((left[0], right[0]))
375                    if len(heads) == 1:
376                        output_append((next(iter(heads)), left[1]))
377                    else:
378                        # Both claim different origins, get a stable result.
379                        # If the result is not stable, there is a risk a
380                        # performance degradation as criss-cross merges will
381                        # flip-flop the attribution.
382                        if _break_annotation_tie is None:
383                            output_append(
384                                _old_break_annotation_tie([left, right]))
385                        else:
386                            output_append(_break_annotation_tie([left, right]))
387        last_child_idx = child_idx + match_len
388
389
390def _reannotate_annotated(right_parent_lines, new_lines, new_revision_id,
391                          annotated_lines, heads_provider):
392    """Update the annotations for a node based on another parent.
393
394    :param right_parent_lines: A list of annotated lines for the right-hand
395        parent.
396    :param new_lines: The unannotated new lines.
397    :param new_revision_id: The revision_id to attribute to lines which are not
398        present in either parent.
399    :param annotated_lines: A list of annotated lines. This should be the
400        annotation of new_lines based on parents seen so far.
401    :param heads_provider: When parents disagree on the lineage of a line, we
402        need to check if one side supersedes the other.
403    """
404    if len(new_lines) != len(annotated_lines):
405        raise AssertionError("mismatched new_lines and annotated_lines")
406    # First compare the newly annotated lines with the right annotated lines.
407    # Lines which were not changed in left or right should match. This tends to
408    # be the bulk of the lines, and they will need no further processing.
409    lines = []
410    lines_extend = lines.extend
411    # The line just after the last match from the right side
412    last_right_idx = 0
413    last_left_idx = 0
414    matching_left_and_right = _get_matching_blocks(right_parent_lines,
415                                                   annotated_lines)
416    for right_idx, left_idx, match_len in matching_left_and_right:
417        # annotated lines from last_left_idx to left_idx did not match the
418        # lines from last_right_idx to right_idx, the raw lines should be
419        # compared to determine what annotations need to be updated
420        if last_right_idx == right_idx or last_left_idx == left_idx:
421            # One of the sides is empty, so this is a pure insertion
422            lines_extend(annotated_lines[last_left_idx:left_idx])
423        else:
424            # We need to see if any of the unannotated lines match
425            _find_matching_unannotated_lines(lines,
426                                             new_lines, annotated_lines,
427                                             last_left_idx, left_idx,
428                                             right_parent_lines,
429                                             last_right_idx, right_idx,
430                                             heads_provider,
431                                             new_revision_id)
432        last_right_idx = right_idx + match_len
433        last_left_idx = left_idx + match_len
434        # If left and right agree on a range, just push that into the output
435        lines_extend(annotated_lines[left_idx:left_idx + match_len])
436    return lines
437
438
439try:
440    from breezy._annotator_pyx import Annotator
441except ImportError as e:
442    osutils.failed_to_load_extension(e)
443    from breezy._annotator_py import Annotator  # noqa: F401
444