1# Copyright 2016 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5"""Utility for outputting a HTML diff of two multi-line strings.
6
7The main purpose of this utility is to show the difference between
8text baselines (-expected.txt files) and actual text results.
9
Note that the standard library's difflib module also provides an HtmlDiff
class, but it outputs a larger and more complex HTML table than we need.
12"""
13
14import cgi
15import difflib
16
# Skeleton of the HTML page returned by html_diff(). The single %s placeholder
# inside <table> is filled with the rows built by
# HtmlDiffGenerator.generate_tbody(). The ".del" and ".add" CSS classes are the
# ones attached to the cells of deleted and inserted lines respectively.
# NOTE: this string is used with the % operator, so any literal percent sign
# added to the markup or CSS must be written as %%.
_TEMPLATE = """<html>
<head>
<style>
table { white-space: pre-wrap; font-family: monospace; border-collapse: collapse; }
th { color: #444; background: #eed; text-align: right; vertical-align: baseline; padding: 1px 4px 1px 4px; }
.del { background: #faa; }
.add { background: #afa; }
</style>
</head>
<body><table>%s</table></body>
</html>
"""
29
30
def html_diff(a_text, b_text):
    """Builds a full HTML page showing the line diff of a_text against b_text.

    Both arguments must be str instances. Each one is split into lines (line
    endings are kept), the lines are diffed by HtmlDiffGenerator, and the
    resulting table rows are substituted into _TEMPLATE.
    """
    # The inputs may come from files with different encodings, so they are
    # handled as plain byte strings rather than as unicode strings.
    for text in (a_text, b_text):
        assert isinstance(text, str)
    generator = HtmlDiffGenerator()
    table_rows = generator.generate_tbody(
        a_text.splitlines(True), b_text.splitlines(True))
    return _TEMPLATE % table_rows
40
41
class HtmlDiffGenerator(object):
    """Generates the HTML table rows for a line-based diff of two texts.

    Each output row has three cells: the line number in the first ("a")
    sequence, the line number in the second ("b") sequence, and the escaped
    line text. Deleted lines only have an "a" number and use the "del" CSS
    class; inserted lines only have a "b" number and use the "add" class.

    Long runs of unchanged lines are abbreviated: only `context_lines` lines
    are kept next to each neighbouring change and the rest is replaced by a
    single separator row.

    The instance keeps running line counters while generating, so a single
    instance must not be used for two diffs at the same time; calling
    generate_tbody() again resets the counters.
    """

    def __init__(self, context_lines=3):
        """Creates a generator.

        Args:
            context_lines: Number of unchanged lines to keep on each side of
                an abbreviated run of equal lines. Defaults to 3.

        Raises:
            ValueError: If context_lines is negative.
        """
        if context_lines < 0:
            raise ValueError('context_lines must be non-negative')
        self.context_lines = context_lines
        # Number of lines consumed so far from each side; None until
        # generate_tbody() is called.
        self.a_line_no = None
        self.b_line_no = None
        # Total number of lines on each side, used to detect the end of input.
        self.a_lines_len = None
        self.b_lines_len = None

    def generate_tbody(self, a_lines, b_lines):
        """Returns the concatenated <tr> rows for the diff of two line lists.

        Args:
            a_lines: Lines of the first text (line endings are output as-is).
            b_lines: Lines of the second text.

        Returns:
            A string of <tr> elements; empty when both lists are empty.
        """
        self.a_line_no = 0
        self.b_line_no = 0
        self.a_lines_len = len(a_lines)
        self.b_lines_len = len(b_lines)
        matcher = difflib.SequenceMatcher(None, a_lines, b_lines)
        return ''.join(
            self._format_chunk(tag, a_lines[a_start:a_end], b_lines[b_start:b_end])
            for tag, a_start, a_end, b_start, b_end in matcher.get_opcodes())

    @staticmethod
    def _escape(text):
        """Escapes &, < and > in text; quote characters are left untouched.

        cgi.escape() was removed in Python 3.8, so html.escape() is preferred
        when it exists. quote=False keeps the output identical to what
        cgi.escape(text) produces.
        """
        try:
            from html import escape
        except ImportError:  # Python 2 has no html module.
            from cgi import escape
        return escape(text, quote=False)

    def _format_chunk(self, tag, a_chunk, b_chunk):
        """Formats one SequenceMatcher opcode given the lines it covers."""
        if tag == 'delete':
            return self._format_delete(a_chunk)
        if tag == 'insert':
            return self._format_insert(b_chunk)
        if tag == 'replace':
            # A replacement is shown as the old lines followed by the new ones.
            return self._format_delete(a_chunk) + self._format_insert(b_chunk)
        assert tag == 'equal'
        return self._format_equal(a_chunk)

    def _format_equal(self, common_chunk):
        """Formats a run of lines that is identical on both sides.

        Short runs are shown completely. Longer runs are reduced to the
        leading and trailing context lines around a separator row; the
        leading context is dropped at the very start of the input and the
        trailing context is dropped at the very end of both inputs.
        """
        context = self.context_lines
        chunk_len = len(common_chunk)
        # Abbreviating only pays off when it hides more than a single line.
        if chunk_len <= 2 * context + 1:
            return ''.join(self._format_equal_line(line) for line in common_chunk)

        rows = []
        # Do not show context lines at the beginning of the file.
        if self.a_line_no == 0 and self.b_line_no == 0:
            self.a_line_no += context
            self.b_line_no += context
        else:
            rows.extend(self._format_equal_line(line)
                        for line in common_chunk[:context])
        # Account for the hidden lines so later line numbers stay correct.
        hidden = chunk_len - 2 * context
        self.a_line_no += hidden
        self.b_line_no += hidden
        rows.append('<tr><td colspan=3>\n\n</tr>')
        # Do not show context lines at the end of the file.
        if (self.a_line_no + context != self.a_lines_len or
                self.b_line_no + context != self.b_lines_len):
            # Slice from an explicit index: a negative-index slice would
            # return the whole chunk when context is 0.
            rows.extend(self._format_equal_line(line)
                        for line in common_chunk[chunk_len - context:])
        return ''.join(rows)

    def _format_equal_line(self, line):
        """Returns the row for one unchanged line and advances both counters."""
        self.a_line_no += 1
        self.b_line_no += 1
        return '<tr><th>%d<th>%d<td>%s</tr>' % (
            self.a_line_no, self.b_line_no, self._escape(line))

    def _format_insert(self, chunk):
        """Returns rows for lines present only in b; advances the b counter."""
        rows = []
        for line in chunk:
            self.b_line_no += 1
            rows.append('<tr><th><th>%d<td class="add">%s</tr>' % (
                self.b_line_no, self._escape(line)))
        return ''.join(rows)

    def _format_delete(self, chunk):
        """Returns rows for lines present only in a; advances the a counter."""
        rows = []
        for line in chunk:
            self.a_line_no += 1
            rows.append('<tr><th>%d<th><td class="del">%s</tr>' % (
                self.a_line_no, self._escape(line)))
        return ''.join(rows)
110