# Copyright 2016 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Utility for outputting an HTML diff of two multi-line strings.

The main purpose of this utility is to show the difference between
text baselines (-expected.txt files) and actual text results.

Note, in the standard library module difflib, there is also a HtmlDiff class,
although it outputs a larger and more complex HTML table than we need.
"""

import difflib

try:
    # cgi.escape() was removed in Python 3.8 (and the cgi module in 3.13);
    # html.escape() is its replacement.
    from html import escape as _escape
except ImportError:
    # Python 2 has no html module; fall back to the original helper.
    from cgi import escape as _escape

_TEMPLATE = """<html>
<head>
<style>
table { white-space: pre-wrap; font-family: monospace; border-collapse: collapse; }
th { color: #444; background: #eed; text-align: right; vertical-align: baseline; padding: 1px 4px 1px 4px; }
.del { background: #faa; }
.add { background: #afa; }
</style>
</head>
<body><table>%s</table></body>
</html>
"""


def _escape_line(line):
    """Escapes '&', '<' and '>' in a line so it can be embedded in HTML.

    Quotes are left untouched (quote=False), which matches the default
    behavior of the cgi.escape() call this helper replaces.
    """
    return _escape(line, quote=False)


def html_diff(a_text, b_text):
    """Returns a diff between two strings as HTML.

    Args:
        a_text: The "before" text; must be a str.
        b_text: The "after" text; must be a str.

    Returns:
        A complete HTML document (a str) containing one table whose rows
        are the unchanged, deleted and inserted lines.
    """
    # Diffs can be between multiple text files of different encodings, so
    # both inputs must be plain str (a byte string on Python 2, where this
    # check rejects unicode objects).
    assert isinstance(a_text, str)
    assert isinstance(b_text, str)
    # keepends=True: line terminators stay part of each line and are emitted
    # verbatim inside the table cells.
    a_lines = a_text.splitlines(True)
    b_lines = b_text.splitlines(True)
    return _TEMPLATE % HtmlDiffGenerator().generate_tbody(a_lines, b_lines)


class HtmlDiffGenerator(object):
    """Builds the <tr> rows of an HTML diff table from two lists of lines.

    The instance tracks how many lines of each input have been consumed so
    far, so that every emitted row can carry 1-based line numbers.
    """

    def __init__(self):
        # All state is (re)initialized by generate_tbody().
        self.a_line_no = None
        self.b_line_no = None
        self.a_lines_len = None
        self.b_lines_len = None

    def generate_tbody(self, a_lines, b_lines):
        """Returns the table rows describing the diff of two line lists.

        Args:
            a_lines: List of lines of the "before" text.
            b_lines: List of lines of the "after" text.

        Returns:
            A str of concatenated <tr> elements (empty if both lists are
            empty).
        """
        self.a_line_no = 0
        self.b_line_no = 0
        self.a_lines_len = len(a_lines)
        self.b_lines_len = len(b_lines)
        matcher = difflib.SequenceMatcher(None, a_lines, b_lines)
        rows = [
            self._format_chunk(tag, a_lines[a_start:a_end], b_lines[b_start:b_end])
            for tag, a_start, a_end, b_start, b_end in matcher.get_opcodes()
        ]
        return ''.join(rows)

    def _format_chunk(self, tag, a_chunk, b_chunk):
        """Formats one SequenceMatcher opcode as table rows.

        Args:
            tag: One of 'delete', 'insert', 'replace' or 'equal'.
            a_chunk: The lines of the "before" text covered by the opcode.
            b_chunk: The lines of the "after" text covered by the opcode.
        """
        if tag == 'delete':
            return self._format_delete(a_chunk)
        if tag == 'insert':
            return self._format_insert(b_chunk)
        if tag == 'replace':
            # A replacement is shown as the removed lines followed by the
            # added lines.
            return self._format_delete(a_chunk) + self._format_insert(b_chunk)
        assert tag == 'equal'
        return self._format_equal(a_chunk)

    def _format_equal(self, common_chunk):
        """Formats a run of unchanged lines, eliding the middle of long runs.

        Runs of up to 7 lines are shown in full. Longer runs are reduced to
        3 lines of context on each side of a blank separator row; the
        leading context is dropped when the run starts both files and the
        trailing context is dropped when the run ends both files.
        """
        rows = []
        chunk_len = len(common_chunk)
        if chunk_len <= 7:
            for line in common_chunk:
                rows.append(self._format_equal_line(line))
            return ''.join(rows)

        # Do not show context lines at the beginning of the file.
        if self.a_line_no == 0 and self.b_line_no == 0:
            self.a_line_no += 3
            self.b_line_no += 3
        else:
            for line in common_chunk[:3]:
                rows.append(self._format_equal_line(line))
        # Skip everything except the 3 leading and 3 trailing lines.
        self.a_line_no += chunk_len - 6
        self.b_line_no += chunk_len - 6
        rows.append('<tr><td colspan=3>\n\n</tr>')
        # Do not show context lines at the end of the file. In that case the
        # counters are left 3 short, which is harmless because no further
        # chunk can follow.
        if self.a_line_no + 3 != self.a_lines_len or self.b_line_no + 3 != self.b_lines_len:
            for line in common_chunk[-3:]:
                rows.append(self._format_equal_line(line))
        return ''.join(rows)

    def _format_equal_line(self, line):
        """Returns the row for one unchanged line, advancing both counters."""
        self.a_line_no += 1
        self.b_line_no += 1
        return '<tr><th>%d<th>%d<td>%s</tr>' % (self.a_line_no, self.b_line_no, _escape_line(line))

    def _format_insert(self, chunk):
        """Returns rows for lines present only in the "after" text."""
        rows = []
        for line in chunk:
            self.b_line_no += 1
            rows.append('<tr><th><th>%d<td class="add">%s</tr>' % (self.b_line_no, _escape_line(line)))
        return ''.join(rows)

    def _format_delete(self, chunk):
        """Returns rows for lines present only in the "before" text."""
        rows = []
        for line in chunk:
            self.a_line_no += 1
            rows.append('<tr><th>%d<th><td class="del">%s</tr>' % (self.a_line_no, _escape_line(line)))
        return ''.join(rows)