| # Copyright 2016 The Chromium Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| """Utility for outputting a HTML diff of two multi-line strings. |
| |
| The main purpose of this utility is to show the difference between |
| text baselines (-expected.txt files) and actual text results. |
| |
| Note, in the standard library module difflib, there is also a HtmlDiff class, |
| although it outputs a larger and more complex HTML table than we need. |
| """ |
| |
| import cgi |
| import difflib |
| |
# Skeleton of the HTML page returned by html_diff(). The single %s placeholder
# inside <table> is filled with the generated rows; the .del and .add CSS
# classes are the ones put on the <td> cells of deleted and inserted lines.
_TEMPLATE = """<html>
<head>
<style>
table { white-space: pre-wrap; font-family: monospace; border-collapse: collapse; }
th { color: #444; background: #eed; text-align: right; vertical-align: baseline; padding: 1px 4px 1px 4px; }
.del { background: #faa; }
.add { background: #afa; }
</style>
</head>
<body><table>%s</table></body>
</html>
"""
| |
| |
def html_diff(a_text, b_text):
    """Renders the line-by-line difference of two strings as an HTML page.

    Args:
        a_text: The first text; must be a str.
        b_text: The second text; must be a str.

    Returns:
        _TEMPLATE with its placeholder replaced by the table rows that
        HtmlDiffGenerator produces for the two texts.
    """
    # The texts may come from files in differing encodings, so they are
    # handled as byte arrays rather than unicode strings.
    for text in (a_text, b_text):
        assert isinstance(text, str)
    # splitlines(True) keeps the line terminators on every line.
    tbody = HtmlDiffGenerator().generate_tbody(
        a_text.splitlines(True), b_text.splitlines(True))
    return _TEMPLATE % tbody
| |
| |
class HtmlDiffGenerator(object):
    """Builds the <tr> rows of an HTML table showing a line diff.

    An instance keeps running line numbers for both inputs while
    generate_tbody() walks the difflib opcodes. Long runs of unchanged
    lines are collapsed to _CONTEXT_LINES lines of context on each side.
    """

    # Number of unchanged lines shown on each side of a changed region.
    _CONTEXT_LINES = 3

    def __init__(self):
        # Number of the last line consumed from each input, and the total
        # length of each input; all four are (re)set by generate_tbody().
        self.a_line_no = None
        self.b_line_no = None
        self.a_lines_len = None
        self.b_lines_len = None

    def generate_tbody(self, a_lines, b_lines):
        """Returns the table rows for the diff between two lists of lines.

        Args:
            a_lines: A list of the lines of the first text.
            b_lines: A list of the lines of the second text.

        Returns:
            A string of concatenated <tr> elements (empty if both lists
            are empty).
        """
        self.a_line_no = 0
        self.b_line_no = 0
        self.a_lines_len = len(a_lines)
        self.b_lines_len = len(b_lines)
        matcher = difflib.SequenceMatcher(None, a_lines, b_lines)
        return ''.join(
            self._format_chunk(tag, a_lines[a_start:a_end], b_lines[b_start:b_end])
            for tag, a_start, a_end, b_start, b_end in matcher.get_opcodes())

    @staticmethod
    def _escape(text):
        """Escapes &, < and > in text for use inside HTML element content.

        cgi.escape() was removed in Python 3.8, so html.escape() is used when
        it is available. quote=False leaves quote characters alone, which
        matches the default behaviour of cgi.escape().
        """
        try:
            from html import escape
        except ImportError:
            # Python 2 has no html module; use the module-level cgi import.
            return cgi.escape(text)
        return escape(text, quote=False)

    def _format_chunk(self, tag, a_chunk, b_chunk):
        """Returns the rows for one opcode produced by SequenceMatcher."""
        if tag == 'delete':
            return self._format_delete(a_chunk)
        if tag == 'insert':
            return self._format_insert(b_chunk)
        if tag == 'replace':
            # Deleted lines are listed before the lines that replace them.
            return self._format_delete(a_chunk) + self._format_insert(b_chunk)
        assert tag == 'equal'
        return self._format_equal(a_chunk)

    def _format_equal(self, common_chunk):
        """Returns the rows for a run of lines common to both inputs.

        Short runs are shown in full. Longer runs are reduced to leading and
        trailing context around a separator row; the leading context is
        dropped at the very start of the inputs and the trailing context is
        dropped when the run reaches the end of both inputs.
        """
        context = self._CONTEXT_LINES
        rows = []
        if len(common_chunk) <= 2 * context + 1:
            for line in common_chunk:
                rows.append(self._format_equal_line(line))
            return ''.join(rows)

        if self.a_line_no == 0 and self.b_line_no == 0:
            # Start of the file: count the leading context without showing it.
            self.a_line_no += context
            self.b_line_no += context
        else:
            for line in common_chunk[:context]:
                rows.append(self._format_equal_line(line))

        # Skip over the middle of the run and mark the gap.
        skipped = len(common_chunk) - 2 * context
        self.a_line_no += skipped
        self.b_line_no += skipped
        rows.append('<tr><td colspan=3>\n\n</tr>')

        # Trailing context is only omitted when both inputs end with this run;
        # the counters are then left short, which is harmless because no
        # further chunk follows.
        if (self.a_line_no + context != self.a_lines_len or
                self.b_line_no + context != self.b_lines_len):
            for line in common_chunk[len(common_chunk) - context:]:
                rows.append(self._format_equal_line(line))
        return ''.join(rows)

    def _format_equal_line(self, line):
        """Returns the row for one unchanged line, advancing both counters."""
        self.a_line_no += 1
        self.b_line_no += 1
        return '<tr><th>%d<th>%d<td>%s</tr>' % (
            self.a_line_no, self.b_line_no, self._escape(line))

    def _format_insert(self, chunk):
        """Returns rows for lines present only in the second input."""
        rows = []
        for line in chunk:
            self.b_line_no += 1
            rows.append('<tr><th><th>%d<td class="add">%s</tr>' % (
                self.b_line_no, self._escape(line)))
        return ''.join(rows)

    def _format_delete(self, chunk):
        """Returns rows for lines present only in the first input."""
        rows = []
        for line in chunk:
            self.a_line_no += 1
            rows.append('<tr><th>%d<th><td class="del">%s</tr>' % (
                self.a_line_no, self._escape(line)))
        return ''.join(rows)