1// Copyright 2012 Aryan Naraghi (aryan.naraghi@gmail.com) 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15// Package difflib provides functionality for computing the difference 16// between two sequences of strings. 17package difflib 18 19import ( 20 "bytes" 21 "fmt" 22 "math" 23) 24 25// DeltaType describes the relationship of elements in two 26// sequences. The following table provides a summary: 27// 28// Constant Code Meaning 29// ---------- ------ --------------------------------------- 30// Common " " The element occurs in both sequences. 31// LeftOnly "-" The element is unique to sequence 1. 32// RightOnly "+" The element is unique to sequence 2. 33type DeltaType int 34 35const ( 36 Common DeltaType = iota 37 LeftOnly 38 RightOnly 39) 40 41// String returns a string representation for DeltaType. 42func (t DeltaType) String() string { 43 switch t { 44 case Common: 45 return " " 46 case LeftOnly: 47 return "-" 48 case RightOnly: 49 return "+" 50 } 51 return "?" 52} 53 54type DiffRecord struct { 55 Payload string 56 Delta DeltaType 57} 58 59// String returns a string representation of d. The string is a 60// concatenation of the delta type and the payload. 61func (d DiffRecord) String() string { 62 return fmt.Sprintf("%s %s", d.Delta, d.Payload) 63} 64 65// Diff returns the result of diffing the seq1 and seq2. 66func Diff(seq1, seq2 []string) (diff []DiffRecord) { 67 // Trims any common elements at the heads and tails of the 68 // sequences before running the diff algorithm. This is an 69 // optimization. 70 start, end := numEqualStartAndEndElements(seq1, seq2) 71 72 for _, content := range seq1[:start] { 73 diff = append(diff, DiffRecord{content, Common}) 74 } 75 76 diffRes := compute(seq1[start:len(seq1)-end], seq2[start:len(seq2)-end]) 77 diff = append(diff, diffRes...) 78 79 for _, content := range seq1[len(seq1)-end:] { 80 diff = append(diff, DiffRecord{content, Common}) 81 } 82 return 83} 84 85// HTMLDiff returns the results of diffing seq1 and seq2 as an HTML 86// string. The resulting HTML is a table without the opening and 87// closing table tags. Each table row represents a DiffRecord. The 88// first and last columns contain the "line numbers" for seq1 and 89// seq2, respectively (the function assumes that seq1 and seq2 90// represent the lines in a file). The second and third columns 91// contain the actual file contents. 92// 93// The cells that contain line numbers are decorated with the class 94// "line-num". The cells that contain deleted elements are decorated 95// with "deleted" and the cells that contain added elements are 96// decorated with "added". 97func HTMLDiff(seq1, seq2 []string) string { 98 buf := bytes.NewBufferString("") 99 i, j := 0, 0 100 for _, d := range Diff(seq1, seq2) { 101 buf.WriteString(`<tr><td class="line-num">`) 102 if d.Delta == Common || d.Delta == LeftOnly { 103 i++ 104 fmt.Fprintf(buf, "%d</td><td", i) 105 if d.Delta == LeftOnly { 106 fmt.Fprint(buf, ` class="deleted"`) 107 } 108 fmt.Fprintf(buf, "><pre>%s</pre>", d.Payload) 109 } else { 110 buf.WriteString("</td><td>") 111 } 112 buf.WriteString("</td><td") 113 if d.Delta == Common || d.Delta == RightOnly { 114 j++ 115 if d.Delta == RightOnly { 116 fmt.Fprint(buf, ` class="added"`) 117 } 118 fmt.Fprintf(buf, `><pre>%s</pre></td><td class="line-num">%d`, d.Payload, j) 119 } else { 120 buf.WriteString(`></td><td class="line-num">`) 121 } 122 buf.WriteString("</td></tr>\n") 123 } 124 return buf.String() 125} 126 127// numEqualStartAndEndElements returns the number of elements a and b 128// have in common from the beginning and from the end. If a and b are 129// equal, start will equal len(a) == len(b) and end will be zero. 130func numEqualStartAndEndElements(seq1, seq2 []string) (start, end int) { 131 for start < len(seq1) && start < len(seq2) && seq1[start] == seq2[start] { 132 start++ 133 } 134 i, j := len(seq1)-1, len(seq2)-1 135 for i > start && j > start && seq1[i] == seq2[j] { 136 i-- 137 j-- 138 end++ 139 } 140 return 141} 142 143// intMatrix returns a 2-dimensional slice of ints with the given 144// number of rows and columns. 145func intMatrix(rows, cols int) [][]int { 146 matrix := make([][]int, rows) 147 for i := 0; i < rows; i++ { 148 matrix[i] = make([]int, cols) 149 } 150 return matrix 151} 152 153// longestCommonSubsequenceMatrix returns the table that results from 154// applying the dynamic programming approach for finding the longest 155// common subsequence of seq1 and seq2. 156func longestCommonSubsequenceMatrix(seq1, seq2 []string) [][]int { 157 matrix := intMatrix(len(seq1)+1, len(seq2)+1) 158 for i := 1; i < len(matrix); i++ { 159 for j := 1; j < len(matrix[i]); j++ { 160 if seq1[len(seq1)-i] == seq2[len(seq2)-j] { 161 matrix[i][j] = matrix[i-1][j-1] + 1 162 } else { 163 matrix[i][j] = int(math.Max(float64(matrix[i-1][j]), 164 float64(matrix[i][j-1]))) 165 } 166 } 167 } 168 return matrix 169} 170 171// compute is the unexported helper for Diff that returns the results of 172// diffing left and right. 173func compute(seq1, seq2 []string) (diff []DiffRecord) { 174 matrix := longestCommonSubsequenceMatrix(seq1, seq2) 175 i, j := len(seq1), len(seq2) 176 for i > 0 || j > 0 { 177 if i > 0 && matrix[i][j] == matrix[i-1][j] { 178 diff = append(diff, DiffRecord{seq1[len(seq1)-i], LeftOnly}) 179 i-- 180 } else if j > 0 && matrix[i][j] == matrix[i][j-1] { 181 diff = append(diff, DiffRecord{seq2[len(seq2)-j], RightOnly}) 182 j-- 183 } else if i > 0 && j > 0 { 184 diff = append(diff, DiffRecord{seq1[len(seq1)-i], Common}) 185 i-- 186 j-- 187 } 188 } 189 return 190} 191