1// Copyright 2012 Aryan Naraghi (aryan.naraghi@gmail.com)
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15// Package difflib provides functionality for computing the difference
16// between two sequences of strings.
17package difflib
18
19import (
20	"bytes"
21	"fmt"
22	"math"
23)
24
25// DeltaType describes the relationship of elements in two
26// sequences. The following table provides a summary:
27//
28//    Constant    Code   Meaning
29//   ----------  ------ ---------------------------------------
30//    Common      " "    The element occurs in both sequences.
31//    LeftOnly    "-"    The element is unique to sequence 1.
32//    RightOnly   "+"    The element is unique to sequence 2.
33type DeltaType int
34
35const (
36	Common DeltaType = iota
37	LeftOnly
38	RightOnly
39)
40
41// String returns a string representation for DeltaType.
42func (t DeltaType) String() string {
43	switch t {
44	case Common:
45		return " "
46	case LeftOnly:
47		return "-"
48	case RightOnly:
49		return "+"
50	}
51	return "?"
52}
53
54type DiffRecord struct {
55	Payload string
56	Delta   DeltaType
57}
58
59// String returns a string representation of d. The string is a
60// concatenation of the delta type and the payload.
61func (d DiffRecord) String() string {
62	return fmt.Sprintf("%s %s", d.Delta, d.Payload)
63}
64
65// Diff returns the result of diffing the seq1 and seq2.
66func Diff(seq1, seq2 []string) (diff []DiffRecord) {
67	// Trims any common elements at the heads and tails of the
68	// sequences before running the diff algorithm. This is an
69	// optimization.
70	start, end := numEqualStartAndEndElements(seq1, seq2)
71
72	for _, content := range seq1[:start] {
73		diff = append(diff, DiffRecord{content, Common})
74	}
75
76	diffRes := compute(seq1[start:len(seq1)-end], seq2[start:len(seq2)-end])
77	diff = append(diff, diffRes...)
78
79	for _, content := range seq1[len(seq1)-end:] {
80		diff = append(diff, DiffRecord{content, Common})
81	}
82	return
83}
84
85// HTMLDiff returns the results of diffing seq1 and seq2 as an HTML
86// string. The resulting HTML is a table without the opening and
87// closing table tags. Each table row represents a DiffRecord. The
88// first and last columns contain the "line numbers" for seq1 and
89// seq2, respectively (the function assumes that seq1 and seq2
90// represent the lines in a file). The second and third columns
91// contain the actual file contents.
92//
93// The cells that contain line numbers are decorated with the class
94// "line-num". The cells that contain deleted elements are decorated
95// with "deleted" and the cells that contain added elements are
96// decorated with "added".
97func HTMLDiff(seq1, seq2 []string) string {
98	buf := bytes.NewBufferString("")
99	i, j := 0, 0
100	for _, d := range Diff(seq1, seq2) {
101		buf.WriteString(`<tr><td class="line-num">`)
102		if d.Delta == Common || d.Delta == LeftOnly {
103			i++
104			fmt.Fprintf(buf, "%d</td><td", i)
105			if d.Delta == LeftOnly {
106				fmt.Fprint(buf, ` class="deleted"`)
107			}
108			fmt.Fprintf(buf, "><pre>%s</pre>", d.Payload)
109		} else {
110			buf.WriteString("</td><td>")
111		}
112		buf.WriteString("</td><td")
113		if d.Delta == Common || d.Delta == RightOnly {
114			j++
115			if d.Delta == RightOnly {
116				fmt.Fprint(buf, ` class="added"`)
117			}
118			fmt.Fprintf(buf, `><pre>%s</pre></td><td class="line-num">%d`, d.Payload, j)
119		} else {
120			buf.WriteString(`></td><td class="line-num">`)
121		}
122		buf.WriteString("</td></tr>\n")
123	}
124	return buf.String()
125}
126
127// numEqualStartAndEndElements returns the number of elements a and b
128// have in common from the beginning and from the end. If a and b are
129// equal, start will equal len(a) == len(b) and end will be zero.
130func numEqualStartAndEndElements(seq1, seq2 []string) (start, end int) {
131	for start < len(seq1) && start < len(seq2) && seq1[start] == seq2[start] {
132		start++
133	}
134	i, j := len(seq1)-1, len(seq2)-1
135	for i > start && j > start && seq1[i] == seq2[j] {
136		i--
137		j--
138		end++
139	}
140	return
141}
142
143// intMatrix returns a 2-dimensional slice of ints with the given
144// number of rows and columns.
145func intMatrix(rows, cols int) [][]int {
146	matrix := make([][]int, rows)
147	for i := 0; i < rows; i++ {
148		matrix[i] = make([]int, cols)
149	}
150	return matrix
151}
152
153// longestCommonSubsequenceMatrix returns the table that results from
154// applying the dynamic programming approach for finding the longest
155// common subsequence of seq1 and seq2.
156func longestCommonSubsequenceMatrix(seq1, seq2 []string) [][]int {
157	matrix := intMatrix(len(seq1)+1, len(seq2)+1)
158	for i := 1; i < len(matrix); i++ {
159		for j := 1; j < len(matrix[i]); j++ {
160			if seq1[len(seq1)-i] == seq2[len(seq2)-j] {
161				matrix[i][j] = matrix[i-1][j-1] + 1
162			} else {
163				matrix[i][j] = int(math.Max(float64(matrix[i-1][j]),
164					float64(matrix[i][j-1])))
165			}
166		}
167	}
168	return matrix
169}
170
171// compute is the unexported helper for Diff that returns the results of
172// diffing left and right.
173func compute(seq1, seq2 []string) (diff []DiffRecord) {
174	matrix := longestCommonSubsequenceMatrix(seq1, seq2)
175	i, j := len(seq1), len(seq2)
176	for i > 0 || j > 0 {
177		if i > 0 && matrix[i][j] == matrix[i-1][j] {
178			diff = append(diff, DiffRecord{seq1[len(seq1)-i], LeftOnly})
179			i--
180		} else if j > 0 && matrix[i][j] == matrix[i][j-1] {
181			diff = append(diff, DiffRecord{seq2[len(seq2)-j], RightOnly})
182			j--
183		} else if i > 0 && j > 0 {
184			diff = append(diff, DiffRecord{seq1[len(seq1)-i], Common})
185			i--
186			j--
187		}
188	}
189	return
190}
191