1<?php
2/* Copyright (C) 2016      Jean-François Ferry  <hello@librethic.io>
3 *
4 * A class containing a diff implementation
5 *
6 * Created by Stephen Morley - http://stephenmorley.org/ - and released under the
7 * terms of the CC0 1.0 Universal legal code:
8 *
9 * http://creativecommons.org/publicdomain/zero/1.0/legalcode
10 */
11
12
/**
 * Computes the difference between two texts and formats the result.
 *
 * A diff is a list of entries. Each entry is an array of two values: the item
 * (a line or a character) and one of the constants Diff::UNMODIFIED,
 * Diff::DELETED or Diff::INSERTED.
 */
class Diff
{
	// entry types used in the second slot of every diff entry
	const UNMODIFIED = 0;
	const DELETED = 1;
	const INSERTED = 2;

	/**
	 * Returns the diff for two strings. The return value is an array, each of
	 * whose values is an array containing two values: a line (or character, if
	 * $compareCharacters is true), and one of the constants Diff::UNMODIFIED (the
	 * item is in both strings), Diff::DELETED (the item is only in the first
	 * string), and Diff::INSERTED (the item is only in the second string).
	 *
	 * Items are compared with strict string equality, so numeric-looking lines
	 * such as "1e3" and "1000" are reported as different.
	 *
	 * @param	string	$string1            First string
	 * @param	string	$string2            Second string
	 * @param	bool	$compareCharacters  true to compare characters, and false to compare lines; this optional parameter defaults to false
	 * @return	array						Array of diff entries
	 */
	public static function compare($string1, $string2, $compareCharacters = false)
	{
		// split both inputs into the sequences of items to compare
		list($sequence1, $sequence2) = self::splitSequences((string) $string1, (string) $string2, $compareCharacters);

		// initialise the comparison start and end positions
		$start = 0;
		$end1 = count($sequence1) - 1;
		$end2 = count($sequence2) - 1;

		// skip any common prefix
		while ($start <= $end1 && $start <= $end2
			&& $sequence1[$start] === $sequence2[$start]) {
			$start++;
		}

		// skip any common suffix
		while ($end1 >= $start && $end2 >= $start
			&& $sequence1[$end1] === $sequence2[$end2]) {
			$end1--;
			$end2--;
		}

		// compute the table of longest common subsequence lengths
		$table = self::computeTable($sequence1, $sequence2, $start, $end1, $end2);

		// generate the partial diff (it is produced in reverse order)
		$partialDiff = self::generatePartialDiff($table, $sequence1, $sequence2, $start);

		// generate the full diff: common prefix, changed region, common suffix
		$diff = array();
		for ($index = 0; $index < $start; $index++) {
			$diff[] = array($sequence1[$index], self::UNMODIFIED);
		}
		foreach (array_reverse($partialDiff) as $entry) {
			$diff[] = $entry;
		}
		$length1 = count($sequence1);
		for ($index = $end1 + 1; $index < $length1; $index++) {
			$diff[] = array($sequence1[$index], self::UNMODIFIED);
		}

		// return the diff
		return $diff;
	}

	/**
	 * Splits both strings into the lists of items (lines or characters) to compare.
	 *
	 * The strings are first split as UTF-8 so that multibyte characters stay
	 * intact: in byte mode a character comparison would cut them into invalid
	 * fragments, and the line pattern \R would match the lone byte 0x85 that is
	 * part of characters such as "Å". If either string is not valid UTF-8, both
	 * are split in byte mode so that the two sequences remain comparable.
	 *
	 * @param	string	$string1            First string
	 * @param	string	$string2            Second string
	 * @param	bool	$compareCharacters  true to split into characters, false to split into lines
	 * @return	array						Array holding the first and the second sequence, in that order
	 */
	private static function splitSequences($string1, $string2, $compareCharacters)
	{
		if ($compareCharacters) {
			$sequence1 = preg_split('//u', $string1, -1, PREG_SPLIT_NO_EMPTY);
			$sequence2 = preg_split('//u', $string2, -1, PREG_SPLIT_NO_EMPTY);
			if ($sequence1 === false || $sequence2 === false) {
				// str_split('') does not return an empty array on every PHP version
				$sequence1 = ($string1 === '' ? array() : str_split($string1));
				$sequence2 = ($string2 === '' ? array() : str_split($string2));
			}
		} else {
			$sequence1 = preg_split('/\R/u', $string1);
			$sequence2 = preg_split('/\R/u', $string2);
			if ($sequence1 === false || $sequence2 === false) {
				$sequence1 = preg_split('/\R/', $string1);
				$sequence2 = preg_split('/\R/', $string2);
			}
		}

		return array($sequence1, $sequence2);
	}

	/**
	 * Returns the diff for two files. The files are read with
	 * file_get_contents(); a file that cannot be read is compared as if it were
	 * empty.
	 *
	 * @param	string	$file1              Path to the first file
	 * @param	string	$file2              Path to the second file
	 * @param	bool	$compareCharacters  true to compare characters, and false to compare lines; this optional parameter defaults to false
	 * @return	array						Array of diff entries
	 */
	public static function compareFiles(
		$file1,
		$file2,
		$compareCharacters = false
	) {

		// return the diff of the files
		return self::compare(
			file_get_contents($file1),
			file_get_contents($file2),
			$compareCharacters
		);
	}

	/**
	 * Returns the table of longest common subsequence lengths for the specified
	 * sequences. Cell [i][j] holds the length of the longest common subsequence
	 * of the first i items of the first range and the first j items of the
	 * second range; row 0 and column 0 are all zeros.
	 *
	 * @param	array	$sequence1 	the first sequence
	 * @param	array	$sequence2 	the second sequence
	 * @param	int		$start     	the starting index (shared by both sequences)
	 * @param	int		$end1      	the ending index for the first sequence
	 * @param	int		$end2      	the ending index for the second sequence
	 * @return	array				two-dimensional table of subsequence lengths
	 */
	private static function computeTable($sequence1, $sequence2, $start, $end1, $end2)
	{
		// determine the lengths to be compared
		$length1 = $end1 - $start + 1;
		$length2 = $end2 - $start + 1;

		// initialise the table
		$table = array(array_fill(0, $length2 + 1, 0));

		// loop over the rows
		for ($index1 = 1; $index1 <= $length1; $index1++) {
			// create the new row
			$table[$index1] = array(0);

			// loop over the columns
			for ($index2 = 1; $index2 <= $length2; $index2++) {
				// store the longest common subsequence length
				if ($sequence1[$index1 + $start - 1] === $sequence2[$index2 + $start - 1]) {
					$table[$index1][$index2] = $table[$index1 - 1][$index2 - 1] + 1;
				} else {
					$table[$index1][$index2] = max($table[$index1 - 1][$index2], $table[$index1][$index2 - 1]);
				}
			}
		}

		// return the table
		return $table;
	}

	/**
	 * Returns the partial diff for the specified sequences, in reverse order.
	 * The table is walked backwards from its last cell; at each step an item is
	 * reported as unmodified, inserted or deleted.
	 *
	 * @param	array	$table     	the table returned by the computeTable function
	 * @param	array	$sequence1 	the first sequence
	 * @param	array	$sequence2 	the second sequence
	 * @param	int		$start     	the starting index
	 * @return	array				array of diff entries, last entry first
	 */
	private static function generatePartialDiff($table, $sequence1, $sequence2, $start)
	{
		//  initialise the diff
		$diff = array();

		// initialise the indices
		$index1 = count($table) - 1;
		$index2 = count($table[0]) - 1;

		// loop until there are no items remaining in either sequence
		while ($index1 > 0 || $index2 > 0) {
			// check what has happened to the items at these indices
			if ($index1 > 0 && $index2 > 0
				&& $sequence1[$index1 + $start - 1] === $sequence2[$index2 + $start - 1]
			) {
				// the item is in both sequences
				$diff[] = array($sequence1[$index1 + $start - 1], self::UNMODIFIED);
				$index1--;
				$index2--;
			} elseif ($index2 > 0
				&& $table[$index1][$index2] === $table[$index1][$index2 - 1]
			) {
				// the item is only in the second sequence
				$diff[] = array($sequence2[$index2 + $start - 1], self::INSERTED);
				$index2--;
			} else {
				// the item is only in the first sequence
				$diff[] = array($sequence1[$index1 + $start - 1], self::DELETED);
				$index1--;
			}
		}

		// return the diff
		return $diff;
	}

	/**
	 * Returns a diff as a string, where unmodified lines are prefixed by '  ',
	 * deletions are prefixed by '- ', and insertions are prefixed by '+ '.
	 * Every entry is followed by the separator; an entry of unknown type
	 * contributes only the separator.
	 *
	 * @param	array	$diff      	the diff array
	 * @param	string	$separator 	the separator between lines; this optional parameter defaults to "\n"
	 * @return	string				String
	 */
	public static function toString($diff, $separator = "\n")
	{
		// initialise the string
		$string = '';

		// loop over the lines in the diff
		foreach ($diff as $line) {
			// extend the string with the line
			switch ($line[1]) {
				case self::UNMODIFIED:
					$string .= '  '.$line[0];
					break;
				case self::DELETED:
					$string .= '- '.$line[0];
					break;
				case self::INSERTED:
					$string .= '+ '.$line[0];
					break;
			}

			// extend the string with the separator
			$string .= $separator;
		}

		// return the string
		return $string;
	}

	/**
	 * Returns a diff as an HTML string, where unmodified lines are contained
	 * within 'span' elements, deletions are contained within 'del' elements, and
	 * insertions are contained within 'ins' elements. The line content is
	 * escaped with htmlspecialchars(). Every entry is followed by the separator;
	 * an entry of unknown type contributes only the separator.
	 *
	 * @param	array	$diff      	the diff array
	 * @param	string	$separator 	the separator between lines; this optional parameter defaults to '<br>'
	 * @return	string				HTML string
	 */
	public static function toHTML($diff, $separator = '<br>')
	{
		// initialise the HTML
		$html = '';

		// loop over the lines in the diff
		foreach ($diff as $line) {
			// pick the element for this line type
			switch ($line[1]) {
				case self::UNMODIFIED:
					$element = 'span';
					break;
				case self::DELETED:
					$element = 'del';
					break;
				case self::INSERTED:
					$element = 'ins';
					break;
				default:
					// unknown type: never reuse the element of a previous line
					$element = null;
					break;
			}

			// extend the HTML with the line
			if ($element !== null) {
				$html .= '<'.$element.'>'.htmlspecialchars($line[0]).'</'.$element.'>';
			}

			// extend the HTML with the separator
			$html .= $separator;
		}

		// return the HTML
		return $html;
	}

	/**
	 * Returns a diff as an HTML table with two columns: the first string on the
	 * left and the second string on the right. Consecutive entries of the same
	 * type share one row; a run of deletions directly followed by a run of
	 * insertions is shown side by side. Entries of unknown type are skipped.
	 *
	 * @param	array	$diff        	the diff array
	 * @param	string	$indentation 	indentation to add to every line of the generated HTML; this optional parameter defaults to ''
	 * @param	string	$separator   	the separator between lines; this optional parameter defaults to '<br>'
	 * @return	string					HTML string
	 */
	public static function toTable($diff, $indentation = '', $separator = '<br>')
	{
		// initialise the HTML
		$html = $indentation."<table class=\"diff\">\n";

		// loop over the lines in the diff; getCellContent advances $index
		$count = count($diff);
		$index = 0;
		while ($index < $count) {
			// determine the line type
			switch ($diff[$index][1]) {
				// display the content on the left and right
				case self::UNMODIFIED:
					$leftCell = self::getCellContent($diff, $indentation, $separator, $index, self::UNMODIFIED);
					$rightCell = $leftCell;
					break;

				// display the deleted on the left and inserted content on the right
				case self::DELETED:
					$leftCell = self::getCellContent($diff, $indentation, $separator, $index, self::DELETED);
					$rightCell = self::getCellContent($diff, $indentation, $separator, $index, self::INSERTED);
					break;

				// display the inserted content on the right
				case self::INSERTED:
					$leftCell = '';
					$rightCell = self::getCellContent($diff, $indentation, $separator, $index, self::INSERTED);
					break;

				// unknown type: step over the entry, otherwise the loop would never end
				default:
					$index++;
					continue 2;
			}

			// determine the CSS class suffix of each cell
			if ($leftCell === $rightCell) {
				$leftClass = 'Unmodified';
				$rightClass = 'Unmodified';
			} else {
				$leftClass = ($leftCell === '' ? 'Blank' : 'Deleted');
				$rightClass = ($rightCell === '' ? 'Blank' : 'Inserted');
			}

			// extend the HTML with the new row
			$html .=
				$indentation
				. "  <tr>\n"
				. $indentation
				. '    <td class="diff'
				. $leftClass
				. '">'
				. $leftCell
				. "</td>\n"
				. $indentation
				. '    <td class="diff'
				. $rightClass
				. '">'
				. $rightCell
				. "</td>\n"
				. $indentation
				. "  </tr>\n";
		}

		// return the HTML
		return $html.$indentation."</table>\n";
	}

	/**
	 * Returns the content of the cell, for use in the toTable function: the run
	 * of consecutive entries of the given type that starts at $index, each one
	 * escaped, wrapped in a 'span' element and followed by the separator.
	 * $index is moved past the run.
	 *
	 * @param	array	$diff        	the diff array
	 * @param	string	$indentation 	indentation to add to every line of the generated HTML (not used by this function)
	 * @param	string	$separator   	the separator between lines
	 * @param	int		$index       	the current index, passed by reference
	 * @param	int		$type        	the type of line
	 * @return	string					HTML string
	 */
	private static function getCellContent($diff, $indentation, $separator, &$index, $type)
	{
		// initialise the HTML
		$html = '';

		// loop over the matching lines, adding them to the HTML; the comparison
		// is kept loose on purpose so that it agrees with the switch in toTable
		$count = count($diff);
		while ($index < $count && $diff[$index][1] == $type) {
			$html .= '<span>'.htmlspecialchars($diff[$index][0]).'</span>'.$separator;
			$index++;
		}

		// return the HTML
		return $html;
	}
}
394