<?php
/* Copyright (C) 2016 Jean-François Ferry <hello@librethic.io>
 *
 * A class containing a diff implementation
 *
 * Created by Stephen Morley - http://stephenmorley.org/ - and released under the
 * terms of the CC0 1.0 Universal legal code:
 *
 * http://creativecommons.org/publicdomain/zero/1.0/legalcode
 */


/**
 * A class containing functions for computing diffs and formatting the output.
 *
 * A diff is an array of entries; each entry is a two-element array holding the
 * item (a line or a character) and one of the Diff::UNMODIFIED, Diff::DELETED
 * or Diff::INSERTED constants.
 */
class Diff
{
    // define the constants
    const UNMODIFIED = 0;
    const DELETED = 1;
    const INSERTED = 2;

    /**
     * Returns the diff for two strings. The return value is an array, each of
     * whose values is an array containing two values: a line (or character, if
     * $compareCharacters is true), and one of the constants Diff::UNMODIFIED (the
     * line or character is in both strings), Diff::DELETED (the line or character
     * is only in the first string), and Diff::INSERTED (the line or character is
     * only in the second string).
     *
     * Items are compared with strict equality, so numeric-looking lines such as
     * "1e3" and "1000" are reported as different.
     *
     * @param   string  $string1            First string
     * @param   string  $string2            Second string
     * @param   bool    $compareCharacters  true to compare characters, and false to compare lines; this optional parameter defaults to false
     * @return  array                       Array of diff
     */
    public static function compare($string1, $string2, $compareCharacters = false)
    {
        // split both inputs into the items to compare (characters or lines)
        if ($compareCharacters) {
            $sequence1 = self::splitCharacters($string1);
            $sequence2 = self::splitCharacters($string2);
        } else {
            $sequence1 = self::splitLines($string1);
            $sequence2 = self::splitLines($string2);
        }

        // initialise the comparison start and end positions
        $start = 0;
        $end1 = count($sequence1) - 1;
        $end2 = count($sequence2) - 1;

        // skip any common prefix
        while ($start <= $end1 && $start <= $end2
            && $sequence1[$start] === $sequence2[$start]) {
            $start++;
        }

        // skip any common suffix
        while ($end1 >= $start && $end2 >= $start
            && $sequence1[$end1] === $sequence2[$end2]) {
            $end1--;
            $end2--;
        }

        // compute the table of longest common subsequence lengths
        $table = self::computeTable($sequence1, $sequence2, $start, $end1, $end2);

        // generate the partial diff (returned in reverse order)
        $partialDiff = self::generatePartialDiff($table, $sequence1, $sequence2, $start);

        // generate the full diff: common prefix first...
        $diff = array();
        for ($index = 0; $index < $start; $index++) {
            $diff[] = array($sequence1[$index], self::UNMODIFIED);
        }

        // ...then the differing middle part, restored to forward order...
        foreach (array_reverse($partialDiff) as $entry) {
            $diff[] = $entry;
        }

        // ...and finally the common suffix
        $length1 = count($sequence1);
        for ($index = $end1 + 1; $index < $length1; $index++) {
            $diff[] = array($sequence1[$index], self::UNMODIFIED);
        }

        // return the diff
        return $diff;
    }

    /**
     * Returns the diff for two files.
     *
     * The result of file_get_contents() is passed straight to compare(); when a
     * file cannot be read that result is false, which compare() casts to an
     * empty string.
     *
     * @param   string  $file1              Path to the first file
     * @param   string  $file2              Path to the second file
     * @param   bool    $compareCharacters  true to compare characters, and false to compare lines; this optional parameter defaults to false
     * @return  array                       Array of diff
     */
    public static function compareFiles(
        $file1,
        $file2,
        $compareCharacters = false
    ) {

        // return the diff of the files
        return self::compare(
            file_get_contents($file1),
            file_get_contents($file2),
            $compareCharacters
        );
    }

    /**
     * Splits a string into lines on any newline sequence matched by \R.
     *
     * The string is first split in UTF-8 mode so that \R cannot match a lone
     * 0x85 byte inside a multi-byte character; if the string is not valid
     * UTF-8 the split is retried in byte mode.
     *
     * @param   string  $string  the string to split
     * @return  array            the lines, without their line terminators
     */
    private static function splitLines($string)
    {
        $string = (string) $string;

        $lines = preg_split('/\R/u', $string);
        if ($lines === false) {
            // not valid UTF-8: fall back to byte-wise matching
            $lines = preg_split('/\R/', $string);
        }

        return $lines === false ? array($string) : $lines;
    }

    /**
     * Splits a string into characters.
     *
     * Valid UTF-8 input is split into whole characters; anything else is split
     * into single bytes.
     *
     * @param   string  $string  the string to split
     * @return  array            the characters (an empty array for an empty string)
     */
    private static function splitCharacters($string)
    {
        $string = (string) $string;
        if ($string === '') {
            return array();
        }

        $characters = preg_split('//u', $string, -1, PREG_SPLIT_NO_EMPTY);
        if ($characters === false) {
            // not valid UTF-8: fall back to one item per byte
            $characters = str_split($string);
        }

        return $characters;
    }

    /**
     * Returns the table of longest common subsequence lengths for the specified sequences.
     *
     * Row i / column j holds the length of the longest common subsequence of
     * the first i items of the first window and the first j items of the
     * second window, where both windows begin at $start.
     *
     * @param   array  $sequence1  the first sequence
     * @param   array  $sequence2  the second sequence
     * @param   int    $start      the starting index
     * @param   int    $end1       the ending index for the first sequence
     * @param   int    $end2       the ending index for the second sequence
     * @return  array              the table of lengths
     */
    private static function computeTable($sequence1, $sequence2, $start, $end1, $end2)
    {
        // determine the lengths to be compared
        $length1 = $end1 - $start + 1;
        $length2 = $end2 - $start + 1;

        // initialise the table with the all-zero first row
        $table = array(array_fill(0, $length2 + 1, 0));

        // loop over the rows
        for ($index1 = 1; $index1 <= $length1; $index1++) {
            // create the new row; column 0 is always 0
            $table[$index1] = array(0);
            $item1 = $sequence1[$index1 + $start - 1];

            // loop over the columns
            for ($index2 = 1; $index2 <= $length2; $index2++) {
                // store the longest common subsequence length
                if ($item1 === $sequence2[$index2 + $start - 1]) {
                    $table[$index1][$index2] = $table[$index1 - 1][$index2 - 1] + 1;
                } else {
                    $table[$index1][$index2] = max($table[$index1 - 1][$index2], $table[$index1][$index2 - 1]);
                }
            }
        }

        // return the table
        return $table;
    }

    /**
     * Returns the partial diff for the specified sequences, in reverse order.
     *
     * The table is walked from its bottom-right corner back to the origin, so
     * the entries come out last-item-first.
     *
     * @param   array  $table      the table returned by the computeTable function
     * @param   array  $sequence1  the first sequence
     * @param   array  $sequence2  the second sequence
     * @param   int    $start      the starting index
     * @return  array              array of diff, in reverse order
     */
    private static function generatePartialDiff($table, $sequence1, $sequence2, $start)
    {
        // initialise the diff
        $diff = array();

        // initialise the indices
        $index1 = count($table) - 1;
        $index2 = count($table[0]) - 1;

        // loop until there are no items remaining in either sequence
        while ($index1 > 0 || $index2 > 0) {
            // check what has happened to the items at these indices
            if ($index1 > 0 && $index2 > 0
                && $sequence1[$index1 + $start - 1] === $sequence2[$index2 + $start - 1]
            ) {
                // the item is in both sequences
                $diff[] = array($sequence1[$index1 + $start - 1], self::UNMODIFIED);
                $index1--;
                $index2--;
            } elseif ($index2 > 0
                && $table[$index1][$index2] === $table[$index1][$index2 - 1]
            ) {
                // the item is only in the second sequence
                $diff[] = array($sequence2[$index2 + $start - 1], self::INSERTED);
                $index2--;
            } else {
                // the item is only in the first sequence
                $diff[] = array($sequence1[$index1 + $start - 1], self::DELETED);
                $index1--;
            }
        }

        // return the diff
        return $diff;
    }

    /**
     * Returns a diff as a string, where unmodified lines are prefixed by ' ',
     * deletions are prefixed by '- ', and insertions are prefixed by '+ '.
     *
     * An entry whose type is none of the three constants contributes only the
     * separator.
     *
     * @param   array   $diff       the diff array
     * @param   string  $separator  the separator between lines; this optional parameter defaults to "\n"
     * @return  string              String
     */
    public static function toString($diff, $separator = "\n")
    {
        // initialise the string
        $string = '';

        // loop over the lines in the diff
        foreach ($diff as $line) {
            // extend the string with the line
            switch ($line[1]) {
                case self::UNMODIFIED:
                    $string .= ' '.$line[0];
                    break;
                case self::DELETED:
                    $string .= '- '.$line[0];
                    break;
                case self::INSERTED:
                    $string .= '+ '.$line[0];
                    break;
            }

            // extend the string with the separator
            $string .= $separator;
        }

        // return the string
        return $string;
    }

    /**
     * Returns a diff as an HTML string, where unmodified lines are contained
     * within 'span' elements, deletions are contained within 'del' elements, and
     * insertions are contained within 'ins' elements. An entry with an
     * unrecognised type is rendered as a 'span'.
     *
     * @param   array   $diff       the diff array
     * @param   string  $separator  the separator between lines; this optional parameter defaults to '<br>'
     * @return  string              HTML string
     */
    public static function toHTML($diff, $separator = '<br>')
    {
        // initialise the HTML
        $html = '';

        // loop over the lines in the diff
        foreach ($diff as $line) {
            // pick the element for this line type
            switch ($line[1]) {
                case self::DELETED:
                    $element = 'del';
                    break;
                case self::INSERTED:
                    $element = 'ins';
                    break;
                case self::UNMODIFIED:
                default:
                    // never reuse the element of a previous line
                    $element = 'span';
                    break;
            }

            // explicit flags so the escaping does not depend on the PHP version's defaults
            $html .=
                '<'.$element.'>'
                . htmlspecialchars($line[0], ENT_QUOTES | ENT_SUBSTITUTE)
                . '</'.$element.'>';

            // extend the HTML with the separator
            $html .= $separator;
        }

        // return the HTML
        return $html;
    }

    /**
     * Returns a diff as an HTML table with one row per run of changes: the
     * left cell shows unmodified/deleted content and the right cell shows
     * unmodified/inserted content. Entries with an unrecognised type are
     * skipped.
     *
     * @param   array   $diff         the diff array
     * @param   string  $indentation  indentation to add to every line of the generated HTML; this optional parameter defaults to ''
     * @param   string  $separator    the separator between lines; this optional parameter defaults to '<br>'
     * @return  string                HTML string
     */
    public static function toTable($diff, $indentation = '', $separator = '<br>')
    {
        // initialise the HTML
        $html = $indentation."<table class=\"diff\">\n";

        // loop over the lines in the diff; getCellContent() advances $index
        $index = 0;
        $total = count($diff);
        while ($index < $total) {
            // determine the line type
            switch ($diff[$index][1]) {
                // display the content on the left and right
                case self::UNMODIFIED:
                    $leftCell = self::getCellContent(
                        $diff,
                        $indentation,
                        $separator,
                        $index,
                        self::UNMODIFIED
                    );
                    $rightCell = $leftCell;
                    break;

                // display the deleted on the left and inserted content on the right
                case self::DELETED:
                    $leftCell = self::getCellContent(
                        $diff,
                        $indentation,
                        $separator,
                        $index,
                        self::DELETED
                    );
                    $rightCell = self::getCellContent(
                        $diff,
                        $indentation,
                        $separator,
                        $index,
                        self::INSERTED
                    );
                    break;

                // display the inserted content on the right
                case self::INSERTED:
                    $leftCell = '';
                    $rightCell = self::getCellContent(
                        $diff,
                        $indentation,
                        $separator,
                        $index,
                        self::INSERTED
                    );
                    break;

                // unknown type: step over it, otherwise $index would never advance
                default:
                    $index++;
                    continue 2;
            }

            // work out the CSS class suffix of each cell
            if ($leftCell === $rightCell) {
                $leftClass = 'Unmodified';
                $rightClass = 'Unmodified';
            } else {
                $leftClass = ($leftCell === '' ? 'Blank' : 'Deleted');
                $rightClass = ($rightCell === '' ? 'Blank' : 'Inserted');
            }

            // extend the HTML with the new row
            $html .=
                $indentation
                . " <tr>\n"
                . $indentation
                . ' <td class="diff'
                . $leftClass
                . '">'
                . $leftCell
                . "</td>\n"
                . $indentation
                . ' <td class="diff'
                . $rightClass
                . '">'
                . $rightCell
                . "</td>\n"
                . $indentation
                . " </tr>\n";
        }

        // return the HTML
        return $html.$indentation."</table>\n";
    }

    /**
     * Returns the content of the cell, for use in the toTable function.
     * Consumes the run of consecutive entries of the given type starting at
     * $index and leaves $index on the first entry that was not consumed.
     *
     * @param   array   $diff         the diff array
     * @param   string  $indentation  indentation to add to every line of the generated HTML (not used by this function)
     * @param   string  $separator    the separator between lines
     * @param   int     $index        the current index, passed by reference
     * @param   int     $type         the type of line
     * @return  string                HTML string
     */
    private static function getCellContent($diff, $indentation, $separator, &$index, $type)
    {
        // initialise the HTML
        $html = '';

        // loop over the matching lines, adding them to the HTML; the loose
        // comparison deliberately mirrors the switch in toTable() so that any
        // entry selected there is also consumed here
        $total = count($diff);
        while ($index < $total && $diff[$index][1] == $type) {
            $html .=
                '<span>'
                . htmlspecialchars($diff[$index][0], ENT_QUOTES | ENT_SUBSTITUTE)
                . '</span>'
                . $separator;
            $index++;
        }

        // return the HTML
        return $html;
    }
}