<?php

namespace dokuwiki\ChangeLog;

/**
 * Methods for handling the changelog of pages or media files
 *
 * A changelog is a text file with one line per revision. Lines are parsed with
 * parseChangelogLine() and the parsed entries are stored, keyed by revision
 * timestamp, in the global $cache_revinfo array (shared between all instances).
 *
 * Files smaller than the configured chunk size are read at once; larger files
 * are read in chunks that are aligned on line boundaries.
 */
abstract class ChangeLog
{

    /** @var string id of the page or media file */
    protected $id;
    /** @var int maximum block size read from file, 0 means: always read the whole file */
    protected $chunk_size;
    /** @var array reference to the global revision info cache: [id => [rev => info]] */
    protected $cache;

    /**
     * Constructor
     *
     * @param string $id page id
     * @param int $chunk_size maximum block size read from file
     */
    public function __construct($id, $chunk_size = 8192)
    {
        global $cache_revinfo;

        // all instances share one cache through the global variable
        $this->cache =& $cache_revinfo;
        if (!isset($this->cache[$id])) {
            $this->cache[$id] = array();
        }

        $this->id = $id;
        $this->setChunkSize($chunk_size);

    }

    /**
     * Set chunk size for file reading
     * A chunk size of zero makes the whole file be read at once
     *
     * Non-numeric and negative values are treated as zero.
     *
     * @param int $chunk_size maximum block size read from file
     */
    public function setChunkSize($chunk_size)
    {
        if (!is_numeric($chunk_size)) $chunk_size = 0;

        $this->chunk_size = (int)max($chunk_size, 0);
    }

    /**
     * Returns path to changelog
     *
     * @return string path to file
     */
    abstract protected function getChangelogFilename();

    /**
     * Returns path to current page/media
     *
     * @return string path to file
     */
    abstract protected function getFilename();

    /**
     * Get the changelog information for a specific page id and revision (timestamp)
     *
     * Adjacent changelog lines are optimistically parsed and cached to speed up
     * consecutive calls to getRevisionInfo. For large changelog files, only the chunk
     * containing the requested changelog line is read.
     *
     * @param int $rev revision timestamp
     * @return bool|array false or array with entries:
     *      - date:  unix timestamp
     *      - ip:    IPv4 address (127.0.0.1)
     *      - type:  log line type
     *      - id:    page id
     *      - user:  user name
     *      - sum:   edit summary (or action reason)
     *      - extra: extra data (varies by line type)
     *
     * @author Ben Coburn <btcoburn@silicodon.net>
     * @author Kate Arzamastseva <pshns@ukr.net>
     */
    public function getRevisionInfo($rev)
    {
        $rev = max($rev, 0);

        // check if it's already in the memory cache
        if (isset($this->cache[$this->id]) && isset($this->cache[$this->id][$rev])) {
            return $this->cache[$this->id][$rev];
        }

        //read lines from changelog
        $result = $this->readloglines($rev);
        if (!is_array($result)) return false;
        list($fp, $lines) = $result;
        if ($fp) {
            fclose($fp);
        }
        if (empty($lines)) return false;

        // parse and cache changelog lines
        foreach ($lines as $value) {
            $tmp = parseChangelogLine($value);
            if ($tmp !== false) {
                $this->cache[$this->id][$tmp['date']] = $tmp;
            }
        }
        if (!isset($this->cache[$this->id][$rev])) {
            return false;
        }
        return $this->cache[$this->id][$rev];
    }

    /**
     * Return a list of page revisions numbers
     *
     * Does not guarantee that the revision exists in the attic,
     * only that a line with the date exists in the changelog.
     * By default the current revision is skipped.
     *
     * The current revision is automatically skipped when the page exists.
     * See $INFO['meta']['last_change'] for the current revision.
     * A negative $first makes the current revision be read too.
     *
     * For efficiency, the log lines are parsed and cached for later
     * calls to getRevisionInfo. Large changelog files are read
     * backwards in chunks until the requested number of changelog
     * lines are received.
     *
     * @param int $first skip the first n changelog lines
     * @param int $num number of revisions to return
     * @return array with the revision timestamps, newest first
     *
     * @author Ben Coburn <btcoburn@silicodon.net>
     * @author Kate Arzamastseva <pshns@ukr.net>
     */
    public function getRevisions($first, $num)
    {
        $revs = array();
        $lines = array();
        $count = 0;

        $num = max($num, 0);
        if ($num == 0) {
            return $revs;
        }

        if ($first < 0) {
            $first = 0;
        } else {
            if (file_exists($this->getFilename())) {
                // skip current revision if the page exists
                $first = max($first + 1, 0);
            }
        }

        $file = $this->getChangelogFilename();

        if (!file_exists($file)) {
            return $revs;
        }
        if (filesize($file) < $this->chunk_size || $this->chunk_size == 0) {
            // read whole file
            $lines = file($file);
            if ($lines === false) {
                return $revs;
            }
        } else {
            // read chunks backwards
            $fp = fopen($file, 'rb'); // "file pointer"
            if ($fp === false) {
                return $revs;
            }
            fseek($fp, 0, SEEK_END);
            $tail = ftell($fp);

            // chunk backwards
            $finger = max($tail - $this->chunk_size, 0);
            while ($count < $num + $first) {
                $nl = $this->getNewlinepointer($fp, $finger);

                // was the chunk big enough? if not, take another bite
                if ($nl > 0 && $tail <= $nl) {
                    $finger = max($finger - $this->chunk_size, 0);
                    continue;
                } else {
                    $finger = $nl;
                }

                // read chunk, the file pointer is already positioned at $finger
                $chunk = '';
                $read_size = max($tail - $finger, 0); // found chunk size
                $got = 0;
                while ($got < $read_size && !feof($fp)) {
                    $tmp = @fread($fp, max(min($this->chunk_size, $read_size - $got), 0));
                    if ($tmp === false) {
                        break;
                    } //error state
                    $got += strlen($tmp);
                    $chunk .= $tmp;
                }
                $tmp = explode("\n", $chunk);
                array_pop($tmp); // remove trailing newline

                // combine with previous chunk
                $count += count($tmp);
                $lines = array_merge($tmp, $lines);

                // next chunk
                if ($finger == 0) {
                    // already read all the lines
                    break;
                } else {
                    $tail = $finger;
                    $finger = max($tail - $this->chunk_size, 0);
                }
            }
            fclose($fp);
        }

        // skip parsing extra lines
        $num = max(min(count($lines) - $first, $num), 0);
        if ($first > 0 && $num > 0) {
            $lines = array_slice($lines, max(count($lines) - $first - $num, 0), $num);
        } else {
            if ($first > 0 && $num == 0) {
                $lines = array_slice($lines, 0, max(count($lines) - $first, 0));
            } elseif ($first == 0 && $num > 0) {
                $lines = array_slice($lines, max(count($lines) - $num, 0));
            }
        }

        // handle lines in reverse order
        for ($i = count($lines) - 1; $i >= 0; $i--) {
            $tmp = parseChangelogLine($lines[$i]);
            if ($tmp !== false) {
                $this->cache[$this->id][$tmp['date']] = $tmp;
                $revs[] = $tmp['date'];
            }
        }

        return $revs;
    }

    /**
     * Get the nth revision left or right hand side for a specific page id and revision (timestamp)
     *
     * For large changelog files, only the chunk containing the
     * reference revision $rev is read and sometimes a next chunk.
     *
     * Adjacent changelog lines are optimistically parsed and cached to speed up
     * consecutive calls to getRevisionInfo.
     *
     * @param int $rev revision timestamp used as start date (doesn't need to be a revision number)
     * @param int $direction give position of returned revision with respect to $rev; positive=next, negative=prev
     * @return bool|int
     *      timestamp of the requested revision
     *      otherwise false
     */
    public function getRelativeRevision($rev, $direction)
    {
        $rev = max($rev, 0);
        $direction = (int)$direction;

        //no direction given or last rev, so no follow-up
        if (!$direction || ($direction > 0 && $this->isCurrentRevision($rev))) {
            return false;
        }

        //get lines from changelog
        $result = $this->readloglines($rev);
        if (!is_array($result)) return false;
        list($fp, $lines, $head, $tail, $eof) = $result;
        if (empty($lines)) {
            // do not leak the handle that was opened for chunked reading
            if ($fp) {
                fclose($fp);
            }
            return false;
        }

        // look for revisions later/earlier than $rev, when found count till the wanted revision is reached
        // also parse and cache changelog lines for getRevisionInfo().
        $revcounter = 0;
        $relativerev = false;
        $checkotherchunck = true; //always runs once
        while (!$relativerev && $checkotherchunck) {
            $tmp = array();
            //parse in normal or reverse order
            $count = count($lines);
            if ($direction > 0) {
                $start = 0;
                $step = 1;
            } else {
                $start = $count - 1;
                $step = -1;
            }
            for ($i = $start; $i >= 0 && $i < $count; $i = $i + $step) {
                $tmp = parseChangelogLine($lines[$i]);
                if ($tmp !== false) {
                    $this->cache[$this->id][$tmp['date']] = $tmp;
                    //look for revs older/earlier than reference $rev and select $direction-th one
                    if (($direction > 0 && $tmp['date'] > $rev) || ($direction < 0 && $tmp['date'] < $rev)) {
                        $revcounter++;
                        if ($revcounter == abs($direction)) {
                            $relativerev = $tmp['date'];
                        }
                    }
                }
            }

            // date of the line handled last; null when that line could not be parsed
            $lastdate = (is_array($tmp) && isset($tmp['date'])) ? $tmp['date'] : null;

            //true when $rev is found, but not the wanted follow-up.
            $checkotherchunck = $fp
                && ($lastdate == $rev || ($revcounter > 0 && !$relativerev))
                && !(($tail == $eof && $direction > 0) || ($head == 0 && $direction < 0));

            if ($checkotherchunck) {
                list($lines, $head, $tail) = $this->readAdjacentChunk($fp, $head, $tail, $direction);

                if (empty($lines)) break;
            }
        }
        if ($fp) {
            fclose($fp);
        }

        return $relativerev;
    }

    /**
     * Returns revisions around rev1 and rev2
     * When available it returns $max entries for each revision
     *
     * @param int $rev1 oldest revision timestamp
     * @param int $rev2 newest revision timestamp (0 looks up last revision)
     * @param int $max maximum number of revisions returned
     * @return array with two arrays with revisions surrounding rev1 respectively rev2, newest first
     */
    public function getRevisionsAround($rev1, $rev2, $max = 50)
    {
        $max = floor(abs($max) / 2) * 2 + 1; // always an odd number
        $rev1 = max($rev1, 0);
        $rev2 = max($rev2, 0);

        if ($rev2) {
            if ($rev2 < $rev1) {
                $rev = $rev2;
                $rev2 = $rev1;
                $rev1 = $rev;
            }
        } else {
            //empty right side means a removed page. Look up last revision.
            $revs = $this->getRevisions(-1, 1);
            // an empty or unreadable changelog yields no revision at all
            $rev2 = isset($revs[0]) ? $revs[0] : 0;
        }
        //collect revisions around rev2
        $around2 = $this->retrieveRevisionsAround($rev2, $max);
        if (!is_array($around2)) return array(array(), array());
        list($revs2, $allrevs, $fp, $lines, $head, $tail) = $around2;

        if (empty($revs2)) {
            if ($fp) {
                fclose($fp);
            }
            return array(array(), array());
        }

        //collect revisions around rev1
        $revs1 = array();
        $index = array_search($rev1, $allrevs);
        if ($index === false) {
            //no overlapping revisions
            $around1 = $this->retrieveRevisionsAround($rev1, $max);
            if (is_array($around1)) {
                list($revs1, , $fp1) = $around1;
                // this lookup opened its own handle, which is not needed any further
                if ($fp1) {
                    fclose($fp1);
                }
            }
            if (empty($revs1)) $revs1 = array();
        } else {
            //revisions overlaps, reuse revisions around rev2
            $revs1 = $allrevs;
            while ($head > 0) {
                for ($i = count($lines) - 1; $i >= 0; $i--) {
                    $tmp = parseChangelogLine($lines[$i]);
                    if ($tmp !== false) {
                        $this->cache[$this->id][$tmp['date']] = $tmp;
                        $revs1[] = $tmp['date'];
                        $index++;

                        if ($index > floor($max / 2)) break 2;
                    }
                }

                list($lines, $head, $tail) = $this->readAdjacentChunk($fp, $head, $tail, -1);
            }
            sort($revs1);
            //return wanted selection
            $revs1 = array_slice($revs1, max($index - floor($max / 2), 0), $max);
        }

        // the handle of the lookup around rev2 is only defined for chunked reading
        if ($fp) {
            fclose($fp);
        }

        return array(array_reverse($revs1), array_reverse($revs2));
    }


    /**
     * Checks if the ID has old revisions
     *
     * Only tests whether the changelog file exists.
     *
     * @return boolean
     */
    public function hasRevisions() {
        $file = $this->getChangelogFilename();
        return file_exists($file);
    }

    /**
     * Returns lines from changelog.
     * If the file is larger than the chunk size, only the chunk is read that could contain $rev.
     *
     * @param int $rev revision timestamp
     * @return array|false
     *     if success returns array(fp, array(changeloglines), $head, $tail, $eof)
     *     where fp is only defined for chunk reading and needs closing by the caller.
     *     otherwise false
     */
    protected function readloglines($rev)
    {
        $file = $this->getChangelogFilename();

        if (!file_exists($file)) {
            return false;
        }

        $fp = null;
        $head = 0;
        $tail = 0;
        $eof = 0;

        if (filesize($file) < $this->chunk_size || $this->chunk_size == 0) {
            // read whole file
            $lines = file($file);
            if ($lines === false) {
                return false;
            }
        } else {
            // read by chunk
            $fp = fopen($file, 'rb'); // "file pointer"
            if ($fp === false) {
                return false;
            }
            $head = 0;
            fseek($fp, 0, SEEK_END);
            $eof = ftell($fp);
            $tail = $eof;

            // find chunk: bisect between $head and $tail on line boundaries
            while ($tail - $head > $this->chunk_size) {
                $finger = $head + floor(($tail - $head) / 2.0);
                $finger = $this->getNewlinepointer($fp, $finger);
                $tmp = fgets($fp);
                if ($finger == $head || $finger == $tail) {
                    break;
                }
                $tmp = parseChangelogLine($tmp);

                // an unparsable line is handled as "not newer than $rev"
                if ($tmp !== false && $tmp['date'] > $rev) {
                    $tail = $finger;
                } else {
                    $head = $finger;
                }
            }

            if ($tail - $head < 1) {
                // could not find chunk, assume requested rev is missing
                fclose($fp);
                return false;
            }

            $lines = $this->readChunk($fp, $head, $tail);
        }
        return array(
            $fp,
            $lines,
            $head,
            $tail,
            $eof,
        );
    }

    /**
     * Read chunk and return array with lines of given chunk.
     * Has no check if $head and $tail are really at a new line
     *
     * @param resource $fp resource filepointer
     * @param int $head start point chunk
     * @param int $tail end point chunk
     * @return array lines read from chunk
     */
    protected function readChunk($fp, $head, $tail)
    {
        $chunk = '';
        $chunk_size = max($tail - $head, 0); // found chunk size
        $got = 0;
        fseek($fp, $head);
        while ($got < $chunk_size && !feof($fp)) {
            $tmp = @fread($fp, max(min($this->chunk_size, $chunk_size - $got), 0));
            if ($tmp === false) { //error state
                break;
            }
            $got += strlen($tmp);
            $chunk .= $tmp;
        }
        $lines = explode("\n", $chunk);
        array_pop($lines); // remove trailing newline
        return $lines;
    }

    /**
     * Set pointer to first new line after $finger and return its position
     *
     * A $finger of zero is returned unchanged, the start of the file counts as line start.
     *
     * @param resource $fp filepointer
     * @param int $finger a pointer
     * @return int pointer
     */
    protected function getNewlinepointer($fp, $finger)
    {
        fseek($fp, $finger);
        $nl = $finger;
        if ($finger > 0) {
            fgets($fp); // slip the finger forward to a new line
            $nl = ftell($fp);
        }
        return $nl;
    }

    /**
     * Check whether given revision is the current page
     *
     * @param int $rev timestamp of current page
     * @return bool true if $rev is current revision, otherwise false
     */
    public function isCurrentRevision($rev)
    {
        return $rev == @filemtime($this->getFilename());
    }

    /**
     * Return an existing revision for a specific date which is
     * the current one or younger or equal than the date
     *
     * @param number $date_at timestamp
     * @return bool|int|string revision timestamp, '' for the current revision
     *                         or false when no matching revision is found
     */
    public function getLastRevisionAt($date_at)
    {
        //requested date_at(timestamp) younger or equal than modified_time($this->id) => load current
        if (file_exists($this->getFilename()) && $date_at >= @filemtime($this->getFilename())) {
            return '';
        } else {
            if ($rev = $this->getRelativeRevision($date_at + 1, -1)) { //+1 to get also the requested date revision
                return $rev;
            } else {
                return false;
            }
        }
    }

    /**
     * Returns the next lines of the changelog of the chunk before head or after tail
     *
     * @param resource $fp filepointer
     * @param int $head position head of last chunk
     * @param int $tail position tail of last chunk
     * @param int $direction positive forward, negative backward
     * @return array with entries:
     *    - $lines: changelog lines of the read chunk
     *    - $head: head of chunk
     *    - $tail: tail of chunk
     */
    protected function readAdjacentChunk($fp, $head, $tail, $direction)
    {
        // without file pointer the whole file was read already, there is no adjacent chunk
        if (!$fp) return array(array(), $head, $tail);

        if ($direction > 0) {
            //read forward
            $head = $tail;
            $tail = $head + floor($this->chunk_size * (2 / 3));
            $tail = $this->getNewlinepointer($fp, $tail);
        } else {
            //read backward
            $tail = $head;
            $head = max($tail - $this->chunk_size, 0);
            while (true) {
                $nl = $this->getNewlinepointer($fp, $head);
                // was the chunk big enough? if not, take another bite
                if ($nl > 0 && $tail <= $nl) {
                    $head = max($head - $this->chunk_size, 0);
                } else {
                    $head = $nl;
                    break;
                }
            }
        }

        //load next chunk
        $lines = $this->readChunk($fp, $head, $tail);
        return array($lines, $head, $tail);
    }

    /**
     * Collect the $max revisions near to the timestamp $rev
     *
     * When false is returned, a file pointer opened for the lookup has been closed already.
     *
     * @param int $rev revision timestamp
     * @param int $max maximum number of revisions to be returned
     * @return bool|array
     *     return array with entries:
     *       - $requestedrevs: array of with $max revision timestamps
     *       - $revs: all parsed revision timestamps
     *       - $fp: filepointer only defined for chunk reading, needs closing.
     *       - $lines: non-parsed changelog lines before the parsed revisions
     *       - $head: position of first read changelogline
     *       - $lasttail: position of end of last read changelogline
     *     otherwise false
     */
    protected function retrieveRevisionsAround($rev, $max)
    {
        //get lines from changelog
        $result = $this->readloglines($rev);
        if (!is_array($result)) return false;
        list($fp, $lines, $starthead, $starttail) = $result;
        if (empty($lines)) {
            if ($fp) {
                fclose($fp);
            }
            return false;
        }

        //parse chunk containing $rev, and read forward more chunks until $max/2 is reached
        $head = $starthead;
        $tail = $starttail;
        $revs = array();
        $aftercount = $beforecount = 0;
        while (count($lines) > 0) {
            foreach ($lines as $line) {
                $tmp = parseChangelogLine($line);
                if ($tmp !== false) {
                    $this->cache[$this->id][$tmp['date']] = $tmp;
                    $revs[] = $tmp['date'];
                    if ($tmp['date'] >= $rev) {
                        //count revs after reference $rev
                        $aftercount++;
                        if ($aftercount == 1) $beforecount = count($revs);
                    }
                    //enough revs after reference $rev?
                    if ($aftercount > floor($max / 2)) break 2;
                }
            }
            //retrieve next chunk
            list($lines, $head, $tail) = $this->readAdjacentChunk($fp, $head, $tail, 1);
        }
        if ($aftercount == 0) {
            // nothing is handed to the caller, so the handle has to be closed here
            if ($fp) {
                fclose($fp);
            }
            return false;
        }

        $lasttail = $tail;

        //read additional chunks backward until $max/2 is reached and total number of revs is equal to $max
        $lines = array();
        $i = 0;
        if ($aftercount > 0) {
            $head = $starthead;
            $tail = $starttail;
            while ($head > 0) {
                list($lines, $head, $tail) = $this->readAdjacentChunk($fp, $head, $tail, -1);

                for ($i = count($lines) - 1; $i >= 0; $i--) {
                    $tmp = parseChangelogLine($lines[$i]);
                    if ($tmp !== false) {
                        $this->cache[$this->id][$tmp['date']] = $tmp;
                        $revs[] = $tmp['date'];
                        $beforecount++;
                        //enough revs before reference $rev?
                        if ($beforecount > max(floor($max / 2), $max - $aftercount)) break 2;
                    }
                }
            }
        }
        sort($revs);

        //keep only non-parsed lines
        $lines = array_slice($lines, 0, $i);
        //trunk desired selection
        $requestedrevs = array_slice($revs, -$max, $max);

        return array($requestedrevs, $revs, $fp, $lines, $head, $lasttail);
    }
}