<?php
/**
 * Utilities for handling pagenames
 *
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author     Andreas Gohr <andi@splitbrain.org>
 * @todo       Combine similar functions like {wiki,media,meta}FN()
 */

use dokuwiki\ChangeLog\MediaChangeLog;
use dokuwiki\ChangeLog\PageChangeLog;

/**
 * Fetch an ID from the request
 *
 * Uses either the standard request variable or extracts it from
 * the full request URI when userewrite is set to 2
 *
 * For $param='id' $conf['start'] is returned if no id was found.
 * If the second parameter is true (default) the ID is cleaned.
 *
 * When the raw ID ends in a namespace separator it is resolved to a page
 * inside that namespace and, if the current action is 'show', a redirect to
 * the resolved page is sent.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $param  the $_REQUEST variable name, default 'id'
 * @param bool   $clean  if true, ID is cleaned
 * @return string
 */
function getID($param='id',$clean=true){
    /** @var Input $INPUT */
    global $INPUT;
    global $conf;
    global $ACT;

    $id = $INPUT->str($param);

    //construct page id from request URI
    if(empty($id) && $conf['userewrite'] == 2){
        $request = $INPUT->server->str('REQUEST_URI');
        $script = '';

        //get the script URL
        if($conf['basedir']){
            $relpath = '';
            if($param != 'id') {
                $relpath = 'lib/exe/';
            }
            $script = $conf['basedir'] . $relpath .
                \dokuwiki\Utf8\PhpString::basename($INPUT->server->str('SCRIPT_FILENAME'));

        }elseif($INPUT->server->str('PATH_INFO')){
            $request = $INPUT->server->str('PATH_INFO');
        }elseif($INPUT->server->str('SCRIPT_NAME')){
            $script = $INPUT->server->str('SCRIPT_NAME');
        }elseif($INPUT->server->str('DOCUMENT_ROOT') && $INPUT->server->str('SCRIPT_FILENAME')){
            $script = preg_replace ('/^'.preg_quote($INPUT->server->str('DOCUMENT_ROOT'),'/').'/','',
                    $INPUT->server->str('SCRIPT_FILENAME'));
            $script = '/'.$script;
        }

        //clean script and request (fixes a windows problem)
        $script  = preg_replace('/\/\/+/','/',$script);
        $request = preg_replace('/\/\/+/','/',$request);

        //remove script URL and Querystring to gain the id
        if(preg_match('/^'.preg_quote($script,'/').'(.*)/',$request, $match)){
            $id = preg_replace ('/\?.*/','',$match[1]);
        }
        $id = urldecode($id);
        //strip leading slashes
        $id = preg_replace('!^/+!','',$id);
    }

    // Namespace autolinking from URL
    if(substr($id,-1) == ':' || ($conf['useslash'] && substr($id,-1) == '/')){
        if(page_exists($id.$conf['start'])){
            // start page inside namespace
            $id = $id.$conf['start'];
        }elseif(page_exists($id.noNS(cleanID($id)))){
            // page named like the NS inside the NS
            $id = $id.noNS(cleanID($id));
        }elseif(page_exists($id)){
            // page like namespace exists
            $id = substr($id,0,-1);
        }else{
            // fall back to default
            $id = $id.$conf['start'];
        }
        if (isset($ACT) && $ACT === 'show') {
            // keep all other query parameters, the id itself is passed separately
            $urlParameters = $_GET;
            unset($urlParameters['id']);
            send_redirect(wl($id, $urlParameters, true, '&'));
        }
    }
    if($clean) $id = cleanID($id);
    if($id === '' && $param=='id') $id = $conf['start'];

    return $id;
}

/**
 * Remove unwanted chars from ID
 *
 * Cleans a given ID to only use allowed characters. Accented characters are
 * converted to unaccented ones (depending on the 'deaccent' option or $ascii)
 *
 * Results of non-ASCII-forced calls are memoized in the global $cache_cleanid.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param  string  $raw_id    The pageid to clean
 * @param  boolean $ascii     Force ASCII
 * @return string cleaned id
 */
function cleanID($raw_id,$ascii=false){
    global $conf;
    static $sepcharpat = null;

    global $cache_cleanid;
    $cache = & $cache_cleanid;

    // check if it's already in the memory cache
    if (!$ascii && isset($cache[(string)$raw_id])) {
        return $cache[(string)$raw_id];
    }

    $sepchar = $conf['sepchar'];
    if($sepcharpat === null) { // build string only once to save clock cycles
        $sepcharpat = '#\\'.$sepchar.'+#';
    }

    $id = trim((string)$raw_id);
    $id = \dokuwiki\Utf8\PhpString::strtolower($id);

    //alternative namespace separator
    if($conf['useslash']){
        $id = strtr($id,';/','::');
    }else{
        $id = strtr($id,';/',':'.$sepchar);
    }

    if($conf['deaccent'] == 2 || $ascii) $id = \dokuwiki\Utf8\Clean::romanize($id);
    if($conf['deaccent'] || $ascii) $id = \dokuwiki\Utf8\Clean::deaccent($id,-1);

    //remove specials
    $id = \dokuwiki\Utf8\Clean::stripspecials($id,$sepchar,'\*');

    if($ascii) $id = \dokuwiki\Utf8\Clean::strip($id);

    //clean up: collapse separator runs, then drop separators next to colons
    $id = preg_replace($sepcharpat,$sepchar,$id);
    $id = preg_replace('#:+#',':',$id);
    $id = trim($id,':._-');
    $id = preg_replace('#:[:\._\-]+#',':',$id);
    $id = preg_replace('#[:\._\-]+:#',':',$id);

    if (!$ascii) $cache[(string)$raw_id] = $id;
    return($id);
}

/**
 * Return namespacepart of a wiki ID
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $id
 * @return string|false the namespace part or false if the given ID has no namespace (root)
 */
function getNS($id){
    $pos = strrpos((string)$id,':');
    if($pos!==false){
        return substr((string)$id,0,$pos);
    }
    return false;
}

/**
 * Returns the ID without the namespace
 *
 * The value is handled as a string for the lookup (like getNS() does). When
 * it contains no colon it is returned unchanged, so a false passed in by
 * curNS() for root IDs is handed back as false.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $id
 * @return string
 */
function noNS($id) {
    $pos = strrpos((string)$id, ':');
    if ($pos!==false) {
        return substr((string)$id, $pos+1);
    } else {
        return $id;
    }
}

/**
 * Returns the current namespace
 *
 * This is the last part of the namespace of $id, without any parent namespaces.
 *
 * @author Nathan Fritz <fritzn@crown.edu>
 *
 * @param string $id
 * @return string|false false if the ID has no namespace
 */
function curNS($id) {
    return noNS(getNS($id));
}

/**
 * Returns the ID without the namespace or current namespace for 'start' pages
 *
 * @author Nathan Fritz <fritzn@crown.edu>
 *
 * @param string $id
 * @return string
 */
function noNSorNS($id) {
    global $conf;

    $p = noNS($id);
    if ($p === $conf['start'] || $p === false || $p === '') {
        $p = curNS($id);
        if ($p === false || $p === '') {
            // neither a page name nor a namespace: use the start page name
            return $conf['start'];
        }
    }
    return $p;
}

/**
 * Creates a XHTML valid linkid from a given headline title
 *
 * When $check is an array it is used to keep the returned IDs unique: every
 * returned ID is recorded as a key, and the value stored for a key is the
 * highest numeric suffix handed out for that base title so far.
 *
 * @param string     $title   The headline title
 * @param array|bool $check   Existing IDs (title => number)
 * @return string the title
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function sectionID($title,&$check) {
    $title = str_replace(array(':','.'),'',cleanID($title));
    $new = ltrim($title,'0123456789_-');
    if(empty($new)){
        $title = 'section'.preg_replace('/[^0-9]+/','',$title); //keep numbers from headline
    }else{
        $title = $new;
    }

    if(is_array($check)){
        // make sure titles are unique
        if (!array_key_exists($title, $check)) {
            $check[$title] = 0;
        } else {
            // append an increasing counter until the result is unused, then
            // register it so a later headline with that exact name can not
            // be given the same id
            $base = $title;
            do {
                $title = $base . ++$check[$base];
            } while (array_key_exists($title, $check));
            $check[$title] = 0;
        }
    }

    return $title;
}

/**
 * Wiki page existence check
 *
 * parameters as for wikiFN
 *
 * @author Chris Smith <chris@jalakai.co.uk>
 *
 * @param string     $id       page id
 * @param string|int $rev      empty or revision timestamp
 * @param bool       $clean    flag indicating that $id should be cleaned (see wikiFN as well)
 * @param bool       $date_at  if true, $rev is looked up in the page changelog
 *                             (getLastRevisionAt) and replaced by the revision found there
 * @return bool exists?
 */
function page_exists($id,$rev='',$clean=true, $date_at=false) {
    if($rev !== '' && $date_at) {
        $pagelog = new PageChangeLog($id);
        $pagelog_rev = $pagelog->getLastRevisionAt($rev);
        if($pagelog_rev !== false) {
            $rev = $pagelog_rev;
        }
    }
    return file_exists(wikiFN($id,$rev,$clean));
}

/**
 * returns the full path to the datafile specified by ID and optional revision
 *
 * The filename is URL encoded to protect Unicode chars
 *
 * Computed paths are memoized in the global $cache_wikifn.
 *
 * @param  $raw_id  string   id of wikipage
 * @param  $rev     int|string   page revision, empty string for current
 * @param  $clean   bool     flag indicating that $raw_id should be cleaned.  Only set to false
 *                           when $id is guaranteed to have been cleaned already.
 * @return string full path
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function wikiFN($raw_id,$rev='',$clean=true){
    global $conf;

    global $cache_wikifn;
    $cache = & $cache_wikifn;

    $id = $raw_id;

    if ($clean) $id = cleanID($id);
    $id = str_replace(':','/',$id);

    if (isset($cache[$id]) && isset($cache[$id][$rev])) {
        return $cache[$id][$rev];
    }

    if(empty($rev)){
        $fn = $conf['datadir'].'/'.utf8_encodeFN($id).'.txt';
    }else{
        $fn = $conf['olddir'].'/'.utf8_encodeFN($id).'.'.$rev.'.txt';
        if($conf['compression']){
            //test for extensions here, we want to read both compressions
            if (file_exists($fn . '.gz')){
                $fn .= '.gz';
            }elseif(file_exists($fn . '.bz2')){
                $fn .= '.bz2';
            }else{
                //file doesnt exist yet, so we take the configured extension
                $fn .= '.' . $conf['compression'];
            }
        }
    }

    if (!isset($cache[$id])) { $cache[$id] = array(); }
    $cache[$id][$rev] = $fn;
    return $fn;
}

/**
 * Returns the full path to the file for locking the page while editing.
 *
 * @author Ben Coburn <btcoburn@silicodon.net>
 *
 * @param string $id page id
 * @return string full path
 */
function wikiLockFN($id) {
    global $conf;
    return $conf['lockdir'].'/'.md5(cleanID($id)).'.lock';
}


/**
 * returns the full path to the meta file specified by ID and extension
 *
 * @author Steven Danz <steven-danz@kc.rr.com>
 *
 * @param string $id   page id
 * @param string $ext  file extension (appended as given, include the leading dot)
 * @return string full path
 */
function metaFN($id,$ext){
    global $conf;
    $id = cleanID($id);
    $id = str_replace(':','/',$id);
    $fn = $conf['metadir'].'/'.utf8_encodeFN($id).$ext;
    return $fn;
}

/**
 * returns the full path to the media's meta file specified by ID and extension
 *
 * @author Kate Arzamastseva <pshns@ukr.net>
 *
 * @param string $id   media id
 * @param string $ext  extension of media (appended as given)
 * @return string
 */
function mediaMetaFN($id,$ext){
    global $conf;
    $id = cleanID($id);
    $id = str_replace(':','/',$id);
    $fn = $conf['mediametadir'].'/'.utf8_encodeFN($id).$ext;
    return $fn;
}

/**
 * returns an array of full paths to all metafiles of a given ID
 *
 * @author Esther Brunner <esther@kaffeehaus.ch>
 * @author Michael Hamann <michael@content-space.de>
 *
 * @param string $id page id
 * @return array
 */
function metaFiles($id){
    $basename = metaFN($id, '');
    // GLOB_MARK appends a slash to directories so the filter below drops them
    $files    = glob($basename.'.*', GLOB_MARK);
    // filter files like foo.bar.meta when $id == 'foo'
    return    $files ? preg_grep('/^'.preg_quote($basename, '/').'\.[^.\/]*$/u', $files) : array();
}

/**
 * returns the full path to the mediafile specified by ID
 *
 * The filename is URL encoded to protect Unicode chars
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @author Kate Arzamastseva <pshns@ukr.net>
 *
 * @param string     $id  media id
 * @param string|int $rev empty string or revision timestamp
 * @param bool       $clean flag indicating that $id should be cleaned
 *
 * @return string full path
 */
function mediaFN($id, $rev='', $clean=true){
    global $conf;
    if ($clean) $id = cleanID($id);
    $id = str_replace(':','/',$id);
    if(empty($rev)){
        $fn = $conf['mediadir'].'/'.utf8_encodeFN($id);
    }else{
        // old revisions are stored as <name>.<rev>.<ext>
        // NOTE(review): assumes mimetype() returns the file extension at
        // index 0 - it is defined elsewhere, confirm for extension-less ids
        $ext = mimetype($id);
        $name = substr($id,0, -1*strlen($ext[0])-1);
        $fn = $conf['mediaolddir'].'/'.utf8_encodeFN($name .'.'.( (int) $rev ).'.'.$ext[0]);
    }
    return $fn;
}

/**
 * Returns the full filepath to a localized file if local
 * version isn't found the english one is returned
 *
 * Lookup order: DOKU_CONF/lang/<lang>/, DOKU_INC/inc/lang/<lang>/ and
 * finally DOKU_INC/inc/lang/en/ (returned without an existence check).
 *
 * @param  string $id  The id of the local file
 * @param  string $ext The file extension (usually txt)
 * @return string full filepath to localized file
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function localeFN($id,$ext='txt'){
    global $conf;
    $file = DOKU_CONF.'lang/'.$conf['lang'].'/'.$id.'.'.$ext;
    if(!file_exists($file)){
        $file = DOKU_INC.'inc/lang/'.$conf['lang'].'/'.$id.'.'.$ext;
        if(!file_exists($file)){
            //fall back to english
            $file = DOKU_INC.'inc/lang/en/'.$id.'.'.$ext;
        }
    }
    return $file;
}

/**
 * Resolve relative paths in IDs
 *
 * Do not call directly use resolve_mediaid or resolve_pageid
 * instead
 *
 * Partly based on a cleanPath function found at
 * http://php.net/manual/en/function.realpath.php#57016
 *
 * @author <bart at mediawave dot nl>
 *
 * @param string $ns     namespace which is context of id
 * @param string $id     relative id
 * @param bool   $clean  flag indicating that id should be cleaned
 * @return string
 */
function resolve_id($ns,$id,$clean=true){
    global $conf;

    // some pre cleaning for useslash:
    if($conf['useslash']) $id = str_replace('/',':',$id);

    // if the id starts with a dot we need to handle the
    // relative stuff
    if($id && $id[0] == '.'){
        // normalize initial dots without a colon
        $id = preg_replace('/^((\.+:)*)(\.+)(?=[^:\.])/','\1\3:',$id);
        // prepend the current namespace
        $id = $ns.':'.$id;

        // cleanup relatives
        $result = array();
        $pathA  = explode(':', $id);
        if (!$pathA[0]) $result[] = '';
        foreach ($pathA as $dir) {
            if ($dir == '..') {
                if (end($result) == '..') {
                    $result[] = '..';
                } elseif (!array_pop($result)) {
                    // nothing left to go up from, keep the '..'
                    $result[] = '..';
                }
            } elseif ($dir && $dir != '.') {
                $result[] = $dir;
            }
        }
        // keep a trailing colon (namespace link)
        if (!end($pathA)) $result[] = '';
        $id = implode(':', $result);
    }elseif($ns !== false && strpos($id,':') === false){
        //if link contains no namespace. add current namespace (if any)
        $id = $ns.':'.$id;
    }

    if($clean) $id = cleanID($id);
    return $id;
}

/**
 * Returns a full media id
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string     $ns     namespace which is context of id
 * @param string     &$page  (reference) relative media id, updated to resolved id
 * @param bool       &$exists (reference) updated with existence of media
 * @param int|string $rev    empty string or revision timestamp
 * @param bool       $date_at if true, $rev is looked up in the media changelog
 *                            (getLastRevisionAt) and replaced by the revision found there
 */
function resolve_mediaid($ns,&$page,&$exists,$rev='',$date_at=false){
    $page   = resolve_id($ns,$page);
    if($rev !== '' && $date_at){
        $medialog = new MediaChangeLog($page);
        $medialog_rev = $medialog->getLastRevisionAt($rev);
        if($medialog_rev !== false) {
            $rev = $medialog_rev;
        }
    }

    $file   = mediaFN($page,$rev);
    $exists = file_exists($file);
}

/**
 * Returns a full page id
 *
 * Namespace links (trailing ':' or ';', or '/' with useslash) are resolved to
 * the start page of the namespace, a page named like the namespace inside it,
 * or a page named like the namespace. For other links the plural/singular
 * form is tried when 'autoplural' is set and the page itself does not exist.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $ns     namespace which is context of id
 * @param string &$page  (reference) relative page id, updated to resolved id
 * @param bool   &$exists (reference) updated with existence of page
 * @param string $rev    empty string or revision timestamp
 * @param bool   $date_at if true, $rev is looked up in the page changelog
 *                        (getLastRevisionAt) and replaced by the revision found there
 */
function resolve_pageid($ns,&$page,&$exists,$rev='',$date_at=false ){
    global $conf;
    global $ID;
    $exists = false;

    //empty address should point to current page
    if ($page === "") {
        $page = $ID;
    }

    //keep hashlink if exists then clean both parts
    // (strpos() returning 0 is falsy, so an id starting with '#' is not split)
    if (strpos($page,'#')) {
        list($page,$hash) = explode('#',$page,2);
    } else {
        $hash = '';
    }
    $hash = cleanID($hash);
    $page = resolve_id($ns,$page,false); // resolve but don't clean, yet

    // get filename (calls clean itself)
    if($rev !== '' && $date_at) {
        $pagelog = new PageChangeLog($page);
        $pagelog_rev = $pagelog->getLastRevisionAt($rev);
        if($pagelog_rev !== false) { //something found
            $rev  = $pagelog_rev;
        }
    }
    $file = wikiFN($page,$rev);

    // if ends with colon or slash we have a namespace link
    if(in_array(substr($page,-1), array(':', ';'), true) ||
       ($conf['useslash'] && substr($page,-1) == '/')){
        if(page_exists($page.$conf['start'],$rev,true,$date_at)){
            // start page inside namespace
            $page = $page.$conf['start'];
            $exists = true;
        }elseif(page_exists($page.noNS(cleanID($page)),$rev,true,$date_at)){
            // page named like the NS inside the NS
            $page = $page.noNS(cleanID($page));
            $exists = true;
        }elseif(page_exists($page,$rev,true,$date_at)){
            // page like namespace exists, $page is kept as it is
            $exists = true;
        }else{
            // fall back to default
            $page = $page.$conf['start'];
        }
    }else{
        //check alternative plural/nonplural form
        if(!file_exists($file)){
            if( $conf['autoplural'] ){
                if(substr($page,-1) == 's'){
                    $try = substr($page,0,-1);
                }else{
                    $try = $page.'s';
                }
                if(page_exists($try,$rev,true,$date_at)){
                    $page   = $try;
                    $exists = true;
                }
            }
        }else{
            $exists = true;
        }
    }

    // now make sure we have a clean page
    $page = cleanID($page);

    //add hash if any
    if(!empty($hash)) $page .= '#'.$hash;
}

/**
 * Returns the name of a cachefile from given data
 *
 * The needed directory is created by this function!
 *
 * The file is placed in a subdirectory of the cache directory that is named
 * after the first character of the md5 hash.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $data  This data is used to create a unique md5 name
 * @param string $ext   This is appended to the filename if given
 * @return string       The filename of the cachefile
 */
function getCacheName($data,$ext=''){
    global $conf;
    $md5  = md5($data);
    $file = $conf['cachedir'].'/'.$md5[0].'/'.$md5.$ext;
    io_makeFileDir($file);
    return $file;
}

/**
 * Checks a pageid against $conf['hidepages']
 *
 * Triggers the PAGEUTILS_ID_HIDEPAGE event with _isHiddenPage() as the
 * default action; the 'hidden' entry of the event data is the result.
 *
 * @author Andreas Gohr <gohr@cosmocode.de>
 *
 * @param string $id page id
 * @return bool
 */
function isHiddenPage($id){
    $data = array(
        'id' => $id,
        'hidden' => false
    );
    \dokuwiki\Extension\Event::createAndTrigger('PAGEUTILS_ID_HIDEPAGE', $data, '_isHiddenPage');
    return $data['hidden'];
}

/**
 * callback checks if page is hidden
 *
 * Nothing is changed when the page is already marked hidden, when no
 * 'hidepages' pattern is configured or when the current action is 'admin'.
 *
 * @param array $data event data    - see isHiddenPage()
 */
function _isHiddenPage(&$data) {
    global $conf;
    global $ACT;

    if ($data['hidden']) return;
    if(empty($conf['hidepages'])) return;
    if($ACT == 'admin') return;

    // the id is matched with a leading colon prepended
    if(preg_match('/'.$conf['hidepages'].'/ui',':'.$data['id'])){
        $data['hidden'] = true;
    }
}

/**
 * Reverse of isHiddenPage
 *
 * @author Andreas Gohr <gohr@cosmocode.de>
 *
 * @param string $id page id
 * @return bool
 */
function isVisiblePage($id){
    return !isHiddenPage($id);
}

/**
 * Format an id for output to a user
 *
 * Namespaces are denoted by a trailing ":*". The root namespace is
 * "*". Output is escaped.
 *
 * @author Adrian Lang <lang@cosmocode.de>
 *
 * @param string $id page id
 * @return string
 */
function prettyprint_id($id) {
    if (!$id || $id === ':') {
        return '*';
    }
    if ((substr($id, -1, 1) === ':')) {
        $id .= '*';
    }
    return hsc($id);
}

/**
 * Encode a UTF-8 filename to use on any filesystem
 *
 * Uses the 'fnencode' option to determine encoding
 *
 * When the second parameter is true the string is returned
 * unchanged if it only consists of ASCII letters, digits and
 * the characters / _ - . % -
 * This makes it safe to run it multiple times on the
 * same string (default is true)
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    urlencode
 *
 * @param string $file file name
 * @param bool   $safe if true, only encoded when non ASCII characters detected
 * @return string
 */
function utf8_encodeFN($file,$safe=true){
    global $conf;
    if($conf['fnencode'] == 'utf-8') return $file;

    if($safe && preg_match('#^[a-zA-Z0-9/_\-\.%]+$#',$file)){
        return $file;
    }

    if($conf['fnencode'] == 'safe'){
        return SafeFN::encode($file);
    }

    // url encoding, but keep the directory separators
    $file = urlencode($file);
    $file = str_replace('%2F','/',$file);
    return $file;
}

/**
 * Decode a filename back to UTF-8
 *
 * Uses the 'fnencode' option to determine encoding
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    urldecode
 *
 * @param string $file file name
 * @return string
 */
function utf8_decodeFN($file){
    global $conf;
    if($conf['fnencode'] == 'utf-8') return $file;

    if($conf['fnencode'] == 'safe'){
        return SafeFN::decode($file);
    }

    return urldecode($file);
}

/**
 * Find a page in the current namespace (determined from $ID) or any
 * higher namespace that can be accessed by the current user,
 * this condition can be overridden by an optional parameter.
 *
 * Used for sidebars, but can be used for other stuff as well
 *
 * @todo   add event hook
 *
 * @param  string $page the pagename you're looking for
 * @param bool $useacl only return pages readable by the current user, false to ignore ACLs
 * @return false|string the full page id of the found page, false if none was found
 */
function page_findnearest($page, $useacl = true){
    if ((string) $page === '') return false;
    global $ID;

    $ns = $ID;
    do {
        // walk up one namespace per iteration, the last pass checks the root
        $ns = getNS($ns);
        $pageid = cleanID("$ns:$page");
        if(page_exists($pageid) && (!$useacl || auth_quickaclcheck($pageid) >= AUTH_READ)){
            return $pageid;
        }
    } while($ns !== false);

    return false;
}