<?php
// (c) Copyright by authors of the Tiki Wiki CMS Groupware Project
//
// All Rights Reserved. See copyright.txt for details and a complete list of authors.
// Licensed under the GNU LESSER GENERAL PUBLIC LICENSE. See license.txt for details.
// $Id$

/**
 * Entry point for the unified search: incremental update queue, full index
 * rebuilds, engine/index selection and helpers to build search queries.
 */
class UnifiedSearchLib
{
    const INCREMENT_QUEUE = 'search-increment';
    const INCREMENT_QUEUE_REBUILD = 'search-increment-rebuild';

    /** @var string|null token of the currently open batch, see startBatch()/endBatch() */
    private $batchToken;
    /** @var bool true while rebuild() is running */
    private $isRebuildingNow = false;
    /** @var array index instances cached by getIndex(), keyed by index type */
    private $indices;

    /**
     * Open an update batch. While a batch is open, processUpdateQueue() returns
     * without doing anything.
     *
     * @return string|null token to pass to endBatch(), or null when a batch is already open
     */
    public function startBatch()
    {
        if (! $this->batchToken) {
            $this->batchToken = uniqid();
            return $this->batchToken;
        }

        return null;
    }

    /**
     * Close the batch identified by $token and process the incremental queue.
     *
     * @param string|null $token token returned by startBatch()
     * @param int $count number of queue messages handed to each processUpdateQueue() call
     * @return bool true when the token matched the open batch, false otherwise
     */
    public function endBatch($token, $count = 100)
    {
        if ($token && $this->batchToken === $token) {
            $this->batchToken = null;
            $previousLoopCount = null;
            while (($loopCount = $this->getQueueCount()) > 0) {
                if ($previousLoopCount !== null && $previousLoopCount >= $loopCount) {
                    break; // avoid being stuck in the loop when messages cannot be processed
                }
                $previousLoopCount = $loopCount;
                $this->processUpdateQueue($count);
            }
            return true;
        }

        return false;
    }

    /**
     * Pull up to $count messages from the incremental queue and index them.
     * Does nothing when no engine is configured or while a batch is open.
     *
     * @param int $count maximum number of messages pulled per queue read
     */
    public function processUpdateQueue($count = 10)
    {
        global $prefs;
        if (! isset($prefs['unified_engine'])) {
            return;
        }

        if ($this->batchToken) {
            return;
        }

        $queuelib = TikiLib::lib('queue');
        $toProcess = $queuelib->pull(self::INCREMENT_QUEUE, $count);
        if ($this->rebuildInProgress()) {
            // Requeue to add to new index too (that is rebuilding)
            $queuelib->pushAll(self::INCREMENT_QUEUE_REBUILD, $toProcess);
        }
        $access = TikiLib::lib('access');
        $access->preventRedirect(true);

        if (count($toProcess)) {
            $indexer = null;
            try {
                // Since the object being updated may have category changes during the update,
                // make sure internal permission cache does not refer to the pre-update situation.
                Perms::getInstance()->clear();

                $index = $this->getIndex('data-write');
                $index = new Search_Index_TypeAnalysisDecorator($index);
                $indexer = $this->buildIndexer($index);
                $indexer->update($toProcess);

                if ($prefs['storedsearch_enabled'] == 'y') {
                    // Stored search relation adding may cause residual index backlog
                    $toProcess = $queuelib->pull(self::INCREMENT_QUEUE, $count);
                    $indexer->update($toProcess);
                }

                // Detect newly created identifier fields
                $initial = array_flip($prefs['unified_identifier_fields']);
                $collected = array_flip($index->getIdentifierFields());
                $combined = array_merge($initial, $collected);

                // Store preference only on change
                if (count($combined) > count($initial)) {
                    $tikilib = TikiLib::lib('tiki');
                    $tikilib->set_preference('unified_identifier_fields', array_keys($combined));
                }
            } catch (Exception $e) {
                // Re-queue pulled messages for next update
                foreach ($toProcess as $message) {
                    $queuelib->push(self::INCREMENT_QUEUE, $message);
                }

                Feedback::error(
                    tr('The search index could not be updated. The site is misconfigured. Contact an administrator.') .
                    '<br />' . $e->getMessage()
                );
            }

            if ($indexer) {
                $indexer->clearSources();
            }
        }

        $access->preventRedirect(false);
    }

    /**
     * @return int number of messages waiting in the incremental queue, as reported by the queue library
     */
    public function getQueueCount()
    {
        $queuelib = TikiLib::lib('queue');
        return $queuelib->count(self::INCREMENT_QUEUE);
    }

    /**
     * Tell whether a rebuild is currently running for the configured engine.
     *
     * @return bool
     */
    public function rebuildInProgress()
    {
        global $prefs;
        if ($prefs['unified_engine'] == 'lucene') {
            // The temporary (-new) or swap (-old) index only exists during a rebuild
            $new = $this->getIndex('data-new');
            $old = $this->getIndex('data-old');

            return $new->exists() || $old->exists();
        } elseif ($prefs['unified_engine'] == 'elastic') {
            $name = $this->getIndexLocation('data');
            $connection = $this->getElasticConnection(true);
            return $connection->isRebuilding($name);
        } elseif ($prefs['unified_engine'] == 'mysql') {
            // rebuild() stores the name of the index being built and takes a lock of that name
            $lockName = TikiLib::lib('tiki')->get_preference('unified_mysql_index_rebuilding');
            return empty($lockName) ? false : TikiDb::get()->isLocked($lockName);
        }

        return false;
    }

    /**
     * Destroy the temporary Lucene indices (-old and -new). No-op for other engines.
     */
    public function stopRebuild()
    {
        global $prefs;
        if ($prefs['unified_engine'] == 'lucene') {
            $this->getIndex('data-old')->destroy();
            $this->getIndex('data-new')->destroy();
        }
    }

    /**
     * Rebuild the whole index for the configured engine and make it the active one.
     *
     * @param int $loggit 0=no logging, 1=log to Search_Indexer.log, 2=log to Search_Indexer_console.log
     * @param bool $fallback If the fallback index is being rebuild
     * @param Symfony\Component\Console\Helper\ProgressBar $progress progress bar object from rebuild console command
     *
     * @return array|bool false when a Lucene rebuild is already in progress; the statistics of the
     *                    rebuilt index when $fallback is true; otherwise statistics keyed by
     *                    'default' (and 'fallback' when a fallback engine was rebuilt)
     * @throws Exception
     */
    public function rebuild($loggit = 0, $fallback = false, $progress = null)
    {
        global $prefs;
        $engineResults = null;

        $tikilib = TikiLib::lib('tiki');

        switch ($prefs['unified_engine']) {
            case 'lucene':
                $index_location = $this->getIndexLocation('data');
                $tempName = $this->getIndexLocation('data-new');
                $swapName = $this->getIndexLocation('data-old');

                if ($this->rebuildInProgress()) {
                    Feedback::error(tr('Rebuild in progress.'));
                    return false;
                }

                $index = new Search_Lucene_Index($tempName);

                // If the temporary index still exists at shutdown, remove it
                TikiLib::events()->bind(
                    'tiki.process.shutdown',
                    function () use ($index) {
                        if ($index->exists()) {
                            $index->destroy();
                            echo "Abnormal termination. Unless it was killed manually, it likely ran out of memory.\n";
                        }
                    }
                );
                break;
            case 'elastic':
                $connection = $this->getElasticConnection(true);
                $aliasName = $prefs['unified_elastic_index_prefix'] . 'main';
                $indexName = $aliasName . '_' . uniqid();
                $index = new Search_Elastic_Index($connection, $indexName);
                $engineResults = new Search_EngineResult_Elastic($index);
                $index->setCamelCaseEnabled($prefs['unified_elastic_camel_case'] == 'y');

                // At shutdown, drop the new index unless it was recorded as the current one
                TikiLib::events()->bind(
                    'tiki.process.shutdown',
                    function () use ($indexName, $index) {
                        global $prefs;
                        if ($prefs['unified_elastic_index_current'] !== $indexName) {
                            $index->destroy();
                        }
                    }
                );
                break;
            case 'mysql':
                $indexName = 'index_' . uniqid();
                $index = new Search_MySql_Index(TikiDb::get(), $indexName);
                $engineResults = new Search_EngineResult_MySQL($index);
                // The preference and the lock are what rebuildInProgress() checks for this engine
                $tikilib->set_preference('unified_mysql_index_rebuilding', $indexName);
                TikiDb::get()->getLock($indexName);

                // At shutdown, drop the new index unless it was recorded as the current one
                TikiLib::events()->bind(
                    'tiki.process.shutdown',
                    function () use ($indexName, $index) {
                        global $prefs;
                        if ($prefs['unified_mysql_index_current'] !== $indexName) {
                            $index->destroy();
                        }
                    }
                );
                break;
            default:
                die('Unsupported');
        }

        // Build in -new
        if (! $fallback) {
            TikiLib::lib('queue')->clear(self::INCREMENT_QUEUE);
            TikiLib::lib('queue')->clear(self::INCREMENT_QUEUE_REBUILD);
        }

        $access = TikiLib::lib('access');
        $access->preventRedirect(true);

        $this->isRebuildingNow = true;

        $stat = [];
        $indexer = null;
        try {
            $index = new Search_Index_TypeAnalysisDecorator($index);
            $indexer = $this->buildIndexer($index, $loggit);
            $lastStats = $tikilib->get_preference('unified_last_rebuild_stats', [], true);

            $stat = $tikilib->allocate_extra(
                'unified_rebuild',
                function () use ($indexer, $lastStats, $progress) {
                    return $indexer->rebuild($lastStats, $progress);
                }
            );

            $stat['total tiki fields indexed'] = $index->getFieldCount();

            if (! is_null($engineResults)) {
                $fieldsCount = $engineResults->getEngineFieldsCount();

                // Only report the engine-side count when it differs from what Tiki indexed
                if ($fieldsCount !== $stat['total tiki fields indexed']) {
                    $stat['total fields used in the ' . $prefs['unified_engine'] . ' search index: '] = $fieldsCount;
                }
            }

            $tikilib->set_preference('unified_field_count', $index->getFieldCount());
            $tikilib->set_preference('unified_identifier_fields', $index->getIdentifierFields());
        } catch (Exception $e) {
            Feedback::error(tr('The search index could not be rebuilt.') . '<br />' . $e->getMessage());
        }

        $stats = [];
        $stats['default'] = $stat;

        // Force destruction to clear locks
        if ($indexer) {
            $indexer->clearSources();
            unset($indexer);
        }

        unset($index);

        $oldIndex = null;
        switch ($prefs['unified_engine']) {
            case 'lucene':
                // Current to -old
                if (file_exists($index_location)) {
                    if (! rename($index_location, $swapName)) {
                        Feedback::error(tr('The active index could not be removed, probably due to a file permission issue.'));
                    }
                }
                // -new to current
                if (! rename($tempName, $index_location)) {
                    Feedback::error(tr('The new index could not be made active, probably due to a file permission issue.'));
                }

                // Destroy old
                $oldIndex = new Search_Lucene_Index($swapName);
                break;
            case 'elastic':
                $oldIndex = null; // assignAlias will handle the clean-up
                $tikilib->set_preference('unified_elastic_index_current', $indexName);

                $connection->assignAlias($aliasName, $indexName);

                break;
            case 'mysql':
                // Obtain the old index and destroy it after permanently replacing it.
                $oldIndex = $this->getIndex('data', false);

                $tikilib->set_preference('unified_mysql_index_current', $indexName);
                TikiDb::get()->releaseLock($indexName);

                break;
        }

        if ($oldIndex) {
            if (! $oldIndex->destroy()) {
                Feedback::error(tr('Failed to delete the old index.'));
            }
        }

        if ($fallback) {
            // Fallback index was rebuilt. Proceed with default index operations
            return $stats['default'];
        }

        // Rebuild mysql as fallback for elasticsearch engine
        // ($fallback is necessarily false from here on because of the early return above)
        list($fallbackEngine) = TikiLib::lib('unifiedsearch')->getFallbackEngineDetails();
        if ($fallbackEngine) {
            $defaultEngine = $prefs['unified_engine'];
            $prefs['unified_engine'] = $fallbackEngine;
            try {
                $stats['fallback'] = $this->rebuild($loggit, true);
            } finally {
                // Always switch back to the configured engine, even if the fallback rebuild throws
                $prefs['unified_engine'] = $defaultEngine;
            }
        }

        // Requeue messages that were added and processed in old index,
        // while rebuilding the new index
        $queueLib = TikiLib::lib('queue');
        $toProcess = $queueLib->pull(
            self::INCREMENT_QUEUE_REBUILD,
            $queueLib->count(self::INCREMENT_QUEUE_REBUILD)
        );
        $queueLib->pushAll(self::INCREMENT_QUEUE, $toProcess);

        // Process the documents updated while we were processing the update
        $this->processUpdateQueue(1000);

        if ($prefs['storedsearch_enabled'] == 'y') {
            TikiLib::lib('storedsearch')->reloadAll();
        }

        $tikilib->set_preference('unified_last_rebuild', $tikilib->now);
        $tikilib->set_preference('unified_last_rebuild_stats', $stats);

        $this->isRebuildingNow = false;
        $access->preventRedirect(false);

        return $stats;
    }

    /**
     * Return the current engine for unified search, version and current index name/table
     * @return array [engine, version, index]; three empty strings for an unknown engine
     */
    public function getCurrentEngineDetails()
    {
        global $prefs;
        global $tikilib;

        switch ($prefs['unified_engine']) {
            case 'lucene':
                $engine = 'Lucene';
                $version = '';
                $index = $prefs['unified_lucene_location'];
                break;
            case 'elastic':
                $elasticsearch = new \Search_Elastic_Connection($prefs['unified_elastic_url']);
                $engine = 'Elastic';
                $version = $elasticsearch->getVersion();
                $index = $prefs['unified_elastic_index_current'];
                break;
            case 'mysql':
                $engine = 'MySQL';
                $version = $tikilib->getMySQLVersion();
                $index = $prefs['unified_mysql_index_current'];
                break;
            default:
                $engine = '';
                $version = '';
                $index = '';
                break;
        }

        return [$engine, $version, $index];
    }

    /**
     * Get the index location depending on $tikidomain for multi-tiki
     *
     * @param string $indexType
     * @param string $engine If not set, it uses default unified search engine
     * @return string path to index directory (Lucene) or index/table name (Elasticsearch, MySQL)
     * @throws Exception when the engine has no index of the requested type
     */
    private function getIndexLocation($indexType = 'data', $engine = null)
    {
        global $prefs, $tikidomain;
        $mapping = [
            'lucene' => [
                'data' => $prefs['unified_lucene_location'],
                'data-old' => $prefs['unified_lucene_location'] . '-old',
                'data-new' => $prefs['unified_lucene_location'] . '-new',
                'preference' => $prefs['tmpDir'] . '/unified-preference-index-' . $prefs['language'],
            ],
            'elastic' => [
                'data' => $prefs['unified_elastic_index_prefix'] . 'main',
                'preference' => $prefs['unified_elastic_index_prefix'] . 'pref_' . $prefs['language'],
            ],
            'mysql' => [
                'data' => $prefs['unified_mysql_index_current'],
                'preference' => 'index_' . 'pref_' . $prefs['language'],
            ],
        ];

        $engine = $engine ?: $prefs['unified_engine'];

        if (isset($mapping[$engine][$indexType])) {
            $index = $mapping[$engine][$indexType];

            if ($engine == 'lucene' && ! empty($tikidomain)) {
                // Multi-tiki: put indices located directly under the temp directory
                // into a per-domain sub-directory, unless the path already names the domain
                $temp = $prefs['tmpDir'];
                if (strpos($index, $tikidomain) === false && strpos($index, "$temp/") === 0) {
                    $index = str_replace("$temp/", "$temp/$tikidomain/", $index);
                }
            }

            return $index;
        } else {
            throw new Exception('Internal: Invalid index requested: ' . $indexType);
        }
    }

    /**
     * Queue an object for (re)indexing on the next incremental update.
     *
     * @param $type
     * @param $objectId
     */
    public function invalidateObject($type, $objectId)
    {
        TikiLib::lib('queue')->push(
            self::INCREMENT_QUEUE,
            [
                'object_type' => $type,
                'object_id' => $objectId
            ]
        );
    }

    /**
     * List the object types available to the search, depending on the enabled features.
     *
     * @return array translated labels keyed by object type
     */
    public function getSupportedTypes()
    {
        global $prefs;
        $types = [];

        if ($prefs['feature_wiki'] == 'y') {
            $types['wiki page'] = tra('wiki page');
        }

        if ($prefs['feature_blogs'] == 'y') {
            $types['blog post'] = tra('blog post');
        }

        if ($prefs['feature_articles'] == 'y') {
            $types['article'] = tra('article');
        }

        if ($prefs['feature_file_galleries'] == 'y') {
            $types['file'] = tra('file');
            $types['file gallery'] = tra('file gallery');
        }

        if ($prefs['feature_forums'] == 'y') {
            $types['forum post'] = tra('forum post');
            $types['forum'] = tra('forum');
        }

        if ($prefs['feature_trackers'] == 'y') {
            $types['trackeritem'] = tra('tracker item');
            $types['tracker'] = tra('tracker');
            $types['trackerfield'] = tra('tracker field');
        }

        if ($prefs['feature_sheet'] == 'y') {
            $types['sheet'] = tra('sheet');
        }

        if ($prefs['feature_wiki_comments'] == 'y'
            || $prefs['feature_article_comments'] == 'y'
            || $prefs['feature_poll_comments'] == 'y'
            || $prefs['feature_file_galleries_comments'] == 'y'
            || $prefs['feature_trackers'] == 'y'
        ) {
            $types['comment'] = tra('comment');
        }

        if ($prefs['feature_categories'] === 'y') {
            $types['category'] = tra('category');
        }

        if ($prefs['feature_webservices'] === 'y') {
            $types['webservice'] = tra('webservice');
        }

        if ($prefs['activity_basic_events'] === 'y' || $prefs['activity_custom_events'] === 'y') {
            $types['activity'] = tra('activity');
        }

        if ($prefs['feature_calendar'] === 'y') {
            $types['calendaritem'] = tra('calendar item');
            $types['calendar'] = tra('calendar');
        }

        $types['user'] = tra('user');
        $types['group'] = tra('group');

        return $types;
    }

    /**
     * Read the last line of the web and console indexer log files.
     *
     * @return array keys 'web' and 'console'; each value is the last line read from the
     *               corresponding log file, or '' when the file could not be opened
     */
    public function getLastLogItem()
    {
        $files = [
            'web' => $this->getLogFilename(1),
            'console' => $this->getLogFilename(2),
        ];
        $ret = [];

        foreach ($files as $type => $file) {
            if ($fp = @fopen($file, "r")) {
                // Walk backwards from the end of the file (skipping the final character)
                // until the previous newline, or the start of the file, is reached
                $pos = -2;
                $t = " ";
                while ($t != "\n") {
                    if (! fseek($fp, $pos, SEEK_END)) {
                        $t = fgetc($fp);
                        $pos = $pos - 1;
                    } else {
                        // Cannot seek any further back: the file holds a single line
                        rewind($fp);
                        break;
                    }
                }
                $t = fgets($fp);
                fclose($fp);
                $ret[$type] = $t;
            } else {
                $ret[$type] = '';
            }
        }
        return $ret;
    }

    /**
     * Create an indexer writing to $index, with decorators, content sources and
     * content filters set up according to the preferences.
     *
     * @param $index
     * @param int $loggit 0=no logging, 1=log to Search_Indexer.log, 2=log to Search_Indexer_console.log
     * @return Search_Indexer
     */
    private function buildIndexer($index, $loggit = 0)
    {
        global $prefs;

        $isRepository = $index instanceof Search_Index_QueryRepository;

        // The index may be wrapped in a decorator: look at the wrapped index too
        if (! $isRepository && method_exists($index, 'getRealIndex')) {
            $isRepository = $index->getRealIndex() instanceof Search_Index_QueryRepository;
        }

        // The query alert decorator is not applied during a full rebuild
        if (! $this->isRebuildingNow && $isRepository && $prefs['storedsearch_enabled'] == 'y') {
            $index = new Search_Index_QueryAlertDecorator($index);
        }

        if (! empty($prefs['unified_excluded_categories'])) {
            $index = new Search_Index_CategoryFilterDecorator(
                $index,
                array_filter(
                    array_map(
                        'intval',
                        $prefs['unified_excluded_categories']
                    )
                )
            );
        }

        $logWriter = null;

        if ($loggit) {
            $logWriter = new Zend\Log\Writer\Stream($this->getLogFilename($loggit), 'w');
        }

        $indexer = new Search_Indexer($index, $logWriter);
        $this->addSources($indexer, 'indexing');

        if ($prefs['unified_tokenize_version_numbers'] == 'y') {
            $indexer->addContentFilter(new Search_ContentFilter_VersionNumber);
        }

        return $indexer;
    }

    /**
     * Return what the indexer's getDocuments() yields for the given object.
     */
    public function getDocuments($type, $object)
    {
        $indexer = $this->buildIndexer($this->getIndex());
        return $indexer->getDocuments($type, $object);
    }

    /**
     * Return what the indexer's getAvailableFields() reports for the registered sources.
     */
    public function getAvailableFields()
    {
        $indexer = $this->buildIndexer($this->getIndex());
        return $indexer->getAvailableFields();
    }

    /**
     * Register the content and global sources matching the enabled features.
     *
     * @param Search_Indexer $aggregator any object exposing addContentSource()/addGlobalSource();
     *                                   this class also passes data sources, facet providers
     *                                   and profile export helpers
     * @param string $mode 'indexing' additionally registers the rating (when enabled),
     *                     permission and relation sources
     */
    private function addSources($aggregator, $mode = 'indexing')
    {
        global $prefs;

        $types = $this->getSupportedTypes();

        // Content Sources
        if (isset($types['trackeritem'])) {
            $aggregator->addContentSource('trackeritem', new Search_ContentSource_TrackerItemSource($mode));
            $aggregator->addContentSource('tracker', new Search_ContentSource_TrackerSource);
            $aggregator->addContentSource('trackerfield', new Search_ContentSource_TrackerFieldSource);
        }

        if (isset($types['forum post'])) {
            $aggregator->addContentSource('forum post', new Search_ContentSource_ForumPostSource);
            $aggregator->addContentSource('forum', new Search_ContentSource_ForumSource);
        }

        if (isset($types['blog post'])) {
            $aggregator->addContentSource('blog post', new Search_ContentSource_BlogPostSource);
        }

        if (isset($types['article'])) {
            $articleSource = new Search_ContentSource_ArticleSource;
            $aggregator->addContentSource('article', $articleSource);
            $aggregator->addGlobalSource(new Search_GlobalSource_ArticleAttachmentSource($articleSource));
        }

        if (isset($types['file'])) {
            $fileSource = new Search_ContentSource_FileSource;
            $aggregator->addContentSource('file', $fileSource);
            $aggregator->addContentSource('file gallery', new Search_ContentSource_FileGallerySource);
            $aggregator->addGlobalSource(new Search_GlobalSource_FileAttachmentSource($fileSource));
        }

        if (isset($types['sheet'])) {
            $aggregator->addContentSource('sheet', new Search_ContentSource_SheetSource);
        }

        if (isset($types['comment'])) {
            $commentTypes = [];
            if ($prefs['feature_wiki_comments'] == 'y') {
                $commentTypes[] = 'wiki page';
            }
            if ($prefs['feature_article_comments'] == 'y') {
                $commentTypes[] = 'article';
            }
            if ($prefs['feature_poll_comments'] == 'y') {
                $commentTypes[] = 'poll';
            }
            if ($prefs['feature_file_galleries_comments'] == 'y') {
                $commentTypes[] = 'file gallery';
            }
            if ($prefs['feature_trackers'] == 'y') {
                $commentTypes[] = 'trackeritem';
            }

            $aggregator->addContentSource('comment', new Search_ContentSource_CommentSource($commentTypes));
            $aggregator->addGlobalSource(new Search_GlobalSource_CommentSource);
        }

        if (isset($types['user'])) {
            $aggregator->addContentSource('user', new Search_ContentSource_UserSource($prefs['user_in_search_result']));
        }

        if (isset($types['group'])) {
            $aggregator->addContentSource('group', new Search_ContentSource_GroupSource);
        }

        if (isset($types['calendar'])) {
            $aggregator->addContentSource('calendaritem', new Search_ContentSource_CalendarItemSource());
            $aggregator->addContentSource('calendar', new Search_ContentSource_CalendarSource());
        }

        if ($prefs['activity_custom_events'] == 'y' || $prefs['activity_basic_events'] == 'y' || $prefs['monitor_enabled'] == 'y') {
            $aggregator->addContentSource('activity', new Search_ContentSource_ActivityStreamSource($aggregator instanceof Search_Indexer ? $aggregator : null));
        }

        if ($prefs['goal_enabled'] == 'y') {
            $aggregator->addContentSource('goalevent', new Search_ContentSource_GoalEventSource);
        }

        if ($prefs['feature_webservices'] === 'y') {
            $aggregator->addContentSource('webservice', new Search_ContentSource_WebserviceSource());
        }

        if (isset($types['wiki page'])) {
            $aggregator->addContentSource('wiki page', new Search_ContentSource_WikiSource);
        }

        // Global Sources
        if ($prefs['feature_categories'] == 'y') {
            $aggregator->addGlobalSource(new Search_GlobalSource_CategorySource);
            $aggregator->addContentSource('category', new Search_ContentSource_CategorySource);
        }

        if ($prefs['feature_freetags'] == 'y') {
            $aggregator->addGlobalSource(new Search_GlobalSource_FreeTagSource);
        }

        if ($prefs['rating_advanced'] == 'y' && $mode == 'indexing') {
            $aggregator->addGlobalSource(new Search_GlobalSource_AdvancedRatingSource($prefs['rating_recalculation'] == 'indexing'));
        }

        $aggregator->addGlobalSource(new Search_GlobalSource_Geolocation);

        if ($prefs['feature_search_show_visit_count'] === 'y') {
            $aggregator->addGlobalSource(new Search_GlobalSource_VisitsSource);
        }

        if ($prefs['feature_friends'] === 'y') {
            $aggregator->addGlobalSource(new Search_GlobalSource_SocialSource);
        }

        if ($mode == 'indexing') {
            $aggregator->addGlobalSource(new Search_GlobalSource_PermissionSource(Perms::getInstance()));
            $aggregator->addGlobalSource(new Search_GlobalSource_RelationSource);
        }

        $aggregator->addGlobalSource(new Search_GlobalSource_TitleInitialSource);
        $aggregator->addGlobalSource(new Search_GlobalSource_SearchableSource);
        $aggregator->addGlobalSource(new Search_GlobalSource_UrlSource);
    }

    /**
     * Get the index object for the configured engine.
     *
     * @param string $indexType index type understood by getIndexLocation(), or 'data-write'
     *                          for the 'data' index opened through the master Elasticsearch connection
     * @param bool $useCache when false, the instance cache is neither read nor written
     * @return Search_Index_Interface
     */
    public function getIndex($indexType = 'data', $useCache = true)
    {
        global $prefs, $tiki_p_admin;

        // NOTE(review): the cache is read with the requested type but written below under the
        // remapped type, so 'data-write' never hits the cache and its instance replaces the
        // 'data' entry. Left as is: callers may rely on getting a fresh write index (for
        // instance after a rebuild changed the current index) - confirm before changing.
        if (isset($this->indices[$indexType]) && $useCache) {
            return $this->indices[$indexType];
        }

        $writeMode = false;
        if ($indexType == 'data-write') {
            $indexType = 'data';
            $writeMode = true;
        }

        $engine = $prefs['unified_engine'];
        $fallbackMySQL = false;

        if ($engine == 'lucene') {
            // Lucene indices are returned without being stored in the instance cache
            ZendSearch\Lucene\Lucene::setTermsPerQueryLimit($prefs['unified_lucene_terms_limit']);
            $index = new Search_Lucene_Index($this->getIndexLocation($indexType), $prefs['language'], $prefs['unified_lucene_highlight'] == 'y');
            $index->setCache(TikiLib::lib('cache'));
            $index->setMaxResults($prefs['unified_lucene_max_result']);
            $index->setResultSetLimit($prefs['unified_lucene_max_resultset_limit']);

            return $index;
        }

        if ($engine == 'elastic' && $index = $this->getIndexLocation($indexType)) {
            $connection = $this->getElasticConnection($writeMode);
            if ($connection->getStatus()->status === 200) {
                $index = new Search_Elastic_Index($connection, $index);
                $index->setCamelCaseEnabled($prefs['unified_elastic_camel_case'] == 'y');
                $index->setPossessiveStemmerEnabled($prefs['unified_elastic_possessive_stemmer'] == 'y');
                $index->setFacetCount($prefs['search_facet_default_amount']);

                if ($useCache) {
                    $this->indices[$indexType] = $index;
                }
                return $index;
            }

            // Elasticsearch did not answer with status 200: optionally fall back to MySQL
            if ($prefs['unified_elastic_mysql_search_fallback'] === 'y') {
                $fallbackMySQL = true;
                Feedback::warning(['mes' => tr('Unable to connect to the main search index, MySQL full-text search used, the search results might not be accurate')]);
                $prefs['unified_incremental_update'] = 'n';
            }
        }

        if (($engine == 'mysql' || $fallbackMySQL) && $index = $this->getIndexLocation($indexType, 'mysql')) {
            $index = new Search_MySql_Index(TikiDb::get(), $index);

            if ($useCache) {
                $this->indices[$indexType] = $index;
            }
            return $index;
        }

        // Do nothing, provide a fake index.
        if ($tiki_p_admin != 'y') {
            Feedback::error(tr('Contact the site administrator. The index needs rebuilding.'));
        } else {
            Feedback::error('<a title="' . tr("Rebuild search index") . '" href="tiki-admin.php?page=search&rebuild=now">'
                . tr("Click here to rebuild index") . '</a>');
        }


        return new Search_Index_Memory;
    }

    /**
     * Collect human-readable details about the search engine (Elasticsearch only).
     *
     * @return array values keyed by translated label; empty for engines other than Elasticsearch
     */
    public function getEngineInfo()
    {
        global $prefs;

        switch ($prefs['unified_engine']) {
            case 'elastic':
                $info = [];

                try {
                    $connection = $this->getElasticConnection(true);
                    $root = $connection->rawApi('/');
                    $info[tr('Client Node')] = $root->name;
                    $info[tr('Elasticsearch Version')] = $root->version->number;
                    $info[tr('Lucene Version')] = $root->version->lucene_version;

                    $cluster = $connection->rawApi('/_cluster/health');
                    $info[tr('Cluster Name')] = $cluster->cluster_name;
                    $info[tr('Cluster Status')] = $cluster->status;
                    $info[tr('Cluster Node Count')] = $cluster->number_of_nodes;

                    if (version_compare($root->version->number, '1.0.0') === -1) {
                        // Elasticsearch older than 1.0.0
                        $status = $connection->rawApi('/_status');
                        foreach ($status->indices as $indexName => $data) {
                            if (strpos($indexName, $prefs['unified_elastic_index_prefix']) === 0) {
                                $info[tr('Index %0', $indexName)] = tr(
                                    '%0 documents, totaling %1',
                                    $data->docs->num_docs,
                                    $data->index->primary_size
                                );
                            }
                        }

                        $nodes = $connection->rawApi('/_nodes/jvm/stats');
                        foreach ($nodes->nodes as $node) {
                            $info[tr('Node %0', $node->name)] = tr('Using %0, since %1', $node->jvm->mem->heap_used, $node->jvm->uptime);
                        }
                    } else {
                        $status = $connection->getIndexStatus();

                        foreach ($status->indices as $indexName => $data) {
                            if (strpos($indexName, $prefs['unified_elastic_index_prefix']) === 0) {
                                if (isset($data->primaries)) {    // v2
                                    $info[tr('Index %0', $indexName)] = tr(
                                        '%0 documents, totaling %1 bytes',
                                        $data->primaries->docs->count,
                                        number_format($data->primaries->store->size_in_bytes)
                                    );
                                } else {                    // v1
                                    $info[tr('Index %0', $indexName)] = tr(
                                        '%0 documents, totaling %1 bytes',
                                        $data->docs->num_docs,
                                        number_format($data->index->primary_size_in_bytes)
                                    );
                                }
                            }
                        }

                        $nodes = $connection->rawApi('/_nodes/stats');
                        foreach ($nodes->nodes as $node) {
                            $info[tr('Node %0', $node->name)] = tr('Using %0 bytes, since %1', number_format($node->jvm->mem->heap_used_in_bytes), date('Y-m-d H:i:s', $node->jvm->timestamp / 1000));
                        }

                        if (! empty($prefs['unified_field_count'])) {
                            $info[tr('Field Count Tried on Last Rebuild')] = $prefs['unified_field_count'];
                            if ($prefs['unified_field_count'] > $prefs['unified_elastic_field_limit']) {
                                $info[tr('Warning')] = tr('Field limit setting is lower than Tiki needs to store in the index!');
                            }
                        }
                    }
                } catch (Search_Elastic_Exception $e) {
                    $info[tr('Information Missing')] = $e->getMessage();
                }

                return $info;
            default:
                return [];
        }
    }

    /**
     * Fetch the mapping of an Elasticsearch index.
     *
     * @param string $indexName
     * @return mixed response of the "/$indexName/_mapping" call, or false on Search_Elastic_Exception
     */
    public function getElasticIndexInfo($indexName)
    {
        $connection = $this->getElasticConnection(false);

        try {
            $mapping = $connection->rawApi("/$indexName/_mapping");

            return $mapping;
        } catch (Search_Elastic_Exception $e) {
            return false;
        }
    }

    /**
     * Get the Elasticsearch connection, created once per target URL and then reused.
     *
     * @param bool $useMasterOnly when false and a federated URL is configured, connect to that URL instead
     * @return Search_Elastic_Connection
     */
    private function getElasticConnection($useMasterOnly)
    {
        global $prefs;
        static $connections = [];

        $target = $prefs['unified_elastic_url'];

        if (! $useMasterOnly && $prefs['federated_elastic_url']) {
            $target = $prefs['federated_elastic_url'];
        }

        if (! empty($connections[$target])) {
            return $connections[$target];
        }

        $connection = new Search_Elastic_Connection($target);
        $connection->startBulk();
        $connection->persistDirty(TikiLib::events());

        $connections[$target] = $connection;
        return $connection;
    }

    /**
     * Build the data source used to load fields for search results.
     *
     * @param string $mode
     * @return Search_Formatter_DataSource_Interface
     */
    public function getDataSource($mode = 'formatting')
    {
        global $prefs;

        $dataSource = new Search_Formatter_DataSource_Declarative;

        $this->addSources($dataSource, $mode);

        if ($mode === 'formatting') {
            if ($prefs['unified_engine'] === 'mysql') {
                // Keep the fields that are empty in the entry or whose value matches the token pattern
                $dataSource->setPrefilter(
                    function ($fields, $entry) {
                        return array_filter(
                            $fields,
                            function ($field) use ($entry) {
                                if (! empty($entry[$field])) {
                                    return preg_match('/token[a-z]{20,}/', $entry[$field]);
                                }
                                return true;
                            }
                        );
                    }
                );
            } elseif ($prefs['unified_engine'] === 'elastic') {
                // Keep only the fields that are not already set in the entry
                $dataSource->setPrefilter(
                    function ($fields, $entry) {
                        return array_filter(
                            $fields,
                            function ($field) use ($entry) {
                                return ! isset($entry[$field]);
                            }
                        );
                    }
                );
            }
        }

        return $dataSource;
    }

    /**
     * @return Tiki_Profile_Writer_SearchFieldHelper helper with all sources registered
     */
    public function getProfileExportHelper()
    {
        $helper = new Tiki_Profile_Writer_SearchFieldHelper;
        $this->addSources($helper, 'indexing'); // Need all fields, so use indexing

        return $helper;
    }

    /**
     * Build the weight calculator from the unified_field_weight preference,
     * which holds one "field: weight" pair per line.
     *
     * @return Search_Query_WeightCalculator_Field
     */
    public function getWeightCalculator()
    {
        global $prefs;

        $lines = explode("\n", $prefs['unified_field_weight']);

        $weights = [];
        foreach ($lines as $line) {
            $parts = explode(':', $line, 2);
            // Lines without a colon are ignored
            if (count($parts) == 2) {
                $parts = array_map('trim', $parts);

                $weights[$parts[0]] = $parts[1];
            }
        }

        return new Search_Query_WeightCalculator_Field($weights);
    }

    /**
     * Apply the base, permission and presentation settings to a query.
     */
    public function initQuery(Search_Query $query)
    {
        $this->initQueryBase($query);
        $this->initQueryPermissions($query);
        $this->initQueryPresentation($query);
    }

    /**
     * Set the weight calculator and identifier fields on the query and, when
     * $applyJail is true and a category jail is active, restrict it to the jail.
     */
    public function initQueryBase($query, $applyJail = true)
    {
        global $prefs;

        $query->setWeightCalculator($this->getWeightCalculator());
        $query->setIdentifierFields($prefs['unified_identifier_fields']);

        $categlib = TikiLib::lib('categ');
        if ($applyJail && $jail = $categlib->get_jail(false)) {
            $query->filterCategory(implode(' or ', $jail), true);
        }
    }

    /**
     * Filter the query by the current user's groups, unless the user has the admin permission.
     */
    public function initQueryPermissions($query)
    {
        global $user;

        if (! Perms::get()->admin) {
            $query->filterPermissions(Perms::get()->getGroups(), $user);
        }
    }

    /**
     * Attach the dynamic loader transform backed by the formatting data source.
     */
    public function initQueryPresentation($query)
    {
        $query->applyTransform(new Search_Formatter_Transform_DynamicLoader($this->getDataSource('formatting')));
    }

    /**
     * Translate a filter array into query filters.
     *
     * Known keys (type, categories, deep, tags, content, autocomplete, language,
     * language_unspecified, groups, prefix, not_prefix, distance) map to dedicated
     * filters; every remaining non-empty entry becomes a content filter on the
     * field named by its key.
     *
     * @param array $filter
     * @param Search_Query|null $query query to extend; a new initialised one is created when omitted
     * @return Search_Query
     */
    public function buildQuery(array $filter, $query = null)
    {
        if (! $query) {
            $query = new Search_Query;
            $this->initQuery($query);
        }

        if (isset($filter['type']) && $filter['type']) {
            $query->filterType($filter['type']);
        }

        if (isset($filter['categories']) && $filter['categories']) {
            $query->filterCategory($filter['categories'], isset($filter['deep']));
        }

        if (isset($filter['tags']) && $filter['tags']) {
            $query->filterTags($filter['tags']);
        }

        if (isset($filter['content']) && $filter['content']) {
            $o = TikiLib::lib('tiki')->get_preference('unified_default_content', ['contents'], true);
            if (count($o) == 1 && empty($o[0])) {
                // Use "contents" field by default, if no default is specified
                $query->filterContent($filter['content'], ['contents']);
            } else {
                $query->filterContent($filter['content'], $o);
            }
        }

        if (isset($filter['autocomplete']) && $filter['autocomplete']) {
            $query->filterInitial($filter['autocomplete']);
        }

        if (isset($filter['language']) && $filter['language']) {
            $q = $filter['language'];
            // Quote codes made of two hyphen-separated parts before passing them on
            if (preg_match('/^\w+\-\w+$/', $q)) {
                $q = "\"$q\"";
            }

            if (isset($filter['language_unspecified'])) {
                $q = "($q) or unknown";
            }

            $query->filterLanguage($q);
        }

        if (isset($filter['groups'])) {
            $query->filterMultivalue($filter['groups'], 'groups');
        }

        if (isset($filter['prefix']) && is_array($filter['prefix'])) {
            foreach ($filter['prefix'] as $field => $prefix) {
                $query->filterInitial((string) $prefix, $field);
            }

            unset($filter['prefix']);
        }

        if (isset($filter['not_prefix']) && is_array($filter['not_prefix'])) {
            foreach ($filter['not_prefix'] as $field => $prefix) {
                $query->filterNotInitial((string) $prefix, $field);
            }

            unset($filter['not_prefix']);
        }

        if (isset($filter['distance']) && is_array($filter['distance']) &&
                    isset($filter['distance']['distance'], $filter['distance']['lat'], $filter['distance']['lon'])) {
            $query->filterDistance($filter['distance']['distance'], $filter['distance']['lat'], $filter['distance']['lon']);

            unset($filter['distance']);
        }

        // Drop the keys handled above so that only free-form field filters remain
        unset($filter['type']);
        unset($filter['categories']);
        unset($filter['deep']);
        unset($filter['tags']);
        unset($filter['content']);
        unset($filter['language']);
        unset($filter['language_unspecified']);
        unset($filter['autocomplete']);
        unset($filter['groups']);

        foreach ($filter as $key => $value) {
            if ($value) {
                $query->filterContent($value, $key);
            }
        }

        return $query;
    }

    /**
     * Build the facet provider: object type, plus language, date and federated
     * index facets depending on the preferences.
     *
     * @return Search_FacetProvider
     */
    public function getFacetProvider()
    {
        global $prefs;
        $types = $this->getSupportedTypes();

        $facets = [
            Search_Query_Facet_Term::fromField('object_type')
                ->setLabel(tr('Object Type'))
                ->setRenderMap($types),
        ];

        if ($prefs['feature_multilingual'] == 'y') {
            $facets[] = Search_Query_Facet_Term::fromField('language')
                ->setLabel(tr('Language'))
                ->setRenderMap(TikiLib::lib('language')->get_language_map());
        }

        if ($prefs['search_date_facets'] == 'y') {
            $facets[] = Search_Query_Facet_DateHistogram::fromField('date')
                ->setName(tr('date_histogram'))
                ->setLabel(tr('Date Histogram'))
                ->setInterval($prefs['search_date_facets_interval'])
                ->setRenderCallback(function ($date) {
                    $out = TikiLib::lib('tiki')->get_short_date($date / 1000);
                    return $out;
                });

            if ($prefs['search_date_facets_ranges']) {
                $facet = Search_Query_Facet_DateRange::fromField('date')
                    ->setName(tr('date_range'))
                    ->setLabel(tr('Date Range'))
                    ->setRenderCallback(function ($label) {
                        return $label;
                    });

                // One range per line, comma separated; the first two values are passed to
                // addRange() in swapped order, followed by the optional third value
                $ranges = explode("\n", $prefs['search_date_facets_ranges']);
                foreach (array_filter($ranges) as $rangeLine) {
                    $range = explode(',', $rangeLine);
                    if (count($range) > 2) {
                        $facet->addRange($range[1], $range[0], $range[2]);
                    } elseif (count($range) > 1) {
                        $facet->addRange($range[1], $range[0]);
                    }
                }


                $facets[] = $facet;
            }
        }

        if ($prefs['federated_enabled'] === 'y') {
            $tiki_extwiki = TikiDb::get()->table('tiki_extwiki');

            $indexMap = [
                $this->getIndexLocation() => tr('Local Search'),
            ];

            foreach (TikiLib::lib('federatedsearch')->getIndices() as $indexname => $index) {
                $indexMap[$indexname] = $tiki_extwiki->fetchOne('name', [
                    'indexname' => $indexname,
                ]);
            }

            $facets[] = Search_Query_Facet_Term::fromField('_index')
                ->setLabel(tr('Federated Search'))
                ->setRenderCallback(function ($index) use (&$indexMap) {
                    $out = tr('Index not found');
                    if (isset($indexMap[$index])) {
                        $out = $indexMap[$index];
                    } else {
                        // Match names of the form "<known index>_<suffix>"
                        foreach ($indexMap as $candidate => $name) {
                            if (0 === strpos($index, $candidate . '_')) {
                                // Remember the match in the shared map for later lookups
                                $indexMap[$index] = $name;
                                $out = $name;
                                break;
                            }
                        }
                    }
                    return $out;
                });
        }

        $provider = new Search_FacetProvider;
        $provider->addFacets($facets);
        $this->addSources($provider);

        return $provider;
    }

    /**
     * Convert a document into a plain array: objects are replaced by their
     * getRawValue() when available, otherwise getValue(); other entries are kept.
     *
     * @param array $document
     * @return array
     */
    public function getRawArray($document)
    {
        return array_map(function ($entry) {
            if (is_object($entry)) {
                if (method_exists($entry, 'getRawValue')) {
                    return $entry->getRawValue();
                } else {
                    return $entry->getValue();
                }
            } else {
                return $entry;
            }
        }, $document);
    }

    public function isOutdated()
    {

        global $prefs;

        // If incremental update is enabled we cannot rely on the unified_last_rebuild date.
1265 if ($prefs['feature_search'] == 'n' || $prefs['unified_incremental_update'] == 'y') { 1266 return false; 1267 } 1268 1269 $tikilib = TikiLib::lib('tiki'); 1270 1271 $last_rebuild = $tikilib->get_preference('unified_last_rebuild'); 1272 $threshold = strtotime('+ ' . $prefs['search_index_outdated'] . ' days', $last_rebuild); 1273 1274 $types = $this->getSupportedTypes(); 1275 1276 // Content Sources 1277 if (isset($types['wiki page'])) { 1278 $last_page = $tikilib->list_pages(0, 1, 'lastModif_desc', '', '', true, false, false, false); 1279 if (! empty($last_page['data'][0]['lastModif']) && $last_page['data'][0]['lastModif'] > $threshold) { 1280 return true; 1281 } 1282 } 1283 1284 if (isset($types['forum post'])) { 1285 $commentslib = TikiLib::lib('comments'); 1286 1287 $last_forum_post = $commentslib->get_all_comments('forum', 0, -1, 'commentDate_desc'); 1288 if (! empty($last_forum_post['data'][0]['commentDate']) && $last_forum_post['data'][0]['commentDate'] > $threshold) { 1289 return true; 1290 } 1291 1292 $last_forum = $commentslib->list_forums(0, 1, 'created_desc'); 1293 if (! empty($last_forum['data'][0]['created']) && $last_forum['data'][0]['created'] > $threshold) { 1294 return true; 1295 } 1296 } 1297 1298 if (isset($types['blog post'])) { 1299 $last_blog_post = Tikilib::lib('blog')->list_blog_posts(0, false, 0, 1, 'lastModif_desc'); 1300 if (! empty($last_blog_post['data'][0]['lastModif']) && $last_blog_post['data'][0]['lastModif'] > $threshold) { 1301 return true; 1302 } 1303 } 1304 1305 if (isset($types['article'])) { 1306 $last_article = Tikilib::lib('art')->list_articles(0, 1, 'lastModif_desc'); 1307 if (! empty($last_article['data'][0]['lastModif']) && $last_article['data'][0]['lastModif'] > $threshold) { 1308 return true; 1309 } 1310 } 1311 1312 if (isset($types['file'])) { 1313 // todo: files are indexed automatically, probably nothing to do here. 
1314 } 1315 1316 if (isset($types['trackeritem'])) { 1317 $trackerlib = TikiLib::lib('trk'); 1318 1319 $last_tracker_item = $trackerlib->list_tracker_items(-1, 0, 1, 'lastModif_desc', null); 1320 if (! empty($last_tracker_item['data'][0]['lastModif']) && $last_tracker_item['data'][0]['lastModif'] > $threshold) { 1321 return true; 1322 } 1323 1324 $last_tracker = $trackerlib->list_trackers(0, 1, 'lastModif_desc'); 1325 if (! empty($last_tracker['data'][0]['lastModif']) && $last_tracker['data'][0]['lastModif'] > $threshold) { 1326 return true; 1327 } 1328 1329 // todo: Missing tracker_fields 1330 } 1331 1332 if (isset($types['sheet'])) { 1333 $sheetlib = TikiLib::lib('sheet'); 1334 1335 $last_sheet = $sheetlib->list_sheets(0, 1, 'begin_desc'); 1336 if (! empty($last_sheet['data'][0]['begin']) && $last_sheet['data'][0]['begin'] > $threshold) { 1337 return true; 1338 } 1339 } 1340 1341 if (isset($types['comment'])) { 1342 $commentTypes = []; 1343 if ($prefs['feature_wiki_comments'] == 'y') { 1344 $commentTypes[] = 'wiki page'; 1345 } 1346 if ($prefs['feature_article_comments'] == 'y') { 1347 $commentTypes[] = 'article'; 1348 } 1349 if ($prefs['feature_poll_comments'] == 'y') { 1350 $commentTypes[] = 'poll'; 1351 } 1352 if ($prefs['feature_file_galleries_comments'] == 'y') { 1353 $commentTypes[] = 'file gallery'; 1354 } 1355 if ($prefs['feature_trackers'] == 'y') { 1356 $commentTypes[] = 'trackeritem'; 1357 } 1358 1359 $commentslib = TikiLib::lib('comments'); 1360 1361 $last_comment = $commentslib->get_all_comments($commentTypes, 0, 1, 'commentDate_desc'); 1362 if (! empty($last_comment['data'][0]['commentDate']) && $last_comment['data'][0]['commentDate'] > $threshold) { 1363 return true; 1364 } 1365 } 1366 1367 if (isset($types['user'])) { 1368 $userlib = TikiLib::lib('user'); 1369 1370 $last_user = $userlib->get_users(0, 1, 'created_desc'); 1371 if (! 
empty($last_user['data'][0]['created']) && $last_user['data'][0]['created'] > $threshold) { 1372 return true; 1373 } 1374 } 1375 1376 if (isset($types['group'])) { 1377 // todo: unable to track groups by dates 1378 } 1379 } 1380 1381 /** 1382 * Provide the name of the log file 1383 * 1384 * @param int $rebuildType 0: no log, 1: browser rebuild, 2: console rebuild 1385 * @return string 1386 */ 1387 public function getLogFilename($rebuildType = 0): string 1388 { 1389 global $prefs; 1390 1391 $logName = 'Search_Indexer'; 1392 1393 switch ($prefs['unified_engine']) { 1394 case 'elastic': 1395 $logName .= '_elastic_' . rtrim($prefs['unified_elastic_index_prefix'], '_'); 1396 break; 1397 case 'mysql': 1398 $logName .= '_mysql_' . TikiDb::get()->getOne('SELECT DATABASE()'); 1399 break; 1400 case 'lucene': 1401 $logName .= '_lucene'; 1402 break; 1403 } 1404 if ($rebuildType == 2) { 1405 $logName .= '_console'; 1406 } 1407 $logName = $prefs['tmpDir'] . (substr($prefs['tmpDir'], -1) === '/' ? '' : '/') . $logName . '.log'; 1408 return $logName; 1409 } 1410 1411 /** 1412 * Return the fallback search engine name 1413 * 1414 * @return array|null 1415 */ 1416 public function getFallbackEngineDetails() 1417 { 1418 global $prefs, $tikilib; 1419 1420 if ($prefs['unified_engine'] == 'elastic' && $prefs['unified_elastic_mysql_search_fallback'] === 'y') { 1421 $engine = 'mysql'; 1422 $engineName = 'MySQL'; 1423 $version = $tikilib->getMySQLVersion(); 1424 $index = $prefs['unified_mysql_index_current']; 1425 1426 return [$engine, $engineName, $version, $index]; 1427 } 1428 1429 return null; 1430 } 1431} 1432