1<?php
2// (c) Copyright by authors of the Tiki Wiki CMS Groupware Project
3//
4// All Rights Reserved. See copyright.txt for details and a complete list of authors.
5// Licensed under the GNU LESSER GENERAL PUBLIC LICENSE. See license.txt for details.
6// $Id$
7
8/**
9 *
10 */
11class UnifiedSearchLib
12{
13	const INCREMENT_QUEUE = 'search-increment';
14	const INCREMENT_QUEUE_REBUILD = 'search-increment-rebuild';
15
16	private $batchToken;
17	private $isRebuildingNow = false;
18	private $indices;
19
20	/**
21	 * @return string
22	 */
23	public function startBatch()
24	{
25		if (! $this->batchToken) {
26			$this->batchToken = uniqid();
27			return $this->batchToken;
28		}
29	}
30
31	/**
32	 * @param $token
33	 * @param int $count
34	 */
35	public function endBatch($token, $count = 100)
36	{
37		if ($token && $this->batchToken === $token) {
38			$this->batchToken = null;
39			$previousLoopCount = null;
40			while (($loopCount = $this->getQueueCount()) > 0) {
41				if ($previousLoopCount !== null && $previousLoopCount >= $loopCount) {
42					break; // avoid to be blocked in loops if messages can not be processed
43				}
44				$previousLoopCount = $loopCount;
45				$this->processUpdateQueue($count);
46			}
47			return true;
48		}
49
50		return false;
51	}
52
53	/**
54	 * @param int $count
55	 */
56	public function processUpdateQueue($count = 10)
57	{
58		global $prefs;
59		if (! isset($prefs['unified_engine'])) {
60			return;
61		}
62
63		if ($this->batchToken) {
64			return;
65		}
66
67		$queuelib = TikiLib::lib('queue');
68		$toProcess = $queuelib->pull(self::INCREMENT_QUEUE, $count);
69		if ($this->rebuildInProgress()) {
70			// Requeue to add to new index too (that is rebuilding)
71			$queuelib->pushAll(self::INCREMENT_QUEUE_REBUILD, $toProcess);
72		}
73		$access = TikiLib::lib('access');
74		$access->preventRedirect(true);
75
76		if (count($toProcess)) {
77			$indexer = null;
78			try {
79				// Since the object being updated may have category changes during the update,
80				// make sure internal permission cache does not refer to the pre-update situation.
81				Perms::getInstance()->clear();
82
83				$index = $this->getIndex('data-write');
84				$index = new Search_Index_TypeAnalysisDecorator($index);
85				$indexer = $this->buildIndexer($index);
86				$indexer->update($toProcess);
87
88				if ($prefs['storedsearch_enabled'] == 'y') {
89					// Stored search relation adding may cause residual index backlog
90					$toProcess = $queuelib->pull(self::INCREMENT_QUEUE, $count);
91					$indexer->update($toProcess);
92				}
93
94				// Detect newly created identifier fields
95				$initial = array_flip($prefs['unified_identifier_fields']);
96				$collected = array_flip($index->getIdentifierFields());
97				$combined = array_merge($initial, $collected);
98
99				// Store preference only on change
100				if (count($combined) > count($initial)) {
101					$tikilib = TikiLib::lib('tiki');
102					$tikilib->set_preference('unified_identifier_fields', array_keys($combined));
103				}
104			} catch (Exception $e) {
105				// Re-queue pulled messages for next update
106				foreach ($toProcess as $message) {
107					$queuelib->push(self::INCREMENT_QUEUE, $message);
108				}
109
110				Feedback::error(
111					tr('The search index could not be updated. The site is misconfigured. Contact an administrator.') .
112					'<br />' . $e->getMessage()
113				);
114			}
115
116			if ($indexer) {
117				$indexer->clearSources();
118			}
119		}
120
121		$access->preventRedirect(false);
122	}
123
124	/**
125	 * @return array
126	 */
127	public function getQueueCount()
128	{
129		$queuelib = TikiLib::lib('queue');
130		return $queuelib->count(self::INCREMENT_QUEUE);
131	}
132
133	/**
134	 * @return bool
135	 */
136	public function rebuildInProgress()
137	{
138		global $prefs;
139		if ($prefs['unified_engine'] == 'lucene') {
140			$new = $this->getIndex('data-new');
141			$old = $this->getIndex('data-old');
142
143			return $new->exists() || $old->exists();
144		} elseif ($prefs['unified_engine'] == 'elastic') {
145			$name = $this->getIndexLocation('data');
146			$connection = $this->getElasticConnection(true);
147			return $connection->isRebuilding($name);
148		} elseif ($prefs['unified_engine'] == 'mysql') {
149			$lockName = TikiLib::lib('tiki')->get_preference('unified_mysql_index_rebuilding');
150			return empty($lockName) ? false : TikiDb::get()->isLocked($lockName);
151		}
152
153		return false;
154	}
155
156	/**
157	 */
158	public function stopRebuild()
159	{
160		global $prefs;
161		if ($prefs['unified_engine'] == 'lucene') {
162			$this->getIndex('data-old')->destroy();
163			$this->getIndex('data-new')->destroy();
164		}
165	}
166
167	/**
168	 * @param int  $loggit   0=no logging, 1=log to Search_Indexer.log, 2=log to Search_Indexer_console.log
169	 * @param bool $fallback If the fallback index is being rebuild
170	 * @param Symfony\Component\Console\Helper\ProgressBar $progress progress bar object from rebuild console command
171	 *
172	 * @return array|bool
173	 * @throws Exception
174	 */
175	public function rebuild($loggit = 0, $fallback = false, $progress = null)
176	{
177		global $prefs;
178		$engineResults = null;
179
180		$tikilib = TikiLib::lib('tiki');
181
182		switch ($prefs['unified_engine']) {
183			case 'lucene':
184				$index_location = $this->getIndexLocation('data');
185				$tempName = $this->getIndexLocation('data-new');
186				$swapName = $this->getIndexLocation('data-old');
187
188				if ($this->rebuildInProgress()) {
189					Feedback::error(tr('Rebuild in progress.'));
190					return false;
191				}
192
193				$index = new Search_Lucene_Index($tempName);
194
195				TikiLib::events()->bind(
196					'tiki.process.shutdown',
197					function () use ($index) {
198						if ($index->exists()) {
199							$index->destroy();
200							echo "Abnormal termination. Unless it was killed manually, it likely ran out of memory.\n";
201						}
202					}
203				);
204				break;
205			case 'elastic':
206				$connection = $this->getElasticConnection(true);
207				$aliasName = $prefs['unified_elastic_index_prefix'] . 'main';
208				$indexName = $aliasName . '_' . uniqid();
209				$index = new Search_Elastic_Index($connection, $indexName);
210				$engineResults = new Search_EngineResult_Elastic($index);
211				$index->setCamelCaseEnabled($prefs['unified_elastic_camel_case'] == 'y');
212
213				TikiLib::events()->bind(
214					'tiki.process.shutdown',
215					function () use ($indexName, $index) {
216						global $prefs;
217						if ($prefs['unified_elastic_index_current'] !== $indexName) {
218							$index->destroy();
219						}
220					}
221				);
222				break;
223			case 'mysql':
224				$indexName = 'index_' . uniqid();
225				$index = new Search_MySql_Index(TikiDb::get(), $indexName);
226				$engineResults = new Search_EngineResult_MySQL($index);
227				$tikilib->set_preference('unified_mysql_index_rebuilding', $indexName);
228				TikiDb::get()->getLock($indexName);
229
230				TikiLib::events()->bind(
231					'tiki.process.shutdown',
232					function () use ($indexName, $index) {
233						global $prefs;
234						if ($prefs['unified_mysql_index_current'] !== $indexName) {
235							$index->destroy();
236						}
237					}
238				);
239				break;
240			default:
241				die('Unsupported');
242		}
243
244		// Build in -new
245		if (! $fallback) {
246			TikiLib::lib('queue')->clear(self::INCREMENT_QUEUE);
247			TikiLib::lib('queue')->clear(self::INCREMENT_QUEUE_REBUILD);
248		}
249
250		$access = TikiLib::lib('access');
251		$access->preventRedirect(true);
252
253		$this->isRebuildingNow = true;
254
255		$stat = [];
256		$indexer = null;
257		try {
258			$index = new Search_Index_TypeAnalysisDecorator($index);
259			$indexer = $this->buildIndexer($index, $loggit);
260			$lastStats = $tikilib->get_preference('unified_last_rebuild_stats', [], true);
261
262			$stat = $tikilib->allocate_extra(
263				'unified_rebuild',
264				function () use ($indexer, $lastStats, $progress) {
265					return $indexer->rebuild($lastStats, $progress);
266				}
267			);
268
269			$stat['total tiki fields indexed'] = $index->getFieldCount();
270
271			if (! is_null($engineResults)) {
272				$fieldsCount = $engineResults->getEngineFieldsCount();
273
274				if ($fieldsCount !== $stat['total tiki fields indexed']) {
275					$stat['total fields used in the ' . $prefs['unified_engine'] . ' search index: '] = $engineResults->getEngineFieldsCount();
276				}
277			}
278
279			$tikilib->set_preference('unified_field_count', $index->getFieldCount());
280			$tikilib->set_preference('unified_identifier_fields', $index->getIdentifierFields());
281		} catch (Exception $e) {
282			Feedback::error(tr('The search index could not be rebuilt.') . '<br />' . $e->getMessage());
283		}
284
285		$stats = [];
286		$stats['default'] = $stat;
287
288		// Force destruction to clear locks
289		if ($indexer) {
290			$indexer->clearSources();
291			unset($indexer);
292		}
293
294		unset($index);
295
296		$oldIndex = null;
297		switch ($prefs['unified_engine']) {
298			case 'lucene':
299				// Current to -old
300				if (file_exists($index_location)) {
301					if (! rename($index_location, $swapName)) {
302						Feedback::error(tr('The active index could not be removed, probably due to a file permission issue.'));
303					}
304				}
305				// -new to current
306				if (! rename($tempName, $index_location)) {
307					Feedback::error(tr('The new index could not be made active, probably due to a file permission issue.'));
308				}
309
310				// Destroy old
311				$oldIndex = new Search_Lucene_Index($swapName);
312				break;
313			case 'elastic':
314				$oldIndex = null; // assignAlias will handle the clean-up
315				$tikilib->set_preference('unified_elastic_index_current', $indexName);
316
317				$connection->assignAlias($aliasName, $indexName);
318
319				break;
320			case 'mysql':
321				// Obtain the old index and destroy it after permanently replacing it.
322				$oldIndex = $this->getIndex('data', false);
323
324				$tikilib->set_preference('unified_mysql_index_current', $indexName);
325				TikiDb::get()->releaseLock($indexName);
326
327				break;
328		}
329
330		if ($oldIndex) {
331			if (! $oldIndex->destroy()) {
332				Feedback::error(tr('Failed to delete the old index.'));
333			}
334		}
335
336		if ($fallback) {
337			// Fallback index was rebuilt. Proceed with default index operations
338			return $stats['default'];
339		}
340
341		// Rebuild mysql as fallback for elasticsearch engine
342		list($fallbackEngine) = TikiLib::lib('unifiedsearch')->getFallbackEngineDetails();
343		if (! $fallback && $fallbackEngine) {
344			$defaultEngine = $prefs['unified_engine'];
345			$prefs['unified_engine'] = $fallbackEngine;
346			$stats['fallback'] = $this->rebuild($loggit, true);
347			$prefs['unified_engine'] = $defaultEngine;
348		}
349
350		// Requeue messages that were added and processed in old index,
351		// while rebuilding the new index
352		$queueLib = TikiLib::lib('queue');
353		$toProcess = $queueLib->pull(
354			self::INCREMENT_QUEUE_REBUILD,
355			$queueLib->count(self::INCREMENT_QUEUE_REBUILD)
356		);
357		$queueLib->pushAll(self::INCREMENT_QUEUE, $toProcess);
358
359		// Process the documents updated while we were processing the update
360		$this->processUpdateQueue(1000);
361
362		if ($prefs['storedsearch_enabled'] == 'y') {
363			TikiLib::lib('storedsearch')->reloadAll();
364		}
365
366		$tikilib->set_preference('unified_last_rebuild', $tikilib->now);
367		$tikilib->set_preference('unified_last_rebuild_stats', $stats);
368
369		$this->isRebuildingNow = false;
370		$access->preventRedirect(false);
371
372		return $stats;
373	}
374
375	/**
376	 * Return the current engine for unified search, version and current index name/table
377	 * @return array
378	 */
379	public function getCurrentEngineDetails()
380	{
381		global $prefs;
382		global $tikilib;
383
384		switch ($prefs['unified_engine']) {
385			case 'lucene':
386				$engine = 'Lucene';
387				$version = '';
388				$index = $prefs['unified_lucene_location'];
389				break;
390			case 'elastic':
391				$elasticsearch = new \Search_Elastic_Connection($prefs['unified_elastic_url']);
392				$engine = 'Elastic';
393				$version = $elasticsearch->getVersion();
394				$index = $prefs['unified_elastic_index_current'];
395				break;
396			case 'mysql':
397				$engine = 'MySQL';
398				$version = $tikilib->getMySQLVersion();
399				$index = $prefs['unified_mysql_index_current'];
400				break;
401			default:
402				$engine = '';
403				$version = '';
404				$index = '';
405				break;
406		}
407
408		return [$engine, $version, $index];
409	}
410
411	/**
412	 * Get the index location depending on $tikidomain for multi-tiki
413	 *
414	 * @param string $indexType
415	 * @param string $engine If not set, it uses default unified search engine
416	 * @return string    path to index directory
417	 * @throws Exception
418	 */
419	private function getIndexLocation($indexType = 'data', $engine = null)
420	{
421		global $prefs, $tikidomain;
422		$mapping = [
423			'lucene' => [
424				'data' => $prefs['unified_lucene_location'],
425				'data-old' => $prefs['unified_lucene_location'] . '-old',
426				'data-new' => $prefs['unified_lucene_location'] . '-new',
427				'preference' => $prefs['tmpDir'] . '/unified-preference-index-' . $prefs['language'],
428			],
429			'elastic' => [
430				'data' => $prefs['unified_elastic_index_prefix'] . 'main',
431				'preference' => $prefs['unified_elastic_index_prefix'] . 'pref_' . $prefs['language'],
432			],
433			'mysql' => [
434				'data' => $prefs['unified_mysql_index_current'],
435				'preference' => 'index_' . 'pref_' . $prefs['language'],
436			],
437		];
438
439		$engine = $engine ?: $prefs['unified_engine'];
440
441		if (isset($mapping[$engine][$indexType])) {
442			$index = $mapping[$engine][$indexType];
443
444			if ($engine == 'lucene' && ! empty($tikidomain)) {
445				$temp = $prefs['tmpDir'];
446				if (strpos($index, $tikidomain) === false && strpos($index, "$temp/") === 0) {
447					$index = str_replace("$temp/", "$temp/$tikidomain/", $index);
448				}
449			}
450
451			return $index;
452		} else {
453			throw new Exception('Internal: Invalid index requested: ' . $indexType);
454		}
455	}
456
457	/**
458	 * @param $type
459	 * @param $objectId
460	 */
461	public function invalidateObject($type, $objectId)
462	{
463		TikiLib::lib('queue')->push(
464			self::INCREMENT_QUEUE,
465			[
466				'object_type' => $type,
467				'object_id' => $objectId
468			]
469		);
470	}
471
472	/**
473	 * @return array
474	 */
475	public function getSupportedTypes()
476	{
477		global $prefs;
478		$types = [];
479
480		if ($prefs['feature_wiki'] == 'y') {
481			$types['wiki page'] = tra('wiki page');
482		}
483
484		if ($prefs['feature_blogs'] == 'y') {
485			$types['blog post'] = tra('blog post');
486		}
487
488		if ($prefs['feature_articles'] == 'y') {
489			$types['article'] = tra('article');
490		}
491
492		if ($prefs['feature_file_galleries'] == 'y') {
493			$types['file'] = tra('file');
494			$types['file gallery'] = tra('file gallery');
495		}
496
497		if ($prefs['feature_forums'] == 'y') {
498			$types['forum post'] = tra('forum post');
499			$types['forum'] = tra('forum');
500		}
501
502		if ($prefs['feature_trackers'] == 'y') {
503			$types['trackeritem'] = tra('tracker item');
504			$types['tracker'] = tra('tracker');
505			$types['trackerfield'] = tra('tracker field');
506		}
507
508		if ($prefs['feature_sheet'] == 'y') {
509			$types['sheet'] = tra('sheet');
510		}
511
512		if ($prefs['feature_wiki_comments'] == 'y'
513			|| $prefs['feature_article_comments'] == 'y'
514			|| $prefs['feature_poll_comments'] == 'y'
515			|| $prefs['feature_file_galleries_comments'] == 'y'
516			|| $prefs['feature_trackers'] == 'y'
517		) {
518			$types['comment'] = tra('comment');
519		}
520
521		if ($prefs['feature_categories'] === 'y') {
522			$types['category'] = tra('category');
523		}
524
525		if ($prefs['feature_webservices'] === 'y') {
526			$types['webservice'] = tra('webservice');
527		}
528
529		if ($prefs['activity_basic_events'] === 'y' || $prefs['activity_custom_events'] === 'y') {
530			$types['activity'] = tra('activity');
531		}
532
533		if ($prefs['feature_calendar'] === 'y') {
534			$types['calendaritem'] = tra('calendar item');
535			$types['calendar'] = tra('calendar');
536		}
537
538		$types['user'] = tra('user');
539		$types['group'] = tra('group');
540
541		return $types;
542	}
543
544
545	public function getLastLogItem()
546	{
547		global $prefs;
548		$files['web'] = $this->getLogFilename(1);
549		$files['console'] = $this->getLogFilename(2);
550		foreach ($files as $type => $file) {
551			if ($fp = @fopen($file, "r")) {
552				$pos = -2;
553				$t = " ";
554				while ($t != "\n") {
555					if (! fseek($fp, $pos, SEEK_END)) {
556						$t = fgetc($fp);
557						$pos = $pos - 1;
558					} else {
559						rewind($fp);
560						break;
561					}
562				}
563				$t = fgets($fp);
564				fclose($fp);
565				$ret[$type] = $t;
566			} else {
567				$ret[$type] = '';
568			}
569		}
570		return $ret;
571	}
572
573	/**
574	 * @param $index
575	 * @param int $loggit 0=no logging, 1=log to Search_Indexer.log, 2=log to Search_Indexer_console.log
576	 * @return Search_Indexer
577	 */
578	private function buildIndexer($index, $loggit = 0)
579	{
580		global $prefs;
581
582		$isRepository = $index instanceof Search_Index_QueryRepository;
583
584		if (! $isRepository && method_exists($index, 'getRealIndex')) {
585			$isRepository = $index->getRealIndex() instanceof Search_Index_QueryRepository;
586		}
587
588		if (! $this->isRebuildingNow && $isRepository && $prefs['storedsearch_enabled'] == 'y') {
589			$index = new Search_Index_QueryAlertDecorator($index);
590		}
591
592		if (! empty($prefs['unified_excluded_categories'])) {
593			$index = new Search_Index_CategoryFilterDecorator(
594				$index,
595				array_filter(
596					array_map(
597						'intval',
598						$prefs['unified_excluded_categories']
599					)
600				)
601			);
602		}
603
604		$logWriter = null;
605
606		if ($loggit) {
607			$logWriter = new Zend\Log\Writer\Stream($this->getLogFilename($loggit), 'w');
608		}
609
610		$indexer = new Search_Indexer($index, $logWriter);
611		$this->addSources($indexer, 'indexing');
612
613		if ($prefs['unified_tokenize_version_numbers'] == 'y') {
614			$indexer->addContentFilter(new Search_ContentFilter_VersionNumber);
615		}
616
617		return $indexer;
618	}
619
620	public function getDocuments($type, $object)
621	{
622		$indexer = $this->buildIndexer($this->getIndex());
623		return $indexer->getDocuments($type, $object);
624	}
625
626	public function getAvailableFields()
627	{
628		$indexer = $this->buildIndexer($this->getIndex());
629		return $indexer->getAvailableFields();
630	}
631
632	/**
633	 * @param Search_Indexer $aggregator
634	 * @param string $mode
635	 */
636	private function addSources($aggregator, $mode = 'indexing')
637	{
638		global $prefs;
639
640		$types = $this->getSupportedTypes();
641
642		// Content Sources
643		if (isset($types['trackeritem'])) {
644			$aggregator->addContentSource('trackeritem', new Search_ContentSource_TrackerItemSource($mode));
645			$aggregator->addContentSource('tracker', new Search_ContentSource_TrackerSource);
646			$aggregator->addContentSource('trackerfield', new Search_ContentSource_TrackerFieldSource);
647		}
648
649		if (isset($types['forum post'])) {
650			$aggregator->addContentSource('forum post', new Search_ContentSource_ForumPostSource);
651			$aggregator->addContentSource('forum', new Search_ContentSource_ForumSource);
652		}
653
654		if (isset($types['blog post'])) {
655			$aggregator->addContentSource('blog post', new Search_ContentSource_BlogPostSource);
656		}
657
658		if (isset($types['article'])) {
659			$articleSource = new Search_ContentSource_ArticleSource;
660			$aggregator->addContentSource('article', $articleSource);
661			$aggregator->addGlobalSource(new Search_GlobalSource_ArticleAttachmentSource($articleSource));
662		}
663
664		if (isset($types['file'])) {
665			$fileSource = new Search_ContentSource_FileSource;
666			$aggregator->addContentSource('file', $fileSource);
667			$aggregator->addContentSource('file gallery', new Search_ContentSource_FileGallerySource);
668			$aggregator->addGlobalSource(new Search_GlobalSource_FileAttachmentSource($fileSource));
669		}
670
671		if (isset($types['sheet'])) {
672			$aggregator->addContentSource('sheet', new Search_ContentSource_SheetSource);
673		}
674
675		if (isset($types['comment'])) {
676			$commentTypes = [];
677			if ($prefs['feature_wiki_comments'] == 'y') {
678				$commentTypes[] = 'wiki page';
679			}
680			if ($prefs['feature_article_comments'] == 'y') {
681				$commentTypes[] = 'article';
682			}
683			if ($prefs['feature_poll_comments'] == 'y') {
684				$commentTypes[] = 'poll';
685			}
686			if ($prefs['feature_file_galleries_comments'] == 'y') {
687				$commentTypes[] = 'file gallery';
688			}
689			if ($prefs['feature_trackers'] == 'y') {
690				$commentTypes[] = 'trackeritem';
691			}
692
693			$aggregator->addContentSource('comment', new Search_ContentSource_CommentSource($commentTypes));
694			$aggregator->addGlobalSource(new Search_GlobalSource_CommentSource);
695		}
696
697		if (isset($types['user'])) {
698			$aggregator->addContentSource('user', new Search_ContentSource_UserSource($prefs['user_in_search_result']));
699		}
700
701		if (isset($types['group'])) {
702			$aggregator->addContentSource('group', new Search_ContentSource_GroupSource);
703		}
704
705		if (isset($types['calendar'])) {
706			$aggregator->addContentSource('calendaritem', new Search_ContentSource_CalendarItemSource());
707			$aggregator->addContentSource('calendar', new Search_ContentSource_CalendarSource());
708		}
709
710		if ($prefs['activity_custom_events'] == 'y' || $prefs['activity_basic_events'] == 'y' || $prefs['monitor_enabled'] == 'y') {
711			$aggregator->addContentSource('activity', new Search_ContentSource_ActivityStreamSource($aggregator instanceof Search_Indexer ? $aggregator : null));
712		}
713
714		if ($prefs['goal_enabled'] == 'y') {
715			$aggregator->addContentSource('goalevent', new Search_ContentSource_GoalEventSource);
716		}
717
718		if ($prefs['feature_webservices'] === 'y') {
719			$aggregator->addContentSource('webservice', new Search_ContentSource_WebserviceSource());
720		}
721
722		if (isset($types['wiki page'])) {
723			$aggregator->addContentSource('wiki page', new Search_ContentSource_WikiSource);
724		}
725
726		// Global Sources
727		if ($prefs['feature_categories'] == 'y') {
728			$aggregator->addGlobalSource(new Search_GlobalSource_CategorySource);
729			$aggregator->addContentSource('category', new Search_ContentSource_CategorySource);
730		}
731
732		if ($prefs['feature_freetags'] == 'y') {
733			$aggregator->addGlobalSource(new Search_GlobalSource_FreeTagSource);
734		}
735
736		if ($prefs['rating_advanced'] == 'y' && $mode == 'indexing') {
737			$aggregator->addGlobalSource(new Search_GlobalSource_AdvancedRatingSource($prefs['rating_recalculation'] == 'indexing'));
738		}
739
740		$aggregator->addGlobalSource(new Search_GlobalSource_Geolocation);
741
742		if ($prefs['feature_search_show_visit_count'] === 'y') {
743			$aggregator->addGlobalSource(new Search_GlobalSource_VisitsSource);
744		}
745
746		if ($prefs['feature_friends'] === 'y') {
747			$aggregator->addGlobalSource(new Search_GlobalSource_SocialSource);
748		}
749
750		if ($mode == 'indexing') {
751			$aggregator->addGlobalSource(new Search_GlobalSource_PermissionSource(Perms::getInstance()));
752			$aggregator->addGlobalSource(new Search_GlobalSource_RelationSource);
753		}
754
755		$aggregator->addGlobalSource(new Search_GlobalSource_TitleInitialSource);
756		$aggregator->addGlobalSource(new Search_GlobalSource_SearchableSource);
757		$aggregator->addGlobalSource(new Search_GlobalSource_UrlSource);
758	}
759
760	/**
761	 * @return Search_Index_Interface
762	 */
763	public function getIndex($indexType = 'data', $useCache = true)
764	{
765		global $prefs, $tiki_p_admin;
766
767		if (isset($this->indices[$indexType]) && $useCache) {
768			return $this->indices[$indexType];
769		}
770
771		$writeMode = false;
772		if ($indexType == 'data-write') {
773			$indexType = 'data';
774			$writeMode = true;
775		}
776
777		$engine = $prefs['unified_engine'];
778		$fallbackMySQL = false;
779
780		if ($engine == 'lucene') {
781			ZendSearch\Lucene\Lucene::setTermsPerQueryLimit($prefs['unified_lucene_terms_limit']);
782			$index = new Search_Lucene_Index($this->getIndexLocation($indexType), $prefs['language'], $prefs['unified_lucene_highlight'] == 'y');
783			$index->setCache(TikiLib::lib('cache'));
784			$index->setMaxResults($prefs['unified_lucene_max_result']);
785			$index->setResultSetLimit($prefs['unified_lucene_max_resultset_limit']);
786
787			return $index;
788		}
789
790		if ($engine == 'elastic' && $index = $this->getIndexLocation($indexType)) {
791			$connection = $this->getElasticConnection($writeMode);
792			if ($connection->getStatus()->status === 200) {
793				$index = new Search_Elastic_Index($connection, $index);
794				$index->setCamelCaseEnabled($prefs['unified_elastic_camel_case'] == 'y');
795				$index->setPossessiveStemmerEnabled($prefs['unified_elastic_possessive_stemmer'] == 'y');
796				$index->setFacetCount($prefs['search_facet_default_amount']);
797
798				if ($useCache) {
799					$this->indices[$indexType] = $index;
800				}
801				return $index;
802			}
803
804			if ($prefs['unified_elastic_mysql_search_fallback'] === 'y') {
805				$fallbackMySQL = true;
806				Feedback::warning(['mes' => tr('Unable to connect to the main search index, MySQL full-text search used, the search results might not be accurate')]);
807				$prefs['unified_incremental_update'] = 'n';
808			}
809		}
810
811		if (($engine == 'mysql' || $fallbackMySQL) && $index = $this->getIndexLocation($indexType, 'mysql')) {
812			$index = new Search_MySql_Index(TikiDb::get(), $index);
813
814			if ($useCache) {
815				$this->indices[$indexType] = $index;
816			}
817			return $index;
818		}
819
820		// Do nothing, provide a fake index.
821		if ($tiki_p_admin != 'y') {
822			Feedback::error(tr('Contact the site administrator. The index needs rebuilding.'));
823		} else {
824			Feedback::error('<a title="' . tr("Rebuild search index") . '" href="tiki-admin.php?page=search&rebuild=now">'
825				. tr("Click here to rebuild index") . '</a>');
826		}
827
828
829		return new Search_Index_Memory;
830	}
831
832	public function getEngineInfo()
833	{
834		global $prefs;
835
836		switch ($prefs['unified_engine']) {
837			case 'elastic':
838				$info = [];
839
840				try {
841					$connection = $this->getElasticConnection(true);
842					$root = $connection->rawApi('/');
843					$info[tr('Client Node')] = $root->name;
844					$info[tr('Elasticsearch Version')] = $root->version->number;
845					$info[tr('Lucene Version')] = $root->version->lucene_version;
846
847					$cluster = $connection->rawApi('/_cluster/health');
848					$info[tr('Cluster Name')] = $cluster->cluster_name;
849					$info[tr('Cluster Status')] = $cluster->status;
850					$info[tr('Cluster Node Count')] = $cluster->number_of_nodes;
851
852					if (version_compare($root->version->number, '1.0.0') === -1) {
853						$status = $connection->rawApi('/_status');
854						foreach ($status->indices as $indexName => $data) {
855							if (strpos($indexName, $prefs['unified_elastic_index_prefix']) === 0) {
856								$info[tr('Index %0', $indexName)] = tr(
857									'%0 documents, totaling %1',
858									$data->docs->num_docs,
859									$data->index->primary_size
860								);
861							}
862						}
863
864						$nodes = $connection->rawApi('/_nodes/jvm/stats');
865						foreach ($nodes->nodes as $node) {
866							$info[tr('Node %0', $node->name)] = tr('Using %0, since %1', $node->jvm->mem->heap_used, $node->jvm->uptime);
867						}
868					} else {
869						$status = $connection->getIndexStatus();
870
871						foreach ($status->indices as $indexName => $data) {
872							if (strpos($indexName, $prefs['unified_elastic_index_prefix']) === 0) {
873								if (isset($data->primaries)) {	// v2
874									$info[tr('Index %0', $indexName)] = tr(
875										'%0 documents, totaling %1 bytes',
876										$data->primaries->docs->count,
877										number_format($data->primaries->store->size_in_bytes)
878									);
879								} else {					// v1
880									$info[tr('Index %0', $indexName)] = tr(
881										'%0 documents, totaling %1 bytes',
882										$data->docs->num_docs,
883										number_format($data->index->primary_size_in_bytes)
884									);
885								}
886							}
887						}
888
889						$nodes = $connection->rawApi('/_nodes/stats');
890						foreach ($nodes->nodes as $node) {
891							$info[tr('Node %0', $node->name)] = tr('Using %0 bytes, since %1', number_format($node->jvm->mem->heap_used_in_bytes), date('Y-m-d H:i:s', $node->jvm->timestamp / 1000));
892						}
893
894						if (! empty($prefs['unified_field_count'])) {
895							$info[tr('Field Count Tried on Last Rebuild')] = $prefs['unified_field_count'];
896							if ($prefs['unified_field_count'] > $prefs['unified_elastic_field_limit']) {
897								$info[tr('Warning')] = tr('Field limit setting is lower than Tiki needs to store in the index!');
898							}
899						}
900					}
901				} catch (Search_Elastic_Exception $e) {
902					$info[tr('Information Missing')] = $e->getMessage();
903				}
904
905				return $info;
906			default:
907				return [];
908		}
909	}
910
911	public function getElasticIndexInfo($indexName)
912	{
913		$connection = $this->getElasticConnection(false);
914
915		try {
916			$mapping = $connection->rawApi("/$indexName/_mapping");
917
918			return $mapping;
919		} catch (Search_Elastic_Exception $e) {
920			return false;
921		}
922	}
923
924	private function getElasticConnection($useMasterOnly)
925	{
926		global $prefs;
927		static $connections = [];
928
929		$target = $prefs['unified_elastic_url'];
930
931		if (! $useMasterOnly && $prefs['federated_elastic_url']) {
932			$target = $prefs['federated_elastic_url'];
933		}
934
935		if (! empty($connections[$target])) {
936			return $connections[$target];
937		}
938
939		$connection = new Search_Elastic_Connection($target);
940		$connection->startBulk();
941		$connection->persistDirty(TikiLib::events());
942
943		$connections[$target] = $connection;
944		return $connection;
945	}
946
947	/**
948	 * @param string $mode
949	 * @return Search_Formatter_DataSource_Interface
950	 */
951	public function getDataSource($mode = 'formatting')
952	{
953		global $prefs;
954
955		$dataSource = new Search_Formatter_DataSource_Declarative;
956
957		$this->addSources($dataSource, $mode);
958
959		if ($mode === 'formatting') {
960			if ($prefs['unified_engine'] === 'mysql') {
961				$dataSource->setPrefilter(
962					function ($fields, $entry) {
963						return array_filter(
964							$fields,
965							function ($field) use ($entry) {
966								if (! empty($entry[$field])) {
967									return preg_match('/token[a-z]{20,}/', $entry[$field]);
968								}
969								return true;
970							}
971						);
972					}
973				);
974			} elseif ($prefs['unified_engine'] === 'elastic') {
975				$dataSource->setPrefilter(
976					function ($fields, $entry) {
977						return array_filter(
978							$fields,
979							function ($field) use ($entry) {
980								return ! isset($entry[$field]);
981							}
982						);
983					}
984				);
985			}
986		}
987
988		return $dataSource;
989	}
990
991	public function getProfileExportHelper()
992	{
993		$helper = new Tiki_Profile_Writer_SearchFieldHelper;
994		$this->addSources($helper, 'indexing'); // Need all fields, so use indexing
995
996		return $helper;
997	}
998
999	/**
1000	 * @return Search_Query_WeightCalculator_Field
1001	 */
1002	public function getWeightCalculator()
1003	{
1004		global $prefs;
1005
1006		$lines = explode("\n", $prefs['unified_field_weight']);
1007
1008		$weights = [];
1009		foreach ($lines as $line) {
1010			$parts = explode(':', $line, 2);
1011			if (count($parts) == 2) {
1012				$parts = array_map('trim', $parts);
1013
1014				$weights[$parts[0]] = $parts[1];
1015			}
1016		}
1017
1018		return new Search_Query_WeightCalculator_Field($weights);
1019	}
1020
1021	public function initQuery(Search_Query $query)
1022	{
1023		$this->initQueryBase($query);
1024		$this->initQueryPermissions($query);
1025		$this->initQueryPresentation($query);
1026	}
1027
1028	public function initQueryBase($query, $applyJail = true)
1029	{
1030		global $prefs;
1031
1032		$query->setWeightCalculator($this->getWeightCalculator());
1033		$query->setIdentifierFields($prefs['unified_identifier_fields']);
1034
1035		$categlib = TikiLib::lib('categ');
1036		if ($applyJail && $jail = $categlib->get_jail(false)) {
1037			$query->filterCategory(implode(' or ', $jail), true);
1038		}
1039	}
1040
1041	public function initQueryPermissions($query)
1042	{
1043		global $user;
1044
1045		if (! Perms::get()->admin) {
1046			$query->filterPermissions(Perms::get()->getGroups(), $user);
1047		}
1048	}
1049
1050	public function initQueryPresentation($query)
1051	{
1052		$query->applyTransform(new Search_Formatter_Transform_DynamicLoader($this->getDataSource('formatting')));
1053	}
1054
1055	/**
1056	 * @param array $filter
1057	 * @return Search_Query
1058	 */
1059	public function buildQuery(array $filter, $query = null)
1060	{
1061		if (! $query) {
1062			$query = new Search_Query;
1063			$this->initQuery($query);
1064		}
1065
1066		if (! is_array($filter)) {
1067			throw new Exception('Invalid filter type provided in query. It must be an array.');
1068		}
1069
1070		if (isset($filter['type']) && $filter['type']) {
1071			$query->filterType($filter['type']);
1072		}
1073
1074		if (isset($filter['categories']) && $filter['categories']) {
1075			$query->filterCategory($filter['categories'], isset($filter['deep']));
1076		}
1077
1078		if (isset($filter['tags']) && $filter['tags']) {
1079			$query->filterTags($filter['tags']);
1080		}
1081
1082		if (isset($filter['content']) && $filter['content']) {
1083			$o = TikiLib::lib('tiki')->get_preference('unified_default_content', ['contents'], true);
1084			if (count($o) == 1 && empty($o[0])) {
1085				// Use "contents" field by default, if no default is specified
1086				$query->filterContent($filter['content'], ['contents']);
1087			} else {
1088				$query->filterContent($filter['content'], $o);
1089			}
1090		}
1091
1092		if (isset($filter['autocomplete']) && $filter['autocomplete']) {
1093			$query->filterInitial($filter['autocomplete']);
1094		}
1095
1096		if (isset($filter['language']) && $filter['language']) {
1097			$q = $filter['language'];
1098			if (preg_match('/^\w+\-\w+$/', $q)) {
1099				$q = "\"$q\"";
1100			}
1101
1102			if (isset($filter['language_unspecified'])) {
1103				$q = "($q) or unknown";
1104			}
1105
1106			$query->filterLanguage($q);
1107		}
1108
1109		if (isset($filter['groups'])) {
1110			$query->filterMultivalue($filter['groups'], 'groups');
1111		}
1112
1113		if (isset($filter['prefix']) && is_array($filter['prefix'])) {
1114			foreach ($filter['prefix'] as $field => $prefix) {
1115				$query->filterInitial((string) $prefix, $field);
1116			}
1117
1118			unset($filter['prefix']);
1119		}
1120
1121		if (isset($filter['not_prefix']) && is_array($filter['not_prefix'])) {
1122			foreach ($filter['not_prefix'] as $field => $prefix) {
1123				$query->filterNotInitial((string) $prefix, $field);
1124			}
1125
1126			unset($filter['not_prefix']);
1127		}
1128
1129		if (isset($filter['distance']) && is_array($filter['distance']) &&
1130					isset($filter['distance']['distance'], $filter['distance']['lat'], $filter['distance']['lon'])) {
1131			$query->filterDistance($filter['distance']['distance'], $filter['distance']['lat'], $filter['distance']['lon']);
1132
1133			unset($filter['distance']);
1134		}
1135
1136		unset($filter['type']);
1137		unset($filter['categories']);
1138		unset($filter['deep']);
1139		unset($filter['tags']);
1140		unset($filter['content']);
1141		unset($filter['language']);
1142		unset($filter['language_unspecified']);
1143		unset($filter['autocomplete']);
1144		unset($filter['groups']);
1145
1146		foreach ($filter as $key => $value) {
1147			if ($value) {
1148				$query->filterContent($value, $key);
1149			}
1150		}
1151
1152		return $query;
1153	}
1154
1155	public function getFacetProvider()
1156	{
1157		global $prefs;
1158		$types = $this->getSupportedTypes();
1159
1160		$facets = [
1161			Search_Query_Facet_Term::fromField('object_type')
1162				->setLabel(tr('Object Type'))
1163				->setRenderMap($types),
1164		];
1165
1166		if ($prefs['feature_multilingual'] == 'y') {
1167			$facets[] = Search_Query_Facet_Term::fromField('language')
1168				->setLabel(tr('Language'))
1169				->setRenderMap(TikiLib::lib('language')->get_language_map());
1170		}
1171
1172		if ($prefs['search_date_facets'] == 'y') {
1173			$facets[] = Search_Query_Facet_DateHistogram::fromField('date')
1174				->setName(tr('date_histogram'))
1175				->setLabel(tr('Date Histogram'))
1176				->setInterval($prefs['search_date_facets_interval'])
1177				->setRenderCallback(function ($date) {
1178					$out = TikiLib::lib('tiki')->get_short_date($date / 1000);
1179					return $out;
1180				});
1181
1182			if ($prefs['search_date_facets_ranges']) {
1183				$facet = Search_Query_Facet_DateRange::fromField('date')
1184					->setName(tr('date_range'))
1185					->setLabel(tr('Date Range'))
1186					->setRenderCallback(function ($label) {
1187						return $label;
1188					});
1189
1190				$ranges = explode("\n", $prefs['search_date_facets_ranges']);
1191				foreach (array_filter($ranges) as & $range) {
1192					$range = explode(',', $range);
1193					if (count($range) > 2) {
1194						$facet->addRange($range[1], $range[0], $range[2]);
1195					} elseif (count($range) > 1) {
1196						$facet->addRange($range[1], $range[0]);
1197					}
1198				}
1199
1200
1201				$facets[] = $facet;
1202			}
1203		}
1204
1205		if ($prefs['federated_enabled'] === 'y') {
1206			$tiki_extwiki = TikiDb::get()->table('tiki_extwiki');
1207
1208			$indexMap = [
1209				$this->getIndexLocation() => tr('Local Search'),
1210			];
1211
1212			foreach (TikiLib::lib('federatedsearch')->getIndices() as $indexname => $index) {
1213				$indexMap[$indexname] = $tiki_extwiki->fetchOne('name', [
1214					'indexname' => $indexname,
1215				]);
1216			}
1217
1218			$facets[] = Search_Query_Facet_Term::fromField('_index')
1219				->setLabel(tr('Federated Search'))
1220				->setRenderCallback(function ($index) use (&$indexMap) {
1221					$out = tr('Index not found');
1222					if (isset($indexMap[$index])) {
1223						$out = $indexMap[$index];
1224					} else {
1225						foreach ($indexMap as $candidate => $name) {
1226							if (0 === strpos($index, $candidate . '_')) {
1227								$indicesMap[$index] = $name;
1228								$out = $name;
1229								break;
1230							}
1231						}
1232					}
1233					return $out;
1234				});
1235		}
1236
1237		$provider = new Search_FacetProvider;
1238		$provider->addFacets($facets);
1239		$this->addSources($provider);
1240
1241		return $provider;
1242	}
1243
1244	public function getRawArray($document)
1245	{
1246		return array_map(function ($entry) {
1247			if (is_object($entry)) {
1248				if (method_exists($entry, 'getRawValue')) {
1249					return $entry->getRawValue();
1250				} else {
1251					return $entry->getValue();
1252				}
1253			} else {
1254				return $entry;
1255			}
1256		}, $document);
1257	}
1258
1259	public function isOutdated()
1260	{
1261
1262		global $prefs;
1263
1264		// If incremental update is enabled we cannot rely on the unified_last_rebuild date.
1265		if ($prefs['feature_search'] == 'n' || $prefs['unified_incremental_update'] == 'y') {
1266			return false;
1267		}
1268
1269		$tikilib = TikiLib::lib('tiki');
1270
1271		$last_rebuild = $tikilib->get_preference('unified_last_rebuild');
1272		$threshold = strtotime('+ ' . $prefs['search_index_outdated'] . ' days', $last_rebuild);
1273
1274		$types = $this->getSupportedTypes();
1275
1276		// Content Sources
1277		if (isset($types['wiki page'])) {
1278			$last_page = $tikilib->list_pages(0, 1, 'lastModif_desc', '', '', true, false, false, false);
1279			if (! empty($last_page['data'][0]['lastModif']) && $last_page['data'][0]['lastModif'] > $threshold) {
1280				return true;
1281			}
1282		}
1283
1284		if (isset($types['forum post'])) {
1285			$commentslib = TikiLib::lib('comments');
1286
1287			$last_forum_post = $commentslib->get_all_comments('forum', 0, -1, 'commentDate_desc');
1288			if (! empty($last_forum_post['data'][0]['commentDate']) && $last_forum_post['data'][0]['commentDate'] > $threshold) {
1289				return true;
1290			}
1291
1292			$last_forum = $commentslib->list_forums(0, 1, 'created_desc');
1293			if (! empty($last_forum['data'][0]['created']) && $last_forum['data'][0]['created'] > $threshold) {
1294				return true;
1295			}
1296		}
1297
1298		if (isset($types['blog post'])) {
1299			$last_blog_post = Tikilib::lib('blog')->list_blog_posts(0, false, 0, 1, 'lastModif_desc');
1300			if (! empty($last_blog_post['data'][0]['lastModif']) && $last_blog_post['data'][0]['lastModif'] > $threshold) {
1301				return true;
1302			}
1303		}
1304
1305		if (isset($types['article'])) {
1306			$last_article = Tikilib::lib('art')->list_articles(0, 1, 'lastModif_desc');
1307			if (! empty($last_article['data'][0]['lastModif']) && $last_article['data'][0]['lastModif'] > $threshold) {
1308				return true;
1309			}
1310		}
1311
1312		if (isset($types['file'])) {
1313			// todo: files are indexed automatically, probably nothing to do here.
1314		}
1315
1316		if (isset($types['trackeritem'])) {
1317			$trackerlib = TikiLib::lib('trk');
1318
1319			$last_tracker_item = $trackerlib->list_tracker_items(-1, 0, 1, 'lastModif_desc', null);
1320			if (! empty($last_tracker_item['data'][0]['lastModif']) && $last_tracker_item['data'][0]['lastModif'] > $threshold) {
1321				return true;
1322			}
1323
1324			$last_tracker = $trackerlib->list_trackers(0, 1, 'lastModif_desc');
1325			if (! empty($last_tracker['data'][0]['lastModif']) && $last_tracker['data'][0]['lastModif'] > $threshold) {
1326				return true;
1327			}
1328
1329			// todo: Missing tracker_fields
1330		}
1331
1332		if (isset($types['sheet'])) {
1333			$sheetlib = TikiLib::lib('sheet');
1334
1335			$last_sheet = $sheetlib->list_sheets(0, 1, 'begin_desc');
1336			if (! empty($last_sheet['data'][0]['begin']) && $last_sheet['data'][0]['begin'] > $threshold) {
1337				return true;
1338			}
1339		}
1340
1341		if (isset($types['comment'])) {
1342			$commentTypes = [];
1343			if ($prefs['feature_wiki_comments'] == 'y') {
1344				$commentTypes[] = 'wiki page';
1345			}
1346			if ($prefs['feature_article_comments'] == 'y') {
1347				$commentTypes[] = 'article';
1348			}
1349			if ($prefs['feature_poll_comments'] == 'y') {
1350				$commentTypes[] = 'poll';
1351			}
1352			if ($prefs['feature_file_galleries_comments'] == 'y') {
1353				$commentTypes[] = 'file gallery';
1354			}
1355			if ($prefs['feature_trackers'] == 'y') {
1356				$commentTypes[] = 'trackeritem';
1357			}
1358
1359			$commentslib = TikiLib::lib('comments');
1360
1361			$last_comment = $commentslib->get_all_comments($commentTypes, 0, 1, 'commentDate_desc');
1362			if (! empty($last_comment['data'][0]['commentDate']) && $last_comment['data'][0]['commentDate'] > $threshold) {
1363				return true;
1364			}
1365		}
1366
1367		if (isset($types['user'])) {
1368			$userlib = TikiLib::lib('user');
1369
1370			$last_user = $userlib->get_users(0, 1, 'created_desc');
1371			if (! empty($last_user['data'][0]['created']) && $last_user['data'][0]['created'] > $threshold) {
1372				return true;
1373			}
1374		}
1375
1376		if (isset($types['group'])) {
1377			// todo: unable to track groups by dates
1378		}
1379	}
1380
1381	/**
1382	 * Provide the name of the log file
1383	 *
1384	 * @param int $rebuildType    0: no log, 1: browser rebuild, 2: console rebuild
1385	 * @return string
1386	 */
1387	public function getLogFilename($rebuildType = 0): string
1388	{
1389		global $prefs;
1390
1391		$logName = 'Search_Indexer';
1392
1393		switch ($prefs['unified_engine']) {
1394			case 'elastic':
1395				$logName .= '_elastic_' . rtrim($prefs['unified_elastic_index_prefix'], '_');
1396				break;
1397			case 'mysql':
1398				$logName .= '_mysql_' . TikiDb::get()->getOne('SELECT DATABASE()');
1399				break;
1400			case 'lucene':
1401				$logName .= '_lucene';
1402				break;
1403		}
1404		if ($rebuildType == 2) {
1405			$logName .= '_console';
1406		}
1407		$logName = $prefs['tmpDir'] . (substr($prefs['tmpDir'], -1) === '/' ? '' : '/') . $logName . '.log';
1408		return $logName;
1409	}
1410
1411	/**
1412	 * Return the fallback search engine name
1413	 *
1414	 * @return array|null
1415	 */
1416	public function getFallbackEngineDetails()
1417	{
1418		global $prefs, $tikilib;
1419
1420		if ($prefs['unified_engine'] == 'elastic' && $prefs['unified_elastic_mysql_search_fallback'] === 'y') {
1421			$engine = 'mysql';
1422			$engineName = 'MySQL';
1423			$version = $tikilib->getMySQLVersion();
1424			$index = $prefs['unified_mysql_index_current'];
1425
1426			return [$engine, $engineName, $version, $index];
1427		}
1428
1429		return null;
1430	}
1431}
1432