1<?php
2// (c) Copyright by authors of the Tiki Wiki CMS Groupware Project
3//
4// All Rights Reserved. See copyright.txt for details and a complete list of authors.
5// Licensed under the GNU LESSER GENERAL PUBLIC LICENSE. See license.txt for details.
6// $Id$
7
8use Search_Expr_Token as Token;
9use Search_Expr_And as AndX;
10use Search_Expr_Or as OrX;
11use Search_Expr_Not as NotX;
12use Search_Expr_Range as Range;
13use Search_Expr_Initial as Initial;
14use Search_Expr_MoreLikeThis as MoreLikeThis;
15use Search_Expr_ImplicitPhrase as ImplicitPhrase;
16use Search_Expr_ExplicitPhrase as ExplicitPhrase;
17
/**
 * The rescore query builder generates a list of match_phrase queries to re-score
 * the first few results based on the phrase proximities.
 *
 * The builder passes itself as the callback to the expression's walk() method;
 * __invoke() collects at most one match_phrase clause per node, keyed by the
 * node's spl_object_hash() so that a clause can later be cancelled.
 */
class Search_Elastic_RescoreQueryBuilder
{
	/** Number of top results the rescore query is applied to. */
	const WINDOW_SIZE = 50;

	/** Slop value set on every generated match_phrase clause. */
	const PHRASE_SLOP = 50;

	/** Type factory handed to the nodes' getValue() when extracting terms. */
	private $factory;

	/**
	 * Callback stored by setDocumentReader(). It is not invoked anywhere in
	 * this class; the default implementation simply returns null.
	 */
	private $documentReader;

	/** match_phrase clauses collected during build(), keyed by spl_object_hash() of the node. */
	private $accumulate;

	public function __construct()
	{
		$this->factory = new Search_Elastic_TypeFactory;
		$this->documentReader = function ($type, $object) {
			return null;
		};
	}

	/**
	 * Builds the "rescore" section of the query for the given expression.
	 *
	 * @param Search_Expr_Interface $expr expression tree to walk
	 * @return array array with a single 'rescore' key; the 'should' list is
	 *               empty when no phrase was collected
	 */
	public function build(Search_Expr_Interface $expr)
	{
		// Start from a clean state so the builder can be reused.
		$this->accumulate = [];

		$expr->walk($this);

		$query = [
			'rescore' => [
				'window_size' => self::WINDOW_SIZE,
				'query' => [
					'rescore_query' => [
						'bool' => [
							// Drop the object-hash keys: a plain list is expected here.
							'should' => array_values($this->accumulate),
						],
					],
				],
			],
		];

		return $query;
	}

	/**
	 * Replaces the stored document reader callback.
	 *
	 * @param mixed $callback callback to store (stored as-is, not validated)
	 */
	public function setDocumentReader($callback)
	{
		$this->documentReader = $callback;
	}

	/**
	 * Used when a negation, or a more complete phrase makes a subtree irrelevant
	 *
	 * Walks the given node and removes any clause previously collected for the
	 * nodes visited by that walk.
	 */
	private function cancelNode($node)
	{
		$node->walk(function ($visited) {
			unset($this->accumulate[spl_object_hash($visited)]);
		});
	}

	/**
	 * Registers a match_phrase clause for the node, replacing any clause
	 * already collected for the node or for the nodes reached by walking it.
	 *
	 * @param object $node node the clause is attached to; its getWeight() is used as boost
	 * @param string|null $field field to match on; read from the node when omitted
	 * @param string|null $phrase phrase to match; read from the node when omitted
	 */
	private function addPhrase($node, $field = null, $phrase = null)
	{
		// Test for "not supplied" explicitly. A truthiness test (?:) would also
		// discard legitimate values such as the phrase "0" and read the value
		// from the node instead of using what the caller provided.
		if ($field === null || $field === '') {
			$field = $node->getField();
		}
		if ($phrase === null || $phrase === '') {
			$phrase = $this->getTerm($node);
		}

		$boost = $node->getWeight();

		$this->cancelNode($node);

		$this->accumulate[spl_object_hash($node)] = [
			'match_phrase' => [
				$field => [
					'query' => $phrase,
					'boost' => $boost,
					'slop' => self::PHRASE_SLOP,
				],
			],
		];
	}

	/**
	 * Combines the children of an implicit phrase into one match_phrase clause.
	 *
	 * A clause is only added when every child is a token sharing the type and
	 * field of the first child; otherwise nothing is collected for the node.
	 *
	 * @param object $node the implicit phrase node
	 * @param array $childNodes values produced by the walk for the node's children
	 */
	private function addImplicitPhrase($node, $childNodes)
	{
		$first = reset($childNodes);
		if (! ($first instanceof Token)) {
			return;
		}

		$firstType = $first->getType();
		$firstField = $first->getField();

		$terms = [];
		foreach ($childNodes as $child) {
			$sameKind = $child instanceof Token
				&& $firstType == $child->getType()
				&& $firstField == $child->getField();

			if (! $sameKind) {
				// A single mismatching child disqualifies the whole phrase.
				return;
			}

			$terms[] = $this->getTerm($child);
		}

		// NOTE(review): unlike single tokens, the shared type is not required
		// to be 'plaintext' here - confirm this is intended.
		$this->addPhrase($node, $firstField, implode(' ', $terms));
	}

	/**
	 * Walk callback: collects or cancels phrases depending on the node kind.
	 *
	 * @param object $node node being visited
	 * @param array $childNodes values returned by this callback for the node's children
	 * @return object|null the node for tokens and phrases, null otherwise
	 */
	public function __invoke($node, $childNodes)
	{
		if ($node instanceof ExplicitPhrase || $node instanceof Token) {
			// Only free-text values take part in phrase rescoring.
			if ($node->getType() === 'plaintext') {
				$this->addPhrase($node);
			}

			return $node;
		}

		if ($node instanceof ImplicitPhrase) {
			$this->addImplicitPhrase($node, $childNodes);

			return $node;
		}

		if ($node instanceof NotX) {
			// Negated subtrees must not contribute any clause.
			$this->cancelNode($node);
		}

		return null;
	}

	/**
	 * Returns the lower-cased value of the node.
	 *
	 * @param object $node node exposing getValue()
	 * @return string
	 */
	private function getTerm($node)
	{
		$value = $node->getValue($this->factory);
		return mb_strtolower($value->getValue());
	}
}
138