<?php
// (c) Copyright by authors of the Tiki Wiki CMS Groupware Project
//
// All Rights Reserved. See copyright.txt for details and a complete list of authors.
// Licensed under the GNU LESSER GENERAL PUBLIC LICENSE. See license.txt for details.
// $Id$

use Search_Expr_Token as Token;
use Search_Expr_And as AndX;
use Search_Expr_Or as OrX;
use Search_Expr_Not as NotX;
use Search_Expr_Range as Range;
use Search_Expr_Initial as Initial;
use Search_Expr_MoreLikeThis as MoreLikeThis;
use Search_Expr_ImplicitPhrase as ImplicitPhrase;
use Search_Expr_ExplicitPhrase as ExplicitPhrase;

/**
 * The rescore query builder generates a list of match_phrase queries to re-score
 * the first few results based on the phrase proximities.
 *
 * An instance is used as the callback of Search_Expr_Interface::walk(): every
 * visited node is handed to __invoke(), which records (or cancels) match_phrase
 * clauses in $accumulate. build() then wraps the surviving clauses in a
 * "rescore" request fragment.
 */
class Search_Elastic_RescoreQueryBuilder
{
    /** Type factory handed to each node's getValue() when extracting terms. */
    private $factory;

    /**
     * Callback stored through setDocumentReader(). It is not invoked anywhere
     * in this class; the default implementation simply returns null.
     */
    private $documentReader;

    /**
     * Collected match_phrase clauses, keyed by spl_object_hash() of the
     * expression node that produced them, so a clause can be removed again
     * when its node is cancelled.
     */
    private $accumulate;

    /**
     * Sets up the type factory and a default document reader returning null.
     */
    public function __construct()
    {
        $this->factory = new Search_Elastic_TypeFactory;
        $this->documentReader = function ($type, $object) {
            return null;
        };
    }

    /**
     * Builds the rescore part of a query for the given expression.
     *
     * The accumulator is reset on every call, so the builder can be reused.
     *
     * @param Search_Expr_Interface $expr expression tree to inspect
     * @return array array with a single 'rescore' key holding the window size
     *               and a bool/should list of the collected match_phrase clauses
     */
    public function build(Search_Expr_Interface $expr)
    {
        $this->accumulate = [];

        // The builder itself is the walk callback, see __invoke().
        $expr->walk($this);

        return [
            'rescore' => [
                'window_size' => 50,
                'query' => [
                    'rescore_query' => [
                        'bool' => [
                            // Drop the object-hash keys so the clauses form a plain list.
                            'should' => array_values($this->accumulate),
                        ],
                    ],
                ],
            ],
        ];
    }

    /**
     * Replaces the stored document reader callback.
     *
     * @param callable $callback stored as-is; not called by this class
     */
    public function setDocumentReader($callback)
    {
        $this->documentReader = $callback;
    }

    /**
     * Used when a negation, or a more complete phrase makes a subtree irrelevant
     *
     * Walks the given node and removes the accumulated clause of every node
     * the walk visits.
     */
    private function cancelNode($node)
    {
        $node->walk(function ($visited) {
            unset($this->accumulate[spl_object_hash($visited)]);
        });
    }

    /**
     * Registers a match_phrase clause for a node, replacing any clauses
     * previously registered for the nodes reached by walking it.
     *
     * @param object      $node   expression node providing the weight (and the
     *                            field/term when these are not supplied)
     * @param string|null $field  field to match; defaults to $node->getField()
     * @param string|null $phrase phrase to match; defaults to the node's term
     */
    private function addPhrase($node, $field = null, $phrase = null)
    {
        // Only null or an empty string counts as "not supplied". A plain
        // truthiness test would also discard a legitimate value such as "0".
        if ($field === null || $field === '') {
            $field = $node->getField();
        }

        if ($phrase === null || $phrase === '') {
            $phrase = $this->getTerm($node);
        }

        $boost = $node->getWeight();

        // The clause added below supersedes whatever the subtree contributed.
        $this->cancelNode($node);

        $this->accumulate[spl_object_hash($node)] = [
            'match_phrase' => [
                $field => [
                    'query' => $phrase,
                    'boost' => $boost,
                    'slop' => 50,
                ],
            ],
        ];
    }

    /**
     * Walk callback: inspects one expression node.
     *
     * - Explicit phrases and tokens of type 'plaintext' each get their own clause.
     * - An implicit phrase whose children are all tokens sharing the first
     *   child's type and field gets one clause for the space-joined terms.
     * - A negation cancels every clause collected for the nodes beneath it.
     *
     * @param object $node       node being visited
     * @param array  $childNodes presumably the values this callback returned for
     *                           the node's children - confirm against the
     *                           walk() implementations
     * @return object|null the node for phrases and tokens, null otherwise
     */
    public function __invoke($node, $childNodes)
    {
        if ($node instanceof ExplicitPhrase || $node instanceof Token) {
            if ($node->getType() == 'plaintext') {
                $this->addPhrase($node);
            }

            return $node;
        }

        if ($node instanceof ImplicitPhrase) {
            $first = reset($childNodes);

            if ($first instanceof Token) {
                $firstType = $first->getType();
                $firstField = $first->getField();

                $terms = [];
                foreach ($childNodes as $child) {
                    if ($child instanceof Token
                        && $firstType == $child->getType()
                        && $firstField == $child->getField()
                    ) {
                        $terms[] = $this->getTerm($child);
                    }
                }

                // Only build a phrase when every single child qualified.
                if (count($terms) === count($childNodes)) {
                    $this->addPhrase($node, $firstField, implode(' ', $terms));
                }
            }

            return $node;
        }

        if ($node instanceof NotX) {
            $this->cancelNode($node);
        }

        return null;
    }

    /**
     * Returns the lower-cased value of a node as produced through the type factory.
     */
    private function getTerm($node)
    {
        $value = $node->getValue($this->factory);

        return mb_strtolower($value->getValue());
    }
}