1<?php
2/**
3 * Zend Framework (http://framework.zend.com/)
4 *
5 * @link      http://github.com/zendframework/zf2 for the canonical source repository
6 * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
7 * @license   http://framework.zend.com/license/new-bsd New BSD License
8 * @package   Zend_Search
9 */
10
11namespace ZendSearch\Lucene\Search\Query;
12
13use ZendSearch\Lucene;
14use ZendSearch\Lucene\Document;
15use ZendSearch\Lucene\Search\Highlighter\DefaultHighlighter;
16use ZendSearch\Lucene\Search\Highlighter\HighlighterInterface as Highlighter;
17
18/**
19 * @category   Zend
20 * @package    Zend_Search_Lucene
21 * @subpackage Search
22 */
abstract class AbstractQuery
{
    /**
     * Query boost factor. Scores of documents matching this query are
     * multiplied by it (in addition to the normal weightings).
     *
     * @var float
     */
    private $_boost = 1;

    /**
     * Weight object of this query. It stays null until a weight has been
     * created and is cleared again by reset().
     *
     * @var \ZendSearch\Lucene\Search\Weight\AbstractWeight|null
     */
    protected $_weight = null;

    /**
     * Gets the boost for this clause. Documents matching this clause will
     * (in addition to the normal weightings) have their score multiplied
     * by boost. The boost is 1.0 by default.
     *
     * @return float
     */
    public function getBoost()
    {
        return $this->_boost;
    }

    /**
     * Sets the boost for this query clause to $boost.
     *
     * The value is stored as given; no validation or conversion is applied.
     *
     * @param float $boost
     */
    public function setBoost($boost)
    {
        $this->_boost = $boost;
    }

    /**
     * Score specified document
     *
     * @param integer $docId
     * @param \ZendSearch\Lucene\SearchIndexInterface $reader
     * @return float
     */
    abstract public function score($docId, Lucene\SearchIndexInterface $reader);

    /**
     * Get document ids likely matching the query
     *
     * It's an array with document ids as keys (performance considerations)
     *
     * @return array
     */
    abstract public function matchedDocs();

    /**
     * Execute query in context of index reader.
     * It also initializes necessary internal structures.
     *
     * Query-type specific implementation.
     *
     * @param \ZendSearch\Lucene\SearchIndexInterface $reader
     * @param \ZendSearch\Lucene\Index\DocsFilter|null $docsFilter
     */
    abstract public function execute(Lucene\SearchIndexInterface $reader, $docsFilter = null);

    /**
     * Constructs an appropriate Weight implementation for this query.
     *
     * @param \ZendSearch\Lucene\SearchIndexInterface $reader
     * @return \ZendSearch\Lucene\Search\Weight\AbstractWeight
     */
    abstract public function createWeight(Lucene\SearchIndexInterface $reader);

    /**
     * Constructs and initializes a Weight for a _top-level_query_.
     *
     * If a weight is already present it is returned untouched. Otherwise a
     * weight is created through createWeight(), its sum of squared weights is
     * turned into a query norm by the reader's similarity, and the weight is
     * normalized with that norm.
     *
     * @param \ZendSearch\Lucene\SearchIndexInterface $reader
     * @return \ZendSearch\Lucene\Search\Weight\AbstractWeight
     */
    protected function _initWeight(Lucene\SearchIndexInterface $reader)
    {
        // Weight is already initialized (this is not a fresh top-level query).
        if ($this->_weight !== null) {
            return $this->_weight;
        }

        // createWeight() implementations may store the weight in $this->_weight
        // themselves, only return it, or do both. Adopt the returned object
        // when there is one so that all three variants are supported.
        $weight = $this->createWeight($reader);
        if ($weight !== null) {
            $this->_weight = $weight;
        }

        $sum       = $this->_weight->sumOfSquaredWeights();
        $queryNorm = $reader->getSimilarity()->queryNorm($sum);
        $this->_weight->normalize($queryNorm);

        // Return the weight on this path too, consistently with the early exit.
        return $this->_weight;
    }

    /**
     * Re-write query into primitive queries in the context of specified index
     *
     * @param \ZendSearch\Lucene\SearchIndexInterface $index
     * @return \ZendSearch\Lucene\Search\Query\AbstractQuery
     */
    abstract public function rewrite(Lucene\SearchIndexInterface $index);

    /**
     * Optimize query in the context of specified index
     *
     * @param \ZendSearch\Lucene\SearchIndexInterface $index
     * @return \ZendSearch\Lucene\Search\Query\AbstractQuery
     */
    abstract public function optimize(Lucene\SearchIndexInterface $index);

    /**
     * Reset query, so it can be reused within other queries or
     * with other indices.
     *
     * Drops the stored weight, so the next _initWeight() call builds a new one.
     */
    public function reset()
    {
        $this->_weight = null;
    }

    /**
     * Print a query
     *
     * @return string
     */
    abstract public function __toString();

    /**
     * Return query terms
     *
     * @return array
     */
    abstract public function getQueryTerms();

    /**
     * Query-type specific matches highlighting
     *
     * @param Highlighter $highlighter  Highlighter object (also contains doc for highlighting)
     */
    abstract protected function _highlightMatches(Highlighter $highlighter);

    /**
     * Highlight matches in $inputHTML
     *
     * The input is loaded as a complete HTML document, handed to the
     * highlighter, processed by the query-specific _highlightMatches() and
     * returned as HTML.
     *
     * @param string $inputHTML
     * @param string $defaultEncoding   HTML encoding, is used if it's not specified using Content-type HTTP-EQUIV meta tag.
     * @param Highlighter|null $highlighter  Highlighter to use; a DefaultHighlighter is created when null
     * @return string
     */
    public function highlightMatches($inputHTML, $defaultEncoding = '', $highlighter = null)
    {
        if ($highlighter === null) {
            $highlighter = new DefaultHighlighter();
        }

        $doc = Document\HTML::loadHTML($inputHTML, false, $defaultEncoding);
        $highlighter->setDocument($doc);

        $this->_highlightMatches($highlighter);

        return $doc->getHTML();
    }

    /**
     * Highlight matches in $inputHTMLFragment and return it (without HTML header and body tag)
     *
     * The fragment is converted from $encoding to UTF-8 and wrapped into a
     * minimal HTML document declaring a UTF-8 charset before it is loaded;
     * only the body content of the highlighted document is returned.
     *
     * @param string $inputHTMLFragment
     * @param string $encoding   Input HTML string encoding
     * @param Highlighter|null $highlighter  Highlighter to use; a DefaultHighlighter is created when null
     * @return string
     */
    public function htmlFragmentHighlightMatches($inputHTMLFragment, $encoding = 'UTF-8', $highlighter = null)
    {
        if ($highlighter === null) {
            $highlighter = new DefaultHighlighter();
        }

        // The wrapper declares UTF-8, so the fragment has to be converted to
        // UTF-8 as well; the "//IGNORE" suffix is passed through to iconv().
        $inputHTML = '<html><head><META HTTP-EQUIV="Content-type" CONTENT="text/html; charset=UTF-8"/></head><body>'
                   . iconv($encoding, 'UTF-8//IGNORE', $inputHTMLFragment) . '</body></html>';

        $doc = Document\HTML::loadHTML($inputHTML);
        $highlighter->setDocument($doc);

        $this->_highlightMatches($highlighter);

        return $doc->getHTMLBody();
    }
}
210