1<?php
2/**
3 * SQLite search backend, based upon SearchMysql
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @file
21 * @ingroup Search
22 */
23
24use MediaWiki\MediaWikiServices;
25
26/**
27 * Search engine hook for SQLite
28 * @ingroup Search
29 */
30class SearchSqlite extends SearchDatabase {
31	/**
32	 * Whether fulltext search is supported by current schema
33	 * @return bool
34	 */
35	private function fulltextSearchSupported() {
36		$dbr = $this->lb->getMaintenanceConnectionRef( DB_REPLICA );
37		$sql = (string)$dbr->selectField(
38			$dbr->addIdentifierQuotes( 'sqlite_master' ),
39			'sql',
40			[ 'tbl_name' => $dbr->tableName( 'searchindex', 'raw' ) ],
41			__METHOD__
42		);
43
44		return ( stristr( $sql, 'fts' ) !== false );
45	}
46
47	/**
48	 * Parse the user's query and transform it into an SQL fragment which will
49	 * become part of a WHERE clause
50	 *
51	 * @param string $filteredText
52	 * @param bool $fulltext
53	 * @return string
54	 */
55	private function parseQuery( $filteredText, $fulltext ) {
56		$lc = $this->legalSearchChars( self::CHARS_NO_SYNTAX ); // Minus syntax chars (" and *)
57		$searchon = '';
58		$this->searchTerms = [];
59
60		$m = [];
61		if ( preg_match_all( '/([-+<>~]?)(([' . $lc . ']+)(\*?)|"[^"]*")/',
62				$filteredText, $m, PREG_SET_ORDER ) ) {
63			foreach ( $m as $bits ) {
64				Wikimedia\suppressWarnings();
65				list( /* all */, $modifier, $term, $nonQuoted, $wildcard ) = $bits;
66				Wikimedia\restoreWarnings();
67
68				if ( $nonQuoted != '' ) {
69					$term = $nonQuoted;
70					$quote = '';
71				} else {
72					$term = str_replace( '"', '', $term );
73					$quote = '"';
74				}
75
76				if ( $searchon !== '' ) {
77					$searchon .= ' ';
78				}
79
80				// Some languages such as Serbian store the input form in the search index,
81				// so we may need to search for matches in multiple writing system variants.
82
83				$converter = MediaWikiServices::getInstance()->getLanguageConverterFactory()
84					->getLanguageConverter();
85				$convertedVariants = $converter->autoConvertToAllVariants( $term );
86				if ( is_array( $convertedVariants ) ) {
87					$variants = array_unique( array_values( $convertedVariants ) );
88				} else {
89					$variants = [ $term ];
90				}
91
92				// The low-level search index does some processing on input to work
93				// around problems with minimum lengths and encoding in MySQL's
94				// fulltext engine.
95				// For Chinese this also inserts spaces between adjacent Han characters.
96				$strippedVariants = array_map(
97					[ MediaWikiServices::getInstance()->getContentLanguage(),
98						'normalizeForSearch' ],
99					$variants );
100
101				// Some languages such as Chinese force all variants to a canonical
102				// form when stripping to the low-level search index, so to be sure
103				// let's check our variants list for unique items after stripping.
104				$strippedVariants = array_unique( $strippedVariants );
105
106				$searchon .= $modifier;
107				if ( count( $strippedVariants ) > 1 ) {
108					$searchon .= '(';
109				}
110				foreach ( $strippedVariants as $stripped ) {
111					if ( $nonQuoted && strpos( $stripped, ' ' ) !== false ) {
112						// Hack for Chinese: we need to toss in quotes for
113						// multiple-character phrases since normalizeForSearch()
114						// added spaces between them to make word breaks.
115						$stripped = '"' . trim( $stripped ) . '"';
116					}
117					$searchon .= "$quote$stripped$quote$wildcard ";
118				}
119				if ( count( $strippedVariants ) > 1 ) {
120					$searchon .= ')';
121				}
122
123				// Match individual terms or quoted phrase in result highlighting...
124				// Note that variants will be introduced in a later stage for highlighting!
125				$regexp = $this->regexTerm( $term, $wildcard );
126				$this->searchTerms[] = $regexp;
127			}
128
129		} else {
130			wfDebug( __METHOD__ . ": Can't understand search query '{$filteredText}'" );
131		}
132
133		$dbr = $this->lb->getConnectionRef( DB_REPLICA );
134		$searchon = $dbr->addQuotes( $searchon );
135		$field = $this->getIndexField( $fulltext );
136
137		return " $field MATCH $searchon ";
138	}
139
140	private function regexTerm( $string, $wildcard ) {
141		$regex = preg_quote( $string, '/' );
142		if ( MediaWikiServices::getInstance()->getContentLanguage()->hasWordBreaks() ) {
143			if ( $wildcard ) {
144				// Don't cut off the final bit!
145				$regex = "\b$regex";
146			} else {
147				$regex = "\b$regex\b";
148			}
149		} else {
150			// For Chinese, words may legitimately abut other words in the text literal.
151			// Don't add \b boundary checks... note this could cause false positives
152			// for Latin chars.
153		}
154		return $regex;
155	}
156
157	public function legalSearchChars( $type = self::CHARS_ALL ) {
158		$searchChars = parent::legalSearchChars( $type );
159		if ( $type === self::CHARS_ALL ) {
160			// " for phrase, * for wildcard
161			$searchChars = "\"*" . $searchChars;
162		}
163		return $searchChars;
164	}
165
166	/**
167	 * Perform a full text search query and return a result set.
168	 *
169	 * @param string $term Raw search term
170	 * @return SqlSearchResultSet|null
171	 */
172	protected function doSearchTextInDB( $term ) {
173		return $this->searchInternal( $term, true );
174	}
175
176	/**
177	 * Perform a title-only search query and return a result set.
178	 *
179	 * @param string $term Raw search term
180	 * @return SqlSearchResultSet|null
181	 */
182	protected function doSearchTitleInDB( $term ) {
183		return $this->searchInternal( $term, false );
184	}
185
186	protected function searchInternal( $term, $fulltext ) {
187		if ( !$this->fulltextSearchSupported() ) {
188			return null;
189		}
190
191		$filteredTerm =
192			$this->filter( MediaWikiServices::getInstance()->getContentLanguage()->lc( $term ) );
193		$dbr = $this->lb->getConnectionRef( DB_REPLICA );
194		$resultSet = $dbr->query( $this->getQuery( $filteredTerm, $fulltext ), __METHOD__ );
195
196		$total = null;
197		$totalResult = $dbr->query( $this->getCountQuery( $filteredTerm, $fulltext ), __METHOD__ );
198		$row = $totalResult->fetchObject();
199		if ( $row ) {
200			$total = intval( $row->c );
201		}
202		$totalResult->free();
203
204		return new SqlSearchResultSet( $resultSet, $this->searchTerms, $total );
205	}
206
207	/**
208	 * Return a partial WHERE clause to limit the search to the given namespaces
209	 * @return string
210	 */
211	private function queryNamespaces() {
212		if ( $this->namespaces === null ) {
213			return '';  # search all
214		}
215		if ( $this->namespaces === [] ) {
216			$namespaces = '0';
217		} else {
218			$dbr = $this->lb->getConnectionRef( DB_REPLICA );
219			$namespaces = $dbr->makeList( $this->namespaces );
220		}
221		return 'AND page_namespace IN (' . $namespaces . ')';
222	}
223
224	/**
225	 * Returns a query with limit for number of results set.
226	 * @param string $sql
227	 * @return string
228	 */
229	private function limitResult( $sql ) {
230		$dbr = $this->lb->getConnectionRef( DB_REPLICA );
231
232		return $dbr->limitResult( $sql, $this->limit, $this->offset );
233	}
234
235	/**
236	 * Construct the full SQL query to do the search.
237	 * The guts shoulds be constructed in queryMain()
238	 * @param string $filteredTerm
239	 * @param bool $fulltext
240	 * @return string
241	 */
242	private function getQuery( $filteredTerm, $fulltext ) {
243		return $this->limitResult(
244			$this->queryMain( $filteredTerm, $fulltext ) . ' ' .
245			$this->queryNamespaces()
246		);
247	}
248
249	/**
250	 * Picks which field to index on, depending on what type of query.
251	 * @param bool $fulltext
252	 * @return string
253	 */
254	private function getIndexField( $fulltext ) {
255		return $fulltext ? 'si_text' : 'si_title';
256	}
257
258	/**
259	 * Get the base part of the search query.
260	 *
261	 * @param string $filteredTerm
262	 * @param bool $fulltext
263	 * @return string
264	 */
265	private function queryMain( $filteredTerm, $fulltext ) {
266		$match = $this->parseQuery( $filteredTerm, $fulltext );
267		$dbr = $this->lb->getMaintenanceConnectionRef( DB_REPLICA );
268		$page = $dbr->tableName( 'page' );
269		$searchindex = $dbr->tableName( 'searchindex' );
270		return "SELECT $searchindex.rowid, page_namespace, page_title " .
271			"FROM $page,$searchindex " .
272			"WHERE page_id=$searchindex.rowid AND $match";
273	}
274
275	private function getCountQuery( $filteredTerm, $fulltext ) {
276		$match = $this->parseQuery( $filteredTerm, $fulltext );
277		$dbr = $this->lb->getMaintenanceConnectionRef( DB_REPLICA );
278		$page = $dbr->tableName( 'page' );
279		$searchindex = $dbr->tableName( 'searchindex' );
280		return "SELECT COUNT(*) AS c " .
281			"FROM $page,$searchindex " .
282			"WHERE page_id=$searchindex.rowid AND $match " .
283			$this->queryNamespaces();
284	}
285
286	/**
287	 * Create or update the search index record for the given page.
288	 * Title and text should be pre-processed.
289	 *
290	 * @param int $id
291	 * @param string $title
292	 * @param string $text
293	 */
294	public function update( $id, $title, $text ) {
295		if ( !$this->fulltextSearchSupported() ) {
296			return;
297		}
298		// @todo find a method to do it in a single request,
299		// couldn't do it so far due to typelessness of FTS3 tables.
300		$dbw = $this->lb->getConnectionRef( DB_PRIMARY );
301		$dbw->delete( 'searchindex', [ 'rowid' => $id ], __METHOD__ );
302		$dbw->insert( 'searchindex',
303			[
304				'rowid' => $id,
305				'si_title' => $title,
306				'si_text' => $text
307			], __METHOD__ );
308	}
309
310	/**
311	 * Update a search index record's title only.
312	 * Title should be pre-processed.
313	 *
314	 * @param int $id
315	 * @param string $title
316	 */
317	public function updateTitle( $id, $title ) {
318		if ( !$this->fulltextSearchSupported() ) {
319			return;
320		}
321
322		$dbw = $this->lb->getConnectionRef( DB_PRIMARY );
323		$dbw->update( 'searchindex',
324			[ 'si_title' => $title ],
325			[ 'rowid' => $id ],
326			__METHOD__ );
327	}
328}
329