<?php
/**
 * SQLite search backend, based upon SearchMysql
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @ingroup Search
 */

use MediaWiki\MediaWikiServices;

/**
 * Search engine hook for SQLite
 * @ingroup Search
 */
class SearchSqlite extends SearchDatabase {
	/**
	 * Whether fulltext search is supported by current schema
	 *
	 * Reads the `sql` column recorded in sqlite_master for the searchindex
	 * table and checks (case-insensitively) whether it mentions "fts".
	 *
	 * @return bool
	 */
	private function fulltextSearchSupported() {
		$dbr = $this->lb->getMaintenanceConnectionRef( DB_REPLICA );
		$sql = (string)$dbr->selectField(
			$dbr->addIdentifierQuotes( 'sqlite_master' ),
			'sql',
			[ 'tbl_name' => $dbr->tableName( 'searchindex', 'raw' ) ],
			__METHOD__
		);

		return stripos( $sql, 'fts' ) !== false;
	}

	/**
	 * Parse the user's query and transform it into an SQL fragment which will
	 * become part of a WHERE clause
	 *
	 * As a side effect, $this->searchTerms is reset and refilled with one
	 * highlighting regex per parsed term.
	 *
	 * @param string $filteredText
	 * @param bool $fulltext
	 * @return string
	 */
	private function parseQuery( $filteredText, $fulltext ) {
		$lc = $this->legalSearchChars( self::CHARS_NO_SYNTAX ); // Minus syntax chars (" and *)
		$searchon = '';
		$this->searchTerms = [];

		$m = [];
		if ( preg_match_all( '/([-+<>~]?)(([' . $lc . ']+)(\*?)|"[^"]*")/',
				$filteredText, $m, PREG_SET_ORDER ) ) {
			// These lookups do not depend on the term, so do them once
			// instead of once per match.
			$services = MediaWikiServices::getInstance();
			$converter = $services->getLanguageConverterFactory()->getLanguageConverter();
			$contLang = $services->getContentLanguage();

			foreach ( $m as $bits ) {
				// Groups 1 and 2 always take part in a match. Groups 3 and 4
				// only exist for bare (non-quoted) terms; for a quoted phrase
				// they are trailing unmatched groups and are left out of
				// $bits, so default them explicitly rather than suppressing
				// the resulting warnings.
				$modifier = $bits[1];
				$term = $bits[2];
				$nonQuoted = $bits[3] ?? '';
				$wildcard = $bits[4] ?? '';

				if ( $nonQuoted !== '' ) {
					$term = $nonQuoted;
					$quote = '';
				} else {
					$term = str_replace( '"', '', $term );
					$quote = '"';
				}

				if ( $searchon !== '' ) {
					$searchon .= ' ';
				}

				// Some languages such as Serbian store the input form in the search index,
				// so we may need to search for matches in multiple writing system variants.
				$convertedVariants = $converter->autoConvertToAllVariants( $term );
				if ( is_array( $convertedVariants ) ) {
					$variants = array_unique( array_values( $convertedVariants ) );
				} else {
					$variants = [ $term ];
				}

				// The low-level search index does some processing on input to work
				// around problems with minimum lengths and encoding in MySQL's
				// fulltext engine.
				// For Chinese this also inserts spaces between adjacent Han characters.
				//
				// Some languages such as Chinese force all variants to a canonical
				// form when stripping to the low-level search index, so to be sure
				// let's check our variants list for unique items after stripping.
				$strippedVariants = array_unique(
					array_map( [ $contLang, 'normalizeForSearch' ], $variants )
				);
				$hasMultipleVariants = count( $strippedVariants ) > 1;

				$searchon .= $modifier;
				if ( $hasMultipleVariants ) {
					$searchon .= '(';
				}
				foreach ( $strippedVariants as $stripped ) {
					if ( $nonQuoted !== '' && strpos( $stripped, ' ' ) !== false ) {
						// Hack for Chinese: we need to toss in quotes for
						// multiple-character phrases since normalizeForSearch()
						// added spaces between them to make word breaks.
						$stripped = '"' . trim( $stripped ) . '"';
					}
					$searchon .= "$quote$stripped$quote$wildcard ";
				}
				if ( $hasMultipleVariants ) {
					$searchon .= ')';
				}

				// Match individual terms or quoted phrase in result highlighting...
				// Note that variants will be introduced in a later stage for highlighting!
				$this->searchTerms[] = $this->regexTerm( $term, $wildcard );
			}
		} else {
			wfDebug( __METHOD__ . ": Can't understand search query '{$filteredText}'" );
		}

		$dbr = $this->lb->getConnectionRef( DB_REPLICA );
		$searchon = $dbr->addQuotes( $searchon );
		$field = $this->getIndexField( $fulltext );

		return " $field MATCH $searchon ";
	}

	/**
	 * Build the regex fragment used to highlight one search term.
	 *
	 * The term is escaped for use inside a "/"-delimited pattern. When the
	 * content language reports word breaks, a leading \b is added, plus a
	 * trailing \b unless the term carries a wildcard.
	 *
	 * @param string $string Search term
	 * @param string $wildcard Wildcard suffix captured by the parser ('*' or empty)
	 * @return string
	 */
	private function regexTerm( $string, $wildcard ) {
		$regex = preg_quote( $string, '/' );
		if ( !MediaWikiServices::getInstance()->getContentLanguage()->hasWordBreaks() ) {
			// For Chinese, words may legitimately abut other words in the text literal.
			// Don't add \b boundary checks... note this could cause false positives
			// for Latin chars.
			return $regex;
		}

		if ( $wildcard ) {
			// Don't cut off the final bit!
			return '\b' . $regex;
		}

		return '\b' . $regex . '\b';
	}

	/**
	 * Get the characters that are legal in a search string.
	 *
	 * Starts from the parent's list and, for self::CHARS_ALL, prepends the
	 * two syntax characters this backend understands.
	 *
	 * @param int $type One of the self::CHARS_* constants
	 * @return string
	 */
	public function legalSearchChars( $type = self::CHARS_ALL ) {
		$searchChars = parent::legalSearchChars( $type );
		if ( $type === self::CHARS_ALL ) {
			// " for phrase, * for wildcard
			$searchChars = '"*' . $searchChars;
		}
		return $searchChars;
	}

	/**
	 * Perform a full text search query and return a result set.
	 *
	 * @param string $term Raw search term
	 * @return SqlSearchResultSet|null
	 */
	protected function doSearchTextInDB( $term ) {
		return $this->searchInternal( $term, true );
	}

	/**
	 * Perform a title-only search query and return a result set.
	 *
	 * @param string $term Raw search term
	 * @return SqlSearchResultSet|null
	 */
	protected function doSearchTitleInDB( $term ) {
		return $this->searchInternal( $term, false );
	}

	/**
	 * Run the search and the matching count query.
	 *
	 * @param string $term Raw search term
	 * @param bool $fulltext True to match against page text, false for titles only
	 * @return SqlSearchResultSet|null Null when the schema has no fulltext support
	 */
	protected function searchInternal( $term, $fulltext ) {
		if ( !$this->fulltextSearchSupported() ) {
			return null;
		}

		$filteredTerm =
			$this->filter( MediaWikiServices::getInstance()->getContentLanguage()->lc( $term ) );

		// Parse once: the result query and the count query share the same
		// MATCH clause, and parseQuery() also fills $this->searchTerms, which
		// is handed to the result set below.
		$match = $this->parseQuery( $filteredTerm, $fulltext );

		$dbr = $this->lb->getConnectionRef( DB_REPLICA );
		$resultSet = $dbr->query( $this->getQuery( $match ), __METHOD__ );

		$totalResult = $dbr->query( $this->getCountQuery( $match ), __METHOD__ );
		$row = $totalResult->fetchObject();
		$total = $row ? intval( $row->c ) : null;
		$totalResult->free();

		return new SqlSearchResultSet( $resultSet, $this->searchTerms, $total );
	}

	/**
	 * Return a partial WHERE clause to limit the search to the given namespaces
	 * @return string
	 */
	private function queryNamespaces() {
		if ( $this->namespaces === null ) {
			return ''; # search all
		}
		if ( $this->namespaces === [] ) {
			$namespaces = '0';
		} else {
			$dbr = $this->lb->getConnectionRef( DB_REPLICA );
			$namespaces = $dbr->makeList( $this->namespaces );
		}
		return 'AND page_namespace IN (' . $namespaces . ')';
	}

	/**
	 * Returns a query with limit for number of results set.
	 * @param string $sql
	 * @return string
	 */
	private function limitResult( $sql ) {
		$dbr = $this->lb->getConnectionRef( DB_REPLICA );

		return $dbr->limitResult( $sql, $this->limit, $this->offset );
	}

	/**
	 * Construct the full SQL query to do the search.
	 * The guts should be constructed in queryMain()
	 * @param string $match MATCH clause as returned by parseQuery()
	 * @return string
	 */
	private function getQuery( $match ) {
		return $this->limitResult(
			$this->queryMain( $match ) . ' ' .
			$this->queryNamespaces()
		);
	}

	/**
	 * Picks which field to index on, depending on what type of query.
	 * @param bool $fulltext
	 * @return string
	 */
	private function getIndexField( $fulltext ) {
		return $fulltext ? 'si_text' : 'si_title';
	}

	/**
	 * Get the base part of the search query.
	 *
	 * @param string $match MATCH clause as returned by parseQuery()
	 * @return string
	 */
	private function queryMain( $match ) {
		$dbr = $this->lb->getMaintenanceConnectionRef( DB_REPLICA );
		$page = $dbr->tableName( 'page' );
		$searchindex = $dbr->tableName( 'searchindex' );
		return "SELECT $searchindex.rowid, page_namespace, page_title " .
			"FROM $page,$searchindex " .
			"WHERE page_id=$searchindex.rowid AND $match";
	}

	/**
	 * Get the query that counts all matches, namespace filter included and
	 * without any limit/offset applied.
	 *
	 * @param string $match MATCH clause as returned by parseQuery()
	 * @return string
	 */
	private function getCountQuery( $match ) {
		$dbr = $this->lb->getMaintenanceConnectionRef( DB_REPLICA );
		$page = $dbr->tableName( 'page' );
		$searchindex = $dbr->tableName( 'searchindex' );
		return "SELECT COUNT(*) AS c " .
			"FROM $page,$searchindex " .
			"WHERE page_id=$searchindex.rowid AND $match " .
			$this->queryNamespaces();
	}

	/**
	 * Create or update the search index record for the given page.
	 * Title and text should be pre-processed.
	 *
	 * Does nothing when the schema has no fulltext support.
	 *
	 * @param int $id
	 * @param string $title
	 * @param string $text
	 */
	public function update( $id, $title, $text ) {
		if ( !$this->fulltextSearchSupported() ) {
			return;
		}
		// @todo find a method to do it in a single request,
		// couldn't do it so far due to typelessness of FTS3 tables.
		$dbw = $this->lb->getConnectionRef( DB_PRIMARY );
		$dbw->delete( 'searchindex', [ 'rowid' => $id ], __METHOD__ );
		$dbw->insert( 'searchindex',
			[
				'rowid' => $id,
				'si_title' => $title,
				'si_text' => $text
			], __METHOD__ );
	}

	/**
	 * Update a search index record's title only.
	 * Title should be pre-processed.
	 *
	 * Does nothing when the schema has no fulltext support.
	 *
	 * @param int $id
	 * @param string $title
	 */
	public function updateTitle( $id, $title ) {
		if ( !$this->fulltextSearchSupported() ) {
			return;
		}

		$dbw = $this->lb->getConnectionRef( DB_PRIMARY );
		$dbw->update( 'searchindex',
			[ 'si_title' => $title ],
			[ 'rowid' => $id ],
			__METHOD__ );
	}
}