<?php

namespace Drupal\Tests\search\Kernel;

use Drupal\Core\Database\Database;
use Drupal\Core\Language\LanguageInterface;
use Drupal\KernelTests\KernelTestBase;
use Drupal\search\SearchIndexInterface;

/**
 * Indexes content and queries it.
 *
 * @group search
 */
class SearchMatchTest extends KernelTestBase {

  // The search index can contain different types of content. Typically the type
  // is 'node'. Here we test with _test_, _test2_ and _test3_ as the types.
  const SEARCH_TYPE = '_test_';
  const SEARCH_TYPE_2 = '_test2_';
  const SEARCH_TYPE_JPN = '_test3_';

  /**
   * Modules to enable.
   *
   * @var array
   */
  public static $modules = ['search'];

  /**
   * {@inheritdoc}
   */
  protected function setUp() {
    parent::setUp();
    $this->installSchema('search', ['search_index', 'search_dataset', 'search_total']);
    $this->installConfig(['search']);
  }

  /**
   * Tests search indexing and matching.
   *
   * Builds the index with _setup() and then runs every query set in
   * _testQueries() against it.
   */
  public function testMatching() {
    $this->_setup();
    $this->_testQueries();
  }

  /**
   * Sets up a small index of items to test against.
   *
   * Items 1-7 are indexed under SEARCH_TYPE, items 8-12 under SEARCH_TYPE_2
   * and items 13-15 under SEARCH_TYPE_JPN.
   */
  public function _setup() {
    $this->config('search.settings')->set('index.minimum_word_size', 3)->save();

    $search_index = \Drupal::service('search.index');
    assert($search_index instanceof SearchIndexInterface);
    for ($i = 1; $i <= 7; ++$i) {
      $search_index->index(static::SEARCH_TYPE, $i, LanguageInterface::LANGCODE_NOT_SPECIFIED, $this->getText($i));
    }
    // The second type continues the ID sequence after the seven items above.
    for ($i = 1; $i <= 5; ++$i) {
      $search_index->index(static::SEARCH_TYPE_2, $i + 7, LanguageInterface::LANGCODE_NOT_SPECIFIED, $this->getText2($i));
    }
    // No getText builder function for Japanese text; just a simple array.
    foreach ([
      13 => '以呂波耳・ほへとち。リヌルヲ。',
      14 => 'ドルーパルが大好きよ!',
      15 => 'コーヒーとケーキ',
    ] as $i => $jpn) {
      $search_index->index(static::SEARCH_TYPE_JPN, $i, LanguageInterface::LANGCODE_NOT_SPECIFIED, $jpn);
    }
  }

  /**
   * _test_: Helper method for generating snippets of content.
   *
   * Returns $n consecutive words of the source sentence, starting at word
   * number $n. The list below shows the items lowercased and without
   * punctuation; the returned strings keep the case and periods of the source
   * sentence.
   *
   * Generated items to test against:
   *   1  ipsum
   *   2  dolore sit
   *   3  sit am ut
   *   4  am ut enim am
   *   5  ut enim am minim veniam
   *   6  enim am minim veniam es cillum
   *   7  am minim veniam es cillum dolore eu
   *
   * @param int $n
   *   The 1-based item number; also the number of words to return.
   *
   * @return string
   *   The space-separated words for the item.
   */
  public function getText($n) {
    $words = explode(' ', "Ipsum dolore sit am. Ut enim am minim veniam. Es cillum dolore eu.");
    return implode(' ', array_slice($words, $n - 1, $n));
  }

  /**
   * _test2_: Helper method for generating snippets of content.
   *
   * Returns $n consecutive words of the source sentence, starting at word
   * number $n. The list below shows the items lowercased and without
   * punctuation, under the IDs they are indexed with in _setup().
   *
   * Generated items to test against:
   *   8  dear
   *   9  king philip
   *   10 philip came over
   *   11 came over from germany
   *   12 over from germany swimming
   *
   * @param int $n
   *   The 1-based item number; also the number of words to return.
   *
   * @return string
   *   The space-separated words for the item.
   */
  public function getText2($n) {
    $words = explode(' ', "Dear King Philip came over from Germany swimming.");
    return implode(' ', array_slice($words, $n - 1, $n));
  }

  /**
   * Runs predefined queries looking for indexed terms.
   *
   * Each query set maps a search expression to the list of item IDs that are
   * expected to match within one search type.
   */
  public function _testQueries() {
    // Note: OR queries that include short words in OR groups are only accepted
    // if the ORed terms are ANDed with at least one long word in the rest of
    // the query. Examples:
    //   enim dolore OR ut = enim (dolore OR ut) = (enim dolore) OR (enim ut)
    // is good, and
    //   dolore OR ut = (dolore) OR (ut)
    // is bad. This is a design limitation to avoid full table scans.
    $queries = [
      // Simple AND queries.
      'ipsum' => [1],
      'enim' => [4, 5, 6],
      'xxxxx' => [],
      'enim minim' => [5, 6],
      'enim xxxxx' => [],
      'dolore eu' => [7],
      'dolore xx' => [],
      'ut minim' => [5],
      'xx minim' => [],
      'enim veniam am minim ut' => [5],
      // Simple OR and AND/OR queries.
      'dolore OR ipsum' => [1, 2, 7],
      'dolore OR xxxxx' => [2, 7],
      'dolore OR ipsum OR enim' => [1, 2, 4, 5, 6, 7],
      'ipsum OR dolore sit OR cillum' => [2, 7],
      'minim dolore OR ipsum' => [7],
      'dolore OR ipsum veniam' => [7],
      'minim dolore OR ipsum OR enim' => [5, 6, 7],
      'dolore xx OR yy' => [],
      'xxxxx dolore OR ipsum' => [],
      // Sequence of OR queries.
      'minim' => [5, 6, 7],
      'minim OR xxxx' => [5, 6, 7],
      'minim OR xxxx OR minim' => [5, 6, 7],
      'minim OR xxxx minim' => [5, 6, 7],
      'minim OR xxxx minim OR yyyy' => [5, 6, 7],
      'minim OR xxxx minim OR cillum' => [6, 7, 5],
      'minim OR xxxx minim OR xxxx' => [5, 6, 7],
      // Negative queries.
      'dolore -sit' => [7],
      'dolore -eu' => [2],
      'dolore -xxxxx' => [2, 7],
      'dolore -xx' => [2, 7],
      // Phrase queries.
      '"dolore sit"' => [2],
      '"sit dolore"' => [],
      '"am minim veniam es"' => [6, 7],
      '"minim am veniam es"' => [],
      // Mixed queries.
      '"am minim veniam es" OR dolore' => [2, 6, 7],
      '"minim am veniam es" OR "dolore sit"' => [2],
      '"minim am veniam es" OR "sit dolore"' => [],
      '"am minim veniam es" -eu' => [6],
      '"am minim veniam" -"cillum dolore"' => [5, 6],
      '"am minim veniam" -"dolore cillum"' => [5, 6, 7],
      'xxxxx "minim am veniam es" OR dolore' => [],
      'xx "minim am veniam es" OR dolore' => [],
    ];
    $this->assertQueriesForType($queries, static::SEARCH_TYPE);

    // These queries are run against the second index type, SEARCH_TYPE_2.
    $queries = [
      // Simple AND queries.
      'ipsum' => [],
      'enim' => [],
      'enim minim' => [],
      'dear' => [8],
      'germany' => [11, 12],
    ];
    $this->assertQueriesForType($queries, static::SEARCH_TYPE_2);

    // These queries are run against the third index type, SEARCH_TYPE_JPN.
    $queries = [
      // Simple AND queries.
      '呂波耳' => [13],
      '以呂波耳' => [13],
      'ほへと ヌルヲ' => [13],
      'とちリ' => [],
      'ドルーパル' => [14],
      'パルが大' => [14],
      'コーヒー' => [15],
      'ヒーキ' => [],
    ];
    $this->assertQueriesForType($queries, static::SEARCH_TYPE_JPN);
  }

  /**
   * Runs a set of queries against one search type and verifies each result.
   *
   * @param array $queries
   *   Expected item IDs (array of integers), keyed by search expression.
   * @param string $type
   *   The search type to restrict each query to.
   */
  private function assertQueriesForType(array $queries, $type) {
    $connection = Database::getConnection();
    foreach ($queries as $query => $results) {
      $result = $connection->select('search_index', 'i')
        ->extend('Drupal\search\SearchQuery')
        ->searchExpression($query, $type)
        ->execute();

      // A falsy return from execute() is treated as an empty result set.
      $set = $result ? $result->fetchAll() : [];
      $this->_testQueryMatching($query, $set, $results);
      $this->_testQueryScores($query, $set, $results);
    }
  }

  /**
   * Tests the matching abilities of the engine.
   *
   * Verifies that a query produces exactly the expected item IDs, regardless
   * of the order in which they were returned.
   *
   * @param string $query
   *   The search expression; used in the assertion message.
   * @param array $set
   *   The fetched result rows; each row's 'sid' property is read.
   * @param array $results
   *   The expected item IDs.
   */
  public function _testQueryMatching($query, $set, $results) {
    // Get result IDs.
    $found = [];
    foreach ($set as $item) {
      $found[] = $item->sid;
    }

    // Compare $results and $found; both are sorted so that only the set of
    // IDs matters here, not the ranking.
    sort($found);
    sort($results);
    $this->assertEquals($results, $found, "Query matching '$query'");
  }

  /**
   * Tests the scoring abilities of the engine.
   *
   * Verifies that the rows are ordered by non-increasing score and that every
   * score is greater than 0 and at most 1.0001.
   *
   * @param string $query
   *   The search expression; used in the assertion messages.
   * @param array $set
   *   The fetched result rows; each row's 'calculated_score' property is read.
   * @param array $results
   *   The expected item IDs. Not used by this check.
   */
  public function _testQueryScores($query, $set, $results) {
    // Get result scores.
    $scores = [];
    foreach ($set as $item) {
      $scores[] = $item->calculated_score;
    }

    // Check order: the scores as returned must equal the same scores sorted
    // in descending order.
    $sorted = $scores;
    sort($sorted);
    $this->assertEquals(array_reverse($sorted), $scores, "Query order '$query'");

    // Check range. An empty result set trivially passes.
    $this->assertTrue(!count($scores) || (min($scores) > 0.0 && max($scores) <= 1.0001), "Query scoring '$query'");
  }

}