1<?php
2
3namespace Drupal\Tests\search\Kernel;
4
5use Drupal\Core\Database\Database;
6use Drupal\Core\Language\LanguageInterface;
7use Drupal\KernelTests\KernelTestBase;
8use Drupal\search\SearchIndexInterface;
9
/**
 * Indexes content and queries it.
 *
 * A small set of items is written to the search index under three different
 * item types, and a list of search expressions is then run against each type
 * to verify both which items match and how the matches are scored.
 *
 * @group search
 */
class SearchMatchTest extends KernelTestBase {

  // The search index can contain different types of content. Typically the type
  // is 'node'. Here we test with _test_, _test2_ and _test3_ as the types.
  const SEARCH_TYPE = '_test_';
  const SEARCH_TYPE_2 = '_test2_';
  const SEARCH_TYPE_JPN = '_test3_';

  /**
   * Modules to enable.
   *
   * @var array
   */
  public static $modules = ['search'];

  /**
   * {@inheritdoc}
   */
  protected function setUp() {
    parent::setUp();
    $this->installSchema('search', ['search_index', 'search_dataset', 'search_total']);
    $this->installConfig(['search']);
  }

  /**
   * Tests search indexing and query matching.
   */
  public function testMatching() {
    $this->_setup();
    $this->_testQueries();
  }

  /**
   * Sets up a small index of items to test against.
   *
   * Items 1-7 are indexed as SEARCH_TYPE, items 8-12 as SEARCH_TYPE_2 and
   * items 13-15 as SEARCH_TYPE_JPN.
   */
  public function _setup() {
    // Words shorter than three characters are treated as "short" words by the
    // queries below (e.g. 'ut', 'am', 'xx').
    $this->config('search.settings')->set('index.minimum_word_size', 3)->save();

    $search_index = \Drupal::service('search.index');
    assert($search_index instanceof SearchIndexInterface);
    for ($i = 1; $i <= 7; ++$i) {
      $search_index->index(static::SEARCH_TYPE, $i, LanguageInterface::LANGCODE_NOT_SPECIFIED, $this->getText($i));
    }
    // The second type continues the ID sequence after the seven items above.
    for ($i = 1; $i <= 5; ++$i) {
      $search_index->index(static::SEARCH_TYPE_2, $i + 7, LanguageInterface::LANGCODE_NOT_SPECIFIED, $this->getText2($i));
    }
    // No getText builder function for Japanese text; just a simple array.
    foreach ([
      13 => '以呂波耳・ほへとち。リヌルヲ。',
      14 => 'ドルーパルが大好きよ!',
      15 => 'コーヒーとケーキ',
    ] as $i => $jpn) {
      $search_index->index(static::SEARCH_TYPE_JPN, $i, LanguageInterface::LANGCODE_NOT_SPECIFIED, $jpn);
    }
  }

  /**
   * _test_: Helper method for generating snippets of content.
   *
   * Returns up to $n consecutive words of a fixed sentence, starting at the
   * $n-th word.
   *
   * Generated items to test against:
   *   1  ipsum
   *   2  dolore sit
   *   3  sit am ut
   *   4  am ut enim am
   *   5  ut enim am minim veniam
   *   6  enim am minim veniam es cillum
   *   7  am minim veniam es cillum dolore eu
   *
   * @param int $n
   *   The 1-based item number.
   *
   * @return string
   *   The generated text, with the original capitalization and punctuation.
   */
  public function getText($n) {
    $words = explode(' ', "Ipsum dolore sit am. Ut enim am minim veniam. Es cillum dolore eu.");
    return implode(' ', array_slice($words, $n - 1, $n));
  }

  /**
   * _test2_: Helper method for generating snippets of content.
   *
   * Returns up to $n consecutive words of a fixed sentence, starting at the
   * $n-th word.
   *
   * Generated items to test against:
   *   8  dear
   *   9  king philip
   *   10 philip came over
   *   11 came over from germany
   *   12 over from germany swimming
   *
   * @param int $n
   *   The 1-based snippet number; the item is indexed under ID $n + 7.
   *
   * @return string
   *   The generated text, with the original capitalization and punctuation.
   */
  public function getText2($n) {
    $words = explode(' ', "Dear King Philip came over from Germany swimming.");
    return implode(' ', array_slice($words, $n - 1, $n));
  }

  /**
   * Runs predefined queries looking for indexed terms.
   */
  public function _testQueries() {
    // Note: OR queries that include short words in OR groups are only accepted
    // if the ORed terms are ANDed with at least one long word in the rest of
    // the query. Examples:
    //   enim dolore OR ut = enim (dolore OR ut) = (enim dolore) OR (enim ut)
    // is good, and
    //   dolore OR ut = (dolore) OR (ut)
    // is bad. This is a design limitation to avoid full table scans.
    $queries = [
      // Simple AND queries.
      'ipsum' => [1],
      'enim' => [4, 5, 6],
      'xxxxx' => [],
      'enim minim' => [5, 6],
      'enim xxxxx' => [],
      'dolore eu' => [7],
      'dolore xx' => [],
      'ut minim' => [5],
      'xx minim' => [],
      'enim veniam am minim ut' => [5],
      // Simple OR and AND/OR queries.
      'dolore OR ipsum' => [1, 2, 7],
      'dolore OR xxxxx' => [2, 7],
      'dolore OR ipsum OR enim' => [1, 2, 4, 5, 6, 7],
      'ipsum OR dolore sit OR cillum' => [2, 7],
      'minim dolore OR ipsum' => [7],
      'dolore OR ipsum veniam' => [7],
      'minim dolore OR ipsum OR enim' => [5, 6, 7],
      'dolore xx OR yy' => [],
      'xxxxx dolore OR ipsum' => [],
      // Sequence of OR queries.
      'minim' => [5, 6, 7],
      'minim OR xxxx' => [5, 6, 7],
      'minim OR xxxx OR minim' => [5, 6, 7],
      'minim OR xxxx minim' => [5, 6, 7],
      'minim OR xxxx minim OR yyyy' => [5, 6, 7],
      'minim OR xxxx minim OR cillum' => [6, 7, 5],
      'minim OR xxxx minim OR xxxx' => [5, 6, 7],
      // Negative queries.
      'dolore -sit' => [7],
      'dolore -eu' => [2],
      'dolore -xxxxx' => [2, 7],
      'dolore -xx' => [2, 7],
      // Phrase queries.
      '"dolore sit"' => [2],
      '"sit dolore"' => [],
      '"am minim veniam es"' => [6, 7],
      '"minim am veniam es"' => [],
      // Mixed queries.
      '"am minim veniam es" OR dolore' => [2, 6, 7],
      '"minim am veniam es" OR "dolore sit"' => [2],
      '"minim am veniam es" OR "sit dolore"' => [],
      '"am minim veniam es" -eu' => [6],
      '"am minim veniam" -"cillum dolore"' => [5, 6],
      '"am minim veniam" -"dolore cillum"' => [5, 6, 7],
      'xxxxx "minim am veniam es" OR dolore' => [],
      'xx "minim am veniam es" OR dolore' => [],
    ];
    $this->runSearchQueries($queries, static::SEARCH_TYPE);

    // These queries are run against the second index type, SEARCH_TYPE_2.
    $queries = [
      // Simple AND queries.
      'ipsum' => [],
      'enim' => [],
      'enim minim' => [],
      'dear' => [8],
      'germany' => [11, 12],
    ];
    $this->runSearchQueries($queries, static::SEARCH_TYPE_2);

    // These queries are run against the third index type, SEARCH_TYPE_JPN.
    $queries = [
      // Simple AND queries.
      '呂波耳' => [13],
      '以呂波耳' => [13],
      'ほへと ヌルヲ' => [13],
      'とちリ' => [],
      'ドルーパル' => [14],
      'パルが大' => [14],
      'コーヒー' => [15],
      'ヒーキ' => [],
    ];
    $this->runSearchQueries($queries, static::SEARCH_TYPE_JPN);
  }

  /**
   * Runs a list of search expressions against one index type.
   *
   * Each expression is executed through the SearchQuery extender, and the
   * returned rows are checked for both the expected item IDs and for sane
   * scores.
   *
   * @param array $queries
   *   Expected item IDs, keyed by search expression.
   * @param string $type
   *   The search index type to restrict the query to.
   */
  protected function runSearchQueries(array $queries, $type) {
    $connection = Database::getConnection();
    foreach ($queries as $query => $results) {
      $result = $connection->select('search_index', 'i')
        ->extend('Drupal\search\SearchQuery')
        ->searchExpression($query, $type)
        ->execute();

      // A falsy return from execute() is treated as "no matches"; presumably
      // this happens when the expression has no usable search terms.
      $set = $result ? $result->fetchAll() : [];
      $this->_testQueryMatching($query, $set, $results);
      $this->_testQueryScores($query, $set, $results);
    }
  }

  /**
   * Tests the matching abilities of the engine.
   *
   * Verifies that a query produces the correct results, ignoring their order.
   *
   * @param string $query
   *   The search expression that was executed; used in the failure message.
   * @param object[] $set
   *   The fetched result rows; each row's 'sid' property is read.
   * @param array $results
   *   The expected item IDs.
   */
  public function _testQueryMatching($query, $set, $results) {
    // Get result IDs.
    $found = array_map(function ($item) {
      return $item->sid;
    }, $set);

    // Compare $results and $found, independent of the order they came in.
    sort($found);
    sort($results);
    $this->assertEquals($results, $found, "Query matching '$query'");
  }

  /**
   * Tests the scoring abilities of the engine.
   *
   * Verifies that a query produces normalized, monotonic scores: the rows must
   * arrive ordered by descending score, and every score must be greater than
   * 0 and no larger than 1.0001.
   *
   * @param string $query
   *   The search expression that was executed; used in the failure message.
   * @param object[] $set
   *   The fetched result rows; each row's 'calculated_score' property is read.
   * @param array $results
   *   The expected item IDs. Not used by this check.
   */
  public function _testQueryScores($query, $set, $results) {
    // Get result scores.
    $scores = array_map(function ($item) {
      return $item->calculated_score;
    }, $set);

    // Check order: the scores as returned must equal the same scores sorted
    // from highest to lowest.
    $sorted = $scores;
    sort($sorted);
    $this->assertEquals(array_reverse($sorted), $scores, "Query order '$query'");

    // Check range. An empty result set trivially passes. The upper bound is
    // slightly above 1, presumably to tolerate floating-point rounding.
    $this->assertTrue(!count($scores) || (min($scores) > 0.0 && max($scores) <= 1.0001), "Query scoring '$query'");
  }

}
260