1# 2015 Jan 13
2#
3# The author disclaims copyright to this source code.  In place of
4# a legal notice, here is a blessing:
5#
6#    May you do good and not evil.
7#    May you find forgiveness for yourself and forgive others.
8#    May you share freely, never taking more than you give.
9#
10#***********************************************************************
11#
12# This file contains tests focused on prefix indexes.
13#
14
15source [file join [file dirname [info script]] fts5_common.tcl]
16set testprefix fts5prefix
17
18# If SQLITE_ENABLE_FTS5 is defined, omit this file.
19ifcapable !fts5 {
20  finish_test
21  return
22}
23
24do_execsql_test 1.0 {
25  CREATE VIRTUAL TABLE xx USING fts5(x, prefix=1);
26  INSERT INTO xx VALUES('one two three');
27  INSERT INTO xx VALUES('four five six');
28  INSERT INTO xx VALUES('seven eight nine ten');
29}
30
31do_execsql_test 1.1 {
32  SELECT rowid FROM xx WHERE xx MATCH 't*'
33} {1 3}
34
35
36#-------------------------------------------------------------------------
37# Check that prefix indexes really do index n-character prefixes, not
38# n-byte prefixes. Use the ascii tokenizer so as not to be confused by
39# diacritic removal.
40#
41do_execsql_test 2.0 {
42  CREATE VIRTUAL TABLE t1 USING fts5(x, tokenize = ascii, prefix = 2)
43}
44
45do_test 2.1 {
46  foreach {rowid string} {
47    1 "\xCA\xCB\xCC\xCD"
48    2 "\u1234\u5678\u4321\u8765"
49  } {
50    execsql { INSERT INTO t1(rowid, x) VALUES($rowid, $string) }
51  }
52} {}
53
54do_execsql_test 2.2 {
55  INSERT INTO t1(t1) VALUES('integrity-check');
56}
57
58foreach {tn q res} {
59  1 "SELECT rowid FROM t1 WHERE t1 MATCH '\xCA\xCB*'" 1
60  2 "SELECT rowid FROM t1 WHERE t1 MATCH '\u1234\u5678*'" 2
61} {
62  do_execsql_test 2.3.$tn $q $res
63}
64
65#-------------------------------------------------------------------------
66# Check that prefix queries with:
67#
68#   * a column filter, and
69#   * no prefix index.
70#
71# work Ok.
72#
73do_execsql_test 3.0 {
74  CREATE VIRTUAL TABLE t3 USING fts5(a, b, c);
75  INSERT INTO t3(t3, rank) VALUES('pgsz', 32);
76  BEGIN;
77    INSERT INTO t3 VALUES('acb ccc bba', 'cca bba bca', 'bbc ccc bca'); -- 1
78    INSERT INTO t3 VALUES('cbb cac cab', 'abb aac bba', 'aab ccc cac'); -- 2
79    INSERT INTO t3 VALUES('aac bcb aac', 'acb bcb caa', 'aca bab bca'); -- 3
80    INSERT INTO t3 VALUES('aab ccb ccc', 'aca cba cca', 'aca aac cbb'); -- 4
81    INSERT INTO t3 VALUES('bac aab bab', 'ccb bac cba', 'acb aba abb'); -- 5
82    INSERT INTO t3 VALUES('bab abc ccb', 'acb cba abb', 'cbb aaa cab'); -- 6
83    INSERT INTO t3 VALUES('cbb bbc baa', 'aab aca baa', 'bcc cca aca'); -- 7
84    INSERT INTO t3 VALUES('abc bba abb', 'cac abc cba', 'acc aac cac'); -- 8
85    INSERT INTO t3 VALUES('bbc bbc cab', 'bcb ccb cba', 'bcc cac acb'); -- 9
86  COMMIT;
87}
88
89foreach {tn match res} {
90  1 "a : c*" {1 2 4 6 7 9}
91  2 "b : c*" {1 3 4 5 6 8 9}
92  3 "c : c*" {1 2 4 6 7 8 9}
93  4 "a : b*" {1 3 5 6 7 8 9}
94  5 "b : b*" {1 2 3 5 7 9}
95  6 "c : b*" {1 3 7 9}
96  7 "a : a*" {1 3 4 5 6 8}
97  8 "b : a*" {2 3 4 6 7 8}
98  9 "c : a*" {2 3 4 5 6 7 8 9}
99} {
100  do_execsql_test 3.1.$tn {
101    SELECT rowid FROM t3($match)
102  } $res
103}
104
105do_test 3.2 {
106  expr srand(0)
107  execsql { DELETE FROM t3 }
108  for {set i 0} {$i < 1000} {incr i} {
109    set a [fts5_rnddoc 3]
110    set b [fts5_rnddoc 8]
111    set c [fts5_rnddoc 20]
112    execsql { INSERT INTO t3 VALUES($a, $b, $c) }
113  }
114  execsql { INSERT INTO t3(t3) VALUES('integrity-check') }
115} {}
116
117proc gmatch {col pattern} {
118  expr {[lsearch -glob $col $pattern]>=0}
119}
120db func gmatch gmatch
121
122proc ghl {col pattern} {
123  foreach t $col {
124    if {[string match $pattern $t]} {
125      lappend res "*$t*"
126    } else {
127      lappend res $t
128    }
129  }
130  set res
131}
132db func ghl ghl
133
134set COLS(a) 0
135set COLS(b) 1
136set COLS(c) 2
137
138for {set x 0} {$x<2} {incr x} {
139  foreach {tn pattern} {
140    1  {xa*}
141    2  {xb*}
142    3  {xc*}
143    4  {xd*}
144    5  {xe*}
145    6  {xf*}
146    7  {xg*}
147    8  {xh*}
148    9  {xi*}
149    10 {xj*}
150  } {
151    foreach col {a b c} {
152
153      # Check that the list of returned rowids is correct.
154      #
155      set res [db eval "SELECT rowid FROM t3 WHERE gmatch($col, '$pattern')"]
156      set query "$col : $pattern"
157      do_execsql_test 3.3.$x.$tn.$col.rowid {
158        SELECT rowid FROM t3($query);
159      } $res
160
161      # Check that the highlight() function works.
162      #
163      set res [db eval \
164        "SELECT ghl($col, '$pattern') FROM t3 WHERE gmatch($col, '$pattern')"
165      ]
166      set idx $COLS($col)
167      do_execsql_test 3.3.$x.$tn.$col.highlight {
168        SELECT highlight(t3, $idx, '*', '*') FROM t3($query);
169      } $res
170    }
171
172    foreach colset {{a b} {b c} {c a} {a c} {b a}} {
173      # Check that the list of returned rowids is correct.
174      #
175      foreach {col1 col2} $colset {}
176      set expr "gmatch($col1, '$pattern') OR gmatch($col2, '$pattern')"
177      set res [db eval "SELECT rowid FROM t3 WHERE $expr"]
178      set query "{$colset} : $pattern"
179      do_execsql_test 3.3.$x.$tn.{$colset}.rowid {
180        SELECT rowid FROM t3($query);
181      } $res
182
183      set resq    "SELECT ghl($col1, '$pattern'), ghl($col2, '$pattern')"
184      append resq " FROM t3 WHERE $expr"
185      set res [db eval $resq]
186      set idx1 $COLS($col1)
187      set idx2 $COLS($col2)
188      do_execsql_test 3.3.$x.$tn.{$colset}.highlight {
189        SELECT highlight(t3, $idx1, '*', '*'), highlight(t3, $idx2, '*', '*')
190          FROM t3($query)
191      } $res
192    }
193  }
194  execsql { INSERT INTO t3(t3) VALUES('optimize') }
195  execsql { INSERT INTO t3(t3) VALUES('integrity-check') }
196}
197
198#-------------------------------------------------------------------------
199#
200reset_db
201do_execsql_test 4.0 {
202  CREATE VIRTUAL TABLE t2 USING fts5(c1, c2);
203  INSERT INTO t2 VALUES('xa xb', 'xb xa');
204
205  INSERT INTO t2 SELECT c1||' '||c1, c2||' '||c2 FROM t2; -- 2
206  INSERT INTO t2 SELECT c1||' '||c1, c2||' '||c2 FROM t2; -- 4
207  INSERT INTO t2 SELECT c1||' '||c1, c2||' '||c2 FROM t2; -- 8
208  INSERT INTO t2 SELECT c1||' '||c1, c2||' '||c2 FROM t2; -- 16
209  INSERT INTO t2 SELECT c1||' '||c1, c2||' '||c2 FROM t2; -- 32
210  INSERT INTO t2 SELECT c1||' '||c1, c2||' '||c2 FROM t2; -- 64
211  INSERT INTO t2 SELECT c1||' '||c1, c2||' '||c2 FROM t2; -- 128
212  INSERT INTO t2 SELECT c1||' '||c1, c2||' '||c2 FROM t2; -- 256
213  INSERT INTO t2 SELECT c1||' '||c1, c2||' '||c2 FROM t2; -- 512
214  INSERT INTO t2 SELECT c1||' '||c1, c2||' '||c2 FROM t2; -- 1024
215  INSERT INTO t2 SELECT c1||' '||c1, c2||' '||c2 FROM t2; -- 2048
216  INSERT INTO t2 SELECT c1||' '||c1, c2||' '||c2 FROM t2; -- 4096
217
218  SELECT count(*) FROM t2('x*');
219} {4096}
220
221do_execsql_test 4.1 {
222  UPDATE t2 SET c2 = 'ya yb';
223  SELECT count(*) FROM t2('c1:x*');
224  SELECT count(*) FROM t2('c2:x*');
225} {4096 0}
226
227do_execsql_test 4.2 {
228  UPDATE t2 SET c2 = 'xa';
229  SELECT count(*) FROM t2('c1:x*');
230  SELECT count(*) FROM t2('c2:x*');
231} {4096 4096}
232
233#-------------------------------------------------------------------------
234#
235reset_db
236proc rnddoc {n} {
237  set map [list a b c d]
238  set doc [list]
239  for {set i 0} {$i < $n} {incr i} {
240    lappend doc "x[lindex $map [expr int(rand()*4)]]"
241  }
242  set doc
243}
244set cols [list]
245for {set i 1} {$i<250} {incr i} {
246  lappend cols "c$i"
247  lappend vals "'[rnddoc 10]'"
248}
249
250do_test 5.0 {
251  execsql "CREATE VIRTUAL TABLE t4 USING fts5([join $cols ,])"
252  execsql {INSERT INTO t4(t4, rank) VALUES('pgsz', 32)}
253  execsql "INSERT INTO t4 VALUES([join $vals ,])"
254  execsql "INSERT INTO t4 VALUES([join $vals ,])"
255  execsql "INSERT INTO t4 VALUES([join $vals ,])"
256  execsql "INSERT INTO t4 VALUES([join $vals ,])"
257} {}
258
259proc gmatch {col pattern} {
260  expr {[lsearch -glob $col $pattern]>=0}
261}
262db func gmatch gmatch
263foreach {tn col pattern} {
264  1 c100 {xa*}
265  2 c200 {xb*}
266} {
267  set res [db eval "SELECT rowid FROM t4 WHERE gmatch($col, \$pattern)"]
268  set query "$col : $pattern"
269  do_execsql_test 5.$tn { SELECT rowid FROM t4($query) } $res
270}
271
272reset_db
273db func fts5_rnddoc fts5_rnddoc
274do_test 6.0 {
275  execsql {
276    CREATE VIRTUAL TABLE t5 USING fts5(x, y);
277    INSERT INTO t5 VALUES( fts5_rnddoc(10000), fts5_rnddoc(10000) );
278    INSERT INTO t5 VALUES( fts5_rnddoc(10000), fts5_rnddoc(10000) );
279    INSERT INTO t5 VALUES( fts5_rnddoc(10000), fts5_rnddoc(10000) );
280    INSERT INTO t5 VALUES( fts5_rnddoc(10000), fts5_rnddoc(10000) );
281  }
282} {}
283
284proc gmatch {col pattern} {
285  expr {[lsearch -glob $col $pattern]>=0}
286}
287db func gmatch gmatch
288foreach {tn col pattern} {
289  1 y {xa*}
290  2 y {xb*}
291  3 y {xc*}
292  4 x {xa*}
293  5 x {xb*}
294  6 x {xc*}
295} {
296  set res [db eval "SELECT rowid FROM t5 WHERE gmatch($col, \$pattern)"]
297  set query "$col : $pattern"
298  do_execsql_test 6.$tn { SELECT rowid FROM t5($query) } $res
299}
300
301#-------------------------------------------------------------------------
302# Check that the various ways of creating prefix indexes produce the
303# same database on disk.
304#
305save_prng_state
306foreach {tn create} {
307  1 { CREATE VIRTUAL TABLE tt USING fts5(x, y, prefix="1,2,3") }
308  2 { CREATE VIRTUAL TABLE tt USING fts5(x, y, prefix="1 2 3") }
309  3 { CREATE VIRTUAL TABLE tt USING fts5(x, y, prefix=1, prefix=2, prefix=3) }
310  4 { CREATE VIRTUAL TABLE tt USING fts5(x, y, prefix="1 2", prefix=3) }
311} {
312  execsql { DROP TABLE IF EXISTS tt }
313  restore_prng_state
314  execsql $create
315  execsql {
316    INSERT INTO tt VALUES('cc b ggg ccc aa eee hh', 'aa g b hh a e');
317    INSERT INTO tt VALUES('cc bb cc gg j g cc', 'ii jjj ggg jjj cc cc');
318    INSERT INTO tt VALUES('h eee cc h iii', 'aaa iii dd iii dd');
319    INSERT INTO tt VALUES('jjj hh eee c e b gg', 'j bbb jj ddd jj');
320    INSERT INTO tt VALUES('ii hhh aaa ff c hhh iii', 'j cc hh bb e');
321    INSERT INTO tt VALUES('e fff hhh i aaa', 'g b aa gg c aa dd');
322    INSERT INTO tt VALUES('i aaa ccc gg hhh aa h', 'j bbb bbb d ff');
323    INSERT INTO tt VALUES('g f gg ff ff jjj d', 'jjj d j fff fff ee j');
324    INSERT INTO tt VALUES('a cc e ccc jjj c', 'ccc iii d bb a eee g');
325    INSERT INTO tt VALUES('jj hh hh bb bbb gg', 'j c jjj bb iii f');
326    INSERT INTO tt VALUES('a ggg g cc ccc aa', 'jjj j j aaa c');
327    INSERT INTO tt VALUES('ddd j dd b i', 'aaa bbb iii ggg ff ccc ddd');
328    INSERT INTO tt VALUES('jj ii hh c ii h gg', 'hhh bbb ddd bbb hh g ggg');
329    INSERT INTO tt VALUES('aa hhh ccc h ggg ccc', 'iii d jj a ff ii');
330  }
331
332  #db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM tt_data} {puts $r}
333
334  if {$tn==1} {
335    set ::checksum [execsql {SELECT md5sum(id, block) FROM tt_data}]
336  } else {
337    do_execsql_test 7.$tn {
338      SELECT md5sum(id, block) FROM tt_data
339    } [list $::checksum]
340  }
341}
342
343finish_test
344