1# 2015 Jan 13 2# 3# The author disclaims copyright to this source code. In place of 4# a legal notice, here is a blessing: 5# 6# May you do good and not evil. 7# May you find forgiveness for yourself and forgive others. 8# May you share freely, never taking more than you give. 9# 10#*********************************************************************** 11# 12# This file contains tests focused on prefix indexes. 13# 14 15source [file join [file dirname [info script]] fts5_common.tcl] 16set testprefix fts5prefix 17 18# If SQLITE_ENABLE_FTS5 is defined, omit this file. 19ifcapable !fts5 { 20 finish_test 21 return 22} 23 24do_execsql_test 1.0 { 25 CREATE VIRTUAL TABLE xx USING fts5(x, prefix=1); 26 INSERT INTO xx VALUES('one two three'); 27 INSERT INTO xx VALUES('four five six'); 28 INSERT INTO xx VALUES('seven eight nine ten'); 29} 30 31do_execsql_test 1.1 { 32 SELECT rowid FROM xx WHERE xx MATCH 't*' 33} {1 3} 34 35 36#------------------------------------------------------------------------- 37# Check that prefix indexes really do index n-character prefixes, not 38# n-byte prefixes. Use the ascii tokenizer so as not to be confused by 39# diacritic removal. 40# 41do_execsql_test 2.0 { 42 CREATE VIRTUAL TABLE t1 USING fts5(x, tokenize = ascii, prefix = 2) 43} 44 45do_test 2.1 { 46 foreach {rowid string} { 47 1 "\xCA\xCB\xCC\xCD" 48 2 "\u1234\u5678\u4321\u8765" 49 } { 50 execsql { INSERT INTO t1(rowid, x) VALUES($rowid, $string) } 51 } 52} {} 53 54do_execsql_test 2.2 { 55 INSERT INTO t1(t1) VALUES('integrity-check'); 56} 57 58foreach {tn q res} { 59 1 "SELECT rowid FROM t1 WHERE t1 MATCH '\xCA\xCB*'" 1 60 2 "SELECT rowid FROM t1 WHERE t1 MATCH '\u1234\u5678*'" 2 61} { 62 do_execsql_test 2.3.$tn $q $res 63} 64 65#------------------------------------------------------------------------- 66# Check that prefix queries with: 67# 68# * a column filter, and 69# * no prefix index. 70# 71# work Ok. 72# 73do_execsql_test 3.0 { 74 CREATE VIRTUAL TABLE t3 USING fts5(a, b, c); 75 INSERT INTO t3(t3, rank) VALUES('pgsz', 32); 76 BEGIN; 77 INSERT INTO t3 VALUES('acb ccc bba', 'cca bba bca', 'bbc ccc bca'); -- 1 78 INSERT INTO t3 VALUES('cbb cac cab', 'abb aac bba', 'aab ccc cac'); -- 2 79 INSERT INTO t3 VALUES('aac bcb aac', 'acb bcb caa', 'aca bab bca'); -- 3 80 INSERT INTO t3 VALUES('aab ccb ccc', 'aca cba cca', 'aca aac cbb'); -- 4 81 INSERT INTO t3 VALUES('bac aab bab', 'ccb bac cba', 'acb aba abb'); -- 5 82 INSERT INTO t3 VALUES('bab abc ccb', 'acb cba abb', 'cbb aaa cab'); -- 6 83 INSERT INTO t3 VALUES('cbb bbc baa', 'aab aca baa', 'bcc cca aca'); -- 7 84 INSERT INTO t3 VALUES('abc bba abb', 'cac abc cba', 'acc aac cac'); -- 8 85 INSERT INTO t3 VALUES('bbc bbc cab', 'bcb ccb cba', 'bcc cac acb'); -- 9 86 COMMIT; 87} 88 89foreach {tn match res} { 90 1 "a : c*" {1 2 4 6 7 9} 91 2 "b : c*" {1 3 4 5 6 8 9} 92 3 "c : c*" {1 2 4 6 7 8 9} 93 4 "a : b*" {1 3 5 6 7 8 9} 94 5 "b : b*" {1 2 3 5 7 9} 95 6 "c : b*" {1 3 7 9} 96 7 "a : a*" {1 3 4 5 6 8} 97 8 "b : a*" {2 3 4 6 7 8} 98 9 "c : a*" {2 3 4 5 6 7 8 9} 99} { 100 do_execsql_test 3.1.$tn { 101 SELECT rowid FROM t3($match) 102 } $res 103} 104 105do_test 3.2 { 106 expr srand(0) 107 execsql { DELETE FROM t3 } 108 for {set i 0} {$i < 1000} {incr i} { 109 set a [fts5_rnddoc 3] 110 set b [fts5_rnddoc 8] 111 set c [fts5_rnddoc 20] 112 execsql { INSERT INTO t3 VALUES($a, $b, $c) } 113 } 114 execsql { INSERT INTO t3(t3) VALUES('integrity-check') } 115} {} 116 117proc gmatch {col pattern} { 118 expr {[lsearch -glob $col $pattern]>=0} 119} 120db func gmatch gmatch 121 122proc ghl {col pattern} { 123 foreach t $col { 124 if {[string match $pattern $t]} { 125 lappend res "*$t*" 126 } else { 127 lappend res $t 128 } 129 } 130 set res 131} 132db func ghl ghl 133 134set COLS(a) 0 135set COLS(b) 1 136set COLS(c) 2 137 138for {set x 0} {$x<2} {incr x} { 139 foreach {tn pattern} { 140 1 {xa*} 141 2 {xb*} 142 3 {xc*} 143 4 {xd*} 144 5 {xe*} 145 6 {xf*} 146 7 {xg*} 147 8 {xh*} 148 9 {xi*} 149 10 {xj*} 150 } { 151 foreach col {a b c} { 152 153 # Check that the list of returned rowids is correct. 154 # 155 set res [db eval "SELECT rowid FROM t3 WHERE gmatch($col, '$pattern')"] 156 set query "$col : $pattern" 157 do_execsql_test 3.3.$x.$tn.$col.rowid { 158 SELECT rowid FROM t3($query); 159 } $res 160 161 # Check that the highlight() function works. 162 # 163 set res [db eval \ 164 "SELECT ghl($col, '$pattern') FROM t3 WHERE gmatch($col, '$pattern')" 165 ] 166 set idx $COLS($col) 167 do_execsql_test 3.3.$x.$tn.$col.highlight { 168 SELECT highlight(t3, $idx, '*', '*') FROM t3($query); 169 } $res 170 } 171 172 foreach colset {{a b} {b c} {c a} {a c} {b a}} { 173 # Check that the list of returned rowids is correct. 174 # 175 foreach {col1 col2} $colset {} 176 set expr "gmatch($col1, '$pattern') OR gmatch($col2, '$pattern')" 177 set res [db eval "SELECT rowid FROM t3 WHERE $expr"] 178 set query "{$colset} : $pattern" 179 do_execsql_test 3.3.$x.$tn.{$colset}.rowid { 180 SELECT rowid FROM t3($query); 181 } $res 182 183 set resq "SELECT ghl($col1, '$pattern'), ghl($col2, '$pattern')" 184 append resq " FROM t3 WHERE $expr" 185 set res [db eval $resq] 186 set idx1 $COLS($col1) 187 set idx2 $COLS($col2) 188 do_execsql_test 3.3.$x.$tn.{$colset}.highlight { 189 SELECT highlight(t3, $idx1, '*', '*'), highlight(t3, $idx2, '*', '*') 190 FROM t3($query) 191 } $res 192 } 193 } 194 execsql { INSERT INTO t3(t3) VALUES('optimize') } 195 execsql { INSERT INTO t3(t3) VALUES('integrity-check') } 196} 197 198#------------------------------------------------------------------------- 199# 200reset_db 201do_execsql_test 4.0 { 202 CREATE VIRTUAL TABLE t2 USING fts5(c1, c2); 203 INSERT INTO t2 VALUES('xa xb', 'xb xa'); 204 205 INSERT INTO t2 SELECT c1||' '||c1, c2||' '||c2 FROM t2; -- 2 206 INSERT INTO t2 SELECT c1||' '||c1, c2||' '||c2 FROM t2; -- 4 207 INSERT INTO t2 SELECT c1||' '||c1, c2||' '||c2 FROM t2; -- 8 208 INSERT INTO t2 SELECT c1||' '||c1, c2||' '||c2 FROM t2; -- 16 209 INSERT INTO t2 SELECT c1||' '||c1, c2||' '||c2 FROM t2; -- 32 210 INSERT INTO t2 SELECT c1||' '||c1, c2||' '||c2 FROM t2; -- 64 211 INSERT INTO t2 SELECT c1||' '||c1, c2||' '||c2 FROM t2; -- 128 212 INSERT INTO t2 SELECT c1||' '||c1, c2||' '||c2 FROM t2; -- 256 213 INSERT INTO t2 SELECT c1||' '||c1, c2||' '||c2 FROM t2; -- 512 214 INSERT INTO t2 SELECT c1||' '||c1, c2||' '||c2 FROM t2; -- 1024 215 INSERT INTO t2 SELECT c1||' '||c1, c2||' '||c2 FROM t2; -- 2048 216 INSERT INTO t2 SELECT c1||' '||c1, c2||' '||c2 FROM t2; -- 4096 217 218 SELECT count(*) FROM t2('x*'); 219} {4096} 220 221do_execsql_test 4.1 { 222 UPDATE t2 SET c2 = 'ya yb'; 223 SELECT count(*) FROM t2('c1:x*'); 224 SELECT count(*) FROM t2('c2:x*'); 225} {4096 0} 226 227do_execsql_test 4.2 { 228 UPDATE t2 SET c2 = 'xa'; 229 SELECT count(*) FROM t2('c1:x*'); 230 SELECT count(*) FROM t2('c2:x*'); 231} {4096 4096} 232 233#------------------------------------------------------------------------- 234# 235reset_db 236proc rnddoc {n} { 237 set map [list a b c d] 238 set doc [list] 239 for {set i 0} {$i < $n} {incr i} { 240 lappend doc "x[lindex $map [expr int(rand()*4)]]" 241 } 242 set doc 243} 244set cols [list] 245for {set i 1} {$i<250} {incr i} { 246 lappend cols "c$i" 247 lappend vals "'[rnddoc 10]'" 248} 249 250do_test 5.0 { 251 execsql "CREATE VIRTUAL TABLE t4 USING fts5([join $cols ,])" 252 execsql {INSERT INTO t4(t4, rank) VALUES('pgsz', 32)} 253 execsql "INSERT INTO t4 VALUES([join $vals ,])" 254 execsql "INSERT INTO t4 VALUES([join $vals ,])" 255 execsql "INSERT INTO t4 VALUES([join $vals ,])" 256 execsql "INSERT INTO t4 VALUES([join $vals ,])" 257} {} 258 259proc gmatch {col pattern} { 260 expr {[lsearch -glob $col $pattern]>=0} 261} 262db func gmatch gmatch 263foreach {tn col pattern} { 264 1 c100 {xa*} 265 2 c200 {xb*} 266} { 267 set res [db eval "SELECT rowid FROM t4 WHERE gmatch($col, \$pattern)"] 268 set query "$col : $pattern" 269 do_execsql_test 5.$tn { SELECT rowid FROM t4($query) } $res 270} 271 272reset_db 273db func fts5_rnddoc fts5_rnddoc 274do_test 6.0 { 275 execsql { 276 CREATE VIRTUAL TABLE t5 USING fts5(x, y); 277 INSERT INTO t5 VALUES( fts5_rnddoc(10000), fts5_rnddoc(10000) ); 278 INSERT INTO t5 VALUES( fts5_rnddoc(10000), fts5_rnddoc(10000) ); 279 INSERT INTO t5 VALUES( fts5_rnddoc(10000), fts5_rnddoc(10000) ); 280 INSERT INTO t5 VALUES( fts5_rnddoc(10000), fts5_rnddoc(10000) ); 281 } 282} {} 283 284proc gmatch {col pattern} { 285 expr {[lsearch -glob $col $pattern]>=0} 286} 287db func gmatch gmatch 288foreach {tn col pattern} { 289 1 y {xa*} 290 2 y {xb*} 291 3 y {xc*} 292 4 x {xa*} 293 5 x {xb*} 294 6 x {xc*} 295} { 296 set res [db eval "SELECT rowid FROM t5 WHERE gmatch($col, \$pattern)"] 297 set query "$col : $pattern" 298 do_execsql_test 6.$tn { SELECT rowid FROM t5($query) } $res 299} 300 301#------------------------------------------------------------------------- 302# Check that the various ways of creating prefix indexes produce the 303# same database on disk. 304# 305save_prng_state 306foreach {tn create} { 307 1 { CREATE VIRTUAL TABLE tt USING fts5(x, y, prefix="1,2,3") } 308 2 { CREATE VIRTUAL TABLE tt USING fts5(x, y, prefix="1 2 3") } 309 3 { CREATE VIRTUAL TABLE tt USING fts5(x, y, prefix=1, prefix=2, prefix=3) } 310 4 { CREATE VIRTUAL TABLE tt USING fts5(x, y, prefix="1 2", prefix=3) } 311} { 312 execsql { DROP TABLE IF EXISTS tt } 313 restore_prng_state 314 execsql $create 315 execsql { 316 INSERT INTO tt VALUES('cc b ggg ccc aa eee hh', 'aa g b hh a e'); 317 INSERT INTO tt VALUES('cc bb cc gg j g cc', 'ii jjj ggg jjj cc cc'); 318 INSERT INTO tt VALUES('h eee cc h iii', 'aaa iii dd iii dd'); 319 INSERT INTO tt VALUES('jjj hh eee c e b gg', 'j bbb jj ddd jj'); 320 INSERT INTO tt VALUES('ii hhh aaa ff c hhh iii', 'j cc hh bb e'); 321 INSERT INTO tt VALUES('e fff hhh i aaa', 'g b aa gg c aa dd'); 322 INSERT INTO tt VALUES('i aaa ccc gg hhh aa h', 'j bbb bbb d ff'); 323 INSERT INTO tt VALUES('g f gg ff ff jjj d', 'jjj d j fff fff ee j'); 324 INSERT INTO tt VALUES('a cc e ccc jjj c', 'ccc iii d bb a eee g'); 325 INSERT INTO tt VALUES('jj hh hh bb bbb gg', 'j c jjj bb iii f'); 326 INSERT INTO tt VALUES('a ggg g cc ccc aa', 'jjj j j aaa c'); 327 INSERT INTO tt VALUES('ddd j dd b i', 'aaa bbb iii ggg ff ccc ddd'); 328 INSERT INTO tt VALUES('jj ii hh c ii h gg', 'hhh bbb ddd bbb hh g ggg'); 329 INSERT INTO tt VALUES('aa hhh ccc h ggg ccc', 'iii d jj a ff ii'); 330 } 331 332 #db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM tt_data} {puts $r} 333 334 if {$tn==1} { 335 set ::checksum [execsql {SELECT md5sum(id, block) FROM tt_data}] 336 } else { 337 do_execsql_test 7.$tn { 338 SELECT md5sum(id, block) FROM tt_data 339 } [list $::checksum] 340 } 341} 342 343finish_test 344