# -*- coding: utf-8 -*-

from includes import *
from common import *


def search(env, r, *args):
    """Run ``ft.search`` with ``args`` on connection ``r`` and return the reply.

    ``env`` is accepted for signature compatibility but is not used.
    """
    return r.execute_command('ft.search', *args)


def testTagIndex(env):
    """Index N documents with a TAG field and query it in several ways.

    The queries are repeated on every iteration of
    ``retry_with_rdb_reload()``.
    """
    r = env
    env.assertOk(r.execute_command(
        'ft.create', 'idx', 'ON', 'HASH', 'schema', 'title', 'text', 'tags', 'tag'))
    N = 10
    for n in range(N):
        env.assertOk(r.execute_command(
            'ft.add', 'idx', 'doc%d' % n, 1.0, 'fields',
            'title', 'hello world term%d' % n,
            'tags', 'foo bar,xxx,tag %d' % n))

    for _ in r.retry_with_rdb_reload():
        waitForIndex(r, 'idx')
        # Every document has 'hello world' in its title.
        res = env.cmd('ft.search', 'idx', 'hello world')
        env.assertEqual(N, res[0])

        # 'foo bar' only appears as a tag value, so a plain text query
        # is expected to return nothing.
        res = env.cmd('ft.search', 'idx', 'foo bar')
        env.assertEqual(0, res[0])

        res = env.cmd('ft.search', 'idx', '@tags:{foo bar}')
        env.assertEqual(N, res[0])

        # inorder should not affect tags
        res = env.cmd(
            'ft.search', 'idx', '@tags:{tag 1} @tags:{foo bar}', 'slop', '0', 'inorder')
        env.assertEqual(1, res[0])

        # Stop at N - 1 because each iteration also queries tag n + 1.
        for n in range(N - 1):
            res = env.cmd(
                'ft.search', 'idx', '@tags:{tag %d}' % n, 'nocontent')
            env.assertEqual(1, res[0])
            env.assertEqual('doc%d' % n, res[1])

            # Same tag, with the space escaped.
            res = env.cmd(
                'ft.search', 'idx', '@tags:{tag\\ %d}' % n, 'nocontent')
            env.assertEqual(1, res[0])

            # Union of two tags combined with a text query.
            res = env.cmd(
                'ft.search', 'idx',
                'hello world @tags:{tag\\ %d|tag %d}' % (n, n + 1), 'nocontent')
            env.assertEqual(2, res[0])
            res = sorted(res[1:])
            env.assertEqual('doc%d' % n, res[0])
            env.assertEqual('doc%d' % (n + 1), res[1])

            res = env.cmd(
                'ft.search', 'idx', 'term%d @tags:{tag %d}' % (n, n), 'nocontent')
            env.assertEqual(1, res[0])
            env.assertEqual('doc%d' % n, res[1])


def testSeparator(env):
    """Create a TAG field with ':' as separator and query each resulting tag."""
    r = env
    env.assertOk(r.execute_command(
        'ft.create', 'idx', 'ON', 'HASH',
        'schema', 'title', 'text', 'tags', 'tag', 'separator', ':'))

    env.assertOk(r.execute_command(
        'ft.add', 'idx', 'doc1', 1.0, 'fields',
        'title', 'hello world',
        'tags', 'x:hello world: fooz bar:foo,bar:BOO FAR'))

    queries = (
        '@tags:{hello world}',
        '@tags:{fooz bar}',
        '@tags:{foo\\,bar}',
        '@tags:{boo\\ far}',
        '@tags:{x}',
    )
    for _ in r.retry_with_rdb_reload():
        waitForIndex(r, 'idx')
        for q in queries:
            res = env.cmd('ft.search', 'idx', q)
            env.assertEqual(1, res[0])


def testTagPrefix(env):
    """Check the dumped tag index and prefix (``*``) queries on a TAG field.

    Skipped on cluster.
    """
    env.skipOnCluster()
    r = env
    env.assertOk(r.execute_command(
        'ft.create', 'idx', 'ON', 'HASH',
        'schema', 'title', 'text', 'tags', 'tag', 'separator', ','))

    env.assertOk(r.execute_command(
        'ft.add', 'idx', 'doc1', 1.0, 'fields',
        'title', 'hello world',
        'tags', 'hello world,hello-world,hell,jell'))
    env.expect('FT.DEBUG', 'dump_tagidx', 'idx', 'tags') \
        .equal([['hell', [1]], ['hello world', [1]], ['hello-world', [1]], ['jell', [1]]])

    queries = ('@tags:{hello world}', '@tags:{hel*}', '@tags:{hello\\-*}', '@tags:{he*}')
    for _ in r.retry_with_rdb_reload():
        waitForIndex(r, 'idx')
        for q in queries:
            res = env.cmd('ft.search', 'idx', q)
            env.assertEqual(res[0], 1)


def testTagFieldCase(env):
    """Query a TAG field declared as 'TAgs' using differently-cased names.

    The expectations encode that only the exact field name 'TAgs' matches,
    while the tag values match regardless of the query's letter case.
    """
    r = env
    env.assertOk(r.execute_command(
        'ft.create', 'idx', 'ON', 'HASH',
        'schema', 'title', 'text', 'TAgs', 'tag'))

    env.assertOk(r.execute_command(
        'ft.add', 'idx', 'doc1', 1.0, 'fields',
        'title', 'hello world', 'TAgs', 'HELLO WORLD,FOO BAR'))

    for _ in r.retry_with_rdb_reload():
        waitForIndex(r, 'idx')
        env.assertListEqual([0], r.execute_command(
            'FT.SEARCH', 'idx', '@tags:{HELLO WORLD}'))
        env.assertListEqual([1, 'doc1'], r.execute_command(
            'FT.SEARCH', 'idx', '@TAgs:{HELLO WORLD}', 'NOCONTENT'))
        env.assertListEqual([1, 'doc1'], r.execute_command(
            'FT.SEARCH', 'idx', '@TAgs:{foo bar}', 'NOCONTENT'))
        env.assertListEqual([0], r.execute_command(
            'FT.SEARCH', 'idx', '@TAGS:{foo bar}', 'NOCONTENT'))


def testInvalidSyntax(env):
    """Expect ``ft.create`` to fail for missing, multi-char and empty separators."""
    r = env
    # invalid syntax: SEPARATOR without a value
    with env.assertResponseError():
        r.execute_command(
            'ft.create', 'idx', 'ON', 'HASH',
            'schema', 'title', 'text', 'tags', 'tag', 'separator')
    # separator longer than one character
    with env.assertResponseError():
        r.execute_command(
            'ft.create', 'idx', 'ON', 'HASH',
            'schema', 'title', 'text', 'tags', 'tag', 'separator', "foo")
    # empty separator
    with env.assertResponseError():
        r.execute_command(
            'ft.create', 'idx', 'ON', 'HASH',
            'schema', 'title', 'text', 'tags', 'tag', 'separator', "")


def testTagVals(env):
    """Check ``ft.tagvals`` output for two TAG fields and its error cases."""
    r = env
    r.execute_command(
        'ft.create', 'idx', 'ON', 'HASH',
        'schema', 'title', 'text', 'tags', 'tag', 'othertags', 'tag')

    N = 100
    alltags = set()
    for n in range(N):
        # Two tags unique to this document plus one shared tag ('x').
        tags = ('foo %d' % n, 'bar %d' % n, 'x')
        alltags.update(tags)

        # 'othertags' is shared by each consecutive pair of documents.
        env.assertOk(r.execute_command(
            'ft.add', 'idx', 'doc%d' % n, 1.0, 'fields',
            'tags', ','.join(tags), 'othertags', 'baz %d' % (n // 2)))

    for _ in r.retry_with_rdb_reload():
        waitForIndex(r, 'idx')
        res = r.execute_command('ft.tagvals', 'idx', 'tags')
        env.assertEqual(N * 2 + 1, len(res))

        env.assertEqual(alltags, set(res))

        res = r.execute_command('ft.tagvals', 'idx', 'othertags')
        # Floor division keeps the expected count an int on Python 3 as well.
        env.assertEqual(N // 2, len(res))

        # Wrong arity, unknown index, unknown field and a non-TAG field
        # are all expected to raise.
        env.expect('ft.tagvals', 'idx').raiseError()
        env.expect('ft.tagvals', 'idx', 'idx', 'idx').raiseError()
        env.expect('ft.tagvals', 'fake_idx', 'tags').raiseError()
        env.expect('ft.tagvals', 'idx', 'fake_tags').raiseError()
        env.expect('ft.tagvals', 'idx', 'title').raiseError()


def testSearchNotExistsTagValue(env):
    """Search for a tag value that was never indexed and expect no results."""
    # This test basically makes sure we are not leaking.
    env.expect('FT.CREATE idx ON HASH SCHEMA t TAG SORTABLE').ok()
    env.expect('FT.SEARCH idx @t:{val}').equal([0])


def testIssue1305(env):
    """Regression test for issue 1305: optional (``~``) tag clauses with scores."""
    env.expect('FT.CREATE myIdx ON HASH SCHEMA title TAG').ok()
    env.expect('FT.ADD myIdx doc2 1.0 FIELDS title "work"').ok()
    # Adding doc2 a second time is expected to fail.
    env.expect('FT.ADD myIdx doc2 1.0 FIELDS title "hello"').error()
    env.expect('FT.ADD myIdx doc3 1.0 FIELDS title "hello"').ok()
    env.expect('FT.ADD myIdx doc1 1.0 FIELDS title "hello,work"').ok()
    expectedRes = {
        'doc1': ['inf', ['title', '"hello,work"']],
        'doc3': ['inf', ['title', '"hello"']],
        'doc2': ['inf', ['title', '"work"']],
    }
    res = env.cmd('ft.search', 'myIdx', '~@title:{wor} ~@title:{hell}', 'WITHSCORES')[1:]
    # The reply is a flat [id, score, fields, id, score, fields, ...] list;
    # fold it into {id: [score, fields]} so the comparison ignores order.
    res = {res[i]: res[i + 1: i + 3] for i in range(0, len(res), 3)}
    env.assertEqual(res, expectedRes)


def testTagCaseSensitive(env):
    """Compare TAG fields with and without CASESENSITIVE and custom SEPARATOR.

    The FT.DEBUG dumps and the forced GC runs are only performed when not
    running on a cluster.
    """
    conn = getConnectionByEnv(env)

    env.expect('FT.CREATE idx1 SCHEMA t TAG').ok()
    env.expect('FT.CREATE idx2 SCHEMA t TAG CASESENSITIVE').ok()
    env.expect('FT.CREATE idx3 SCHEMA t TAG SEPARATOR .').ok()
    env.expect('FT.CREATE idx4 SCHEMA t TAG SEPARATOR . CASESENSITIVE').ok()
    env.expect('FT.CREATE idx5 SCHEMA t TAG CASESENSITIVE SEPARATOR .').ok()

    conn.execute_command('HSET', 'doc1', 't', 'foo,FOO')
    conn.execute_command('HSET', 'doc2', 't', 'FOO')
    conn.execute_command('HSET', 'doc3', 't', 'foo')

    if not env.is_cluster():
        conn.execute_command('FT.CONFIG', 'SET', 'FORK_GC_CLEAN_THRESHOLD', '0')
        env.expect('FT.DEBUG', 'dump_tagidx', 'idx1', 't').equal([['foo', [1, 2, 3]]])
        env.expect('FT.DEBUG', 'dump_tagidx', 'idx2', 't').equal([['foo', [1, 3]], ['FOO', [1, 2]]])
        env.expect('FT.DEBUG', 'dump_tagidx', 'idx3', 't').equal([['foo', [2, 3]], ['foo,foo', [1]]])
        env.expect('FT.DEBUG', 'dump_tagidx', 'idx4', 't').equal([['foo', [3]], ['foo,FOO', [1]], ['FOO', [2]]])
        env.expect('FT.DEBUG', 'dump_tagidx', 'idx5', 't').equal([['foo', [3]], ['foo,FOO', [1]], ['FOO', [2]]])

    env.expect('FT.SEARCH', 'idx1', '@t:{FOO}') \
        .equal([3, 'doc1', ['t', 'foo,FOO'], 'doc2', ['t', 'FOO'], 'doc3', ['t', 'foo']])
    env.expect('FT.SEARCH', 'idx1', '@t:{foo}') \
        .equal([3, 'doc1', ['t', 'foo,FOO'], 'doc2', ['t', 'FOO'], 'doc3', ['t', 'foo']])

    env.expect('FT.SEARCH', 'idx2', '@t:{FOO}') \
        .equal([2, 'doc1', ['t', 'foo,FOO'], 'doc2', ['t', 'FOO']])
    env.expect('FT.SEARCH', 'idx2', '@t:{foo}') \
        .equal([2, 'doc1', ['t', 'foo,FOO'], 'doc3', ['t', 'foo']])

    # Overwrite the documents with tag values that contain a space.
    conn.execute_command('HSET', 'doc1', 't', 'f o,F O')
    conn.execute_command('HSET', 'doc2', 't', 'F O')
    conn.execute_command('HSET', 'doc3', 't', 'f o')

    if not env.is_cluster():
        forceInvokeGC(env, 'idx1')
        forceInvokeGC(env, 'idx2')
        forceInvokeGC(env, 'idx3')
        forceInvokeGC(env, 'idx4')
        forceInvokeGC(env, 'idx5')

        env.expect('FT.DEBUG', 'dump_tagidx', 'idx1', 't').equal([['f o', [4, 5, 6]]])
        env.expect('FT.DEBUG', 'dump_tagidx', 'idx2', 't').equal([['f o', [4, 6]], ['F O', [4, 5]]])
        env.expect('FT.DEBUG', 'dump_tagidx', 'idx3', 't').equal([['f o', [5, 6]], ['f o,f o', [4]]])
        env.expect('FT.DEBUG', 'dump_tagidx', 'idx4', 't').equal([['f o', [6]], ['f o,F O', [4]], ['F O', [5]]])
        env.expect('FT.DEBUG', 'dump_tagidx', 'idx5', 't').equal([['f o', [6]], ['f o,F O', [4]], ['F O', [5]]])

    # not casesensitive
    env.expect('FT.SEARCH', 'idx1', '@t:{F\\ O}') \
        .equal([3, 'doc1', ['t', 'f o,F O'], 'doc2', ['t', 'F O'], 'doc3', ['t', 'f o']])
    env.expect('FT.SEARCH', 'idx1', '@t:{f\\ o}') \
        .equal([3, 'doc1', ['t', 'f o,F O'], 'doc2', ['t', 'F O'], 'doc3', ['t', 'f o']])

    # casesensitive
    env.expect('FT.SEARCH', 'idx2', '@t:{F\\ O}') \
        .equal([2, 'doc1', ['t', 'f o,F O'], 'doc2', ['t', 'F O']])
    env.expect('FT.SEARCH', 'idx2', '@t:{f\\ o}') \
        .equal([2, 'doc1', ['t', 'f o,F O'], 'doc3', ['t', 'f o']])

    # not casesensitive
    env.expect('FT.SEARCH', 'idx3', '@t:{f\\ o\\,f\\ o}') \
        .equal([1, 'doc1', ['t', 'f o,F O']])
    env.expect('FT.SEARCH', 'idx3', '@t:{f\\ o\\,F\\ O}') \
        .equal([1, 'doc1', ['t', 'f o,F O']])
    env.expect('FT.SEARCH', 'idx3', '@t:{F\\ O\\,F\\ O}') \
        .equal([1, 'doc1', ['t', 'f o,F O']])
    env.expect('FT.SEARCH', 'idx3', '@t:{F\\ O}') \
        .equal([2, 'doc2', ['t', 'F O'], 'doc3', ['t', 'f o']])
    env.expect('FT.SEARCH', 'idx3', '@t:{f\\ o}') \
        .equal([2, 'doc2', ['t', 'F O'], 'doc3', ['t', 'f o']])

    # casesensitive
    env.expect('FT.SEARCH', 'idx4', '@t:{f\\ o\\,f\\ o}') \
        .equal([0])
    env.expect('FT.SEARCH', 'idx4', '@t:{f\\ o\\,F\\ O}') \
        .equal([1, 'doc1', ['t', 'f o,F O']])
    env.expect('FT.SEARCH', 'idx4', '@t:{F\\ O\\,F\\ O}') \
        .equal([0])
    env.expect('FT.SEARCH', 'idx4', '@t:{F\\ O}') \
        .equal([1, 'doc2', ['t', 'F O']])
    env.expect('FT.SEARCH', 'idx4', '@t:{f\\ o}') \
        .equal([1, 'doc3', ['t', 'f o']])


def testTagGCClearEmpty(env):
    """Delete documents, force GC, and check emptied tags leave the tag index.

    Skipped on cluster.
    """
    env.skipOnCluster()

    conn = getConnectionByEnv(env)
    conn.execute_command('FT.CONFIG', 'SET', 'FORK_GC_CLEAN_THRESHOLD', '0')
    conn.execute_command('FT.CREATE', 'idx', 'SCHEMA', 't', 'TAG')
    conn.execute_command('HSET', 'doc1', 't', 'foo')
    conn.execute_command('HSET', 'doc2', 't', 'bar')
    conn.execute_command('HSET', 'doc3', 't', 'baz')
    env.expect('FT.DEBUG', 'DUMP_TAGIDX', 'idx', 't').equal([['foo', [1]], ['bar', [2]], ['baz', [3]]])
    env.expect('FT.SEARCH', 'idx', '@t:{foo}').equal([1, 'doc1', ['t', 'foo']])

    # delete two tags
    conn.execute_command('DEL', 'doc1')
    conn.execute_command('DEL', 'doc2')
    forceInvokeGC(env, 'idx')
    env.expect('FT.DEBUG', 'DUMP_TAGIDX', 'idx', 't').equal([['baz', [3]]])
    env.expect('FT.SEARCH', 'idx', '@t:{foo}').equal([0])

    # delete last tag
    conn.execute_command('DEL', 'doc3')
    forceInvokeGC(env, 'idx')
    env.expect('FT.DEBUG', 'DUMP_TAGIDX', 'idx', 't').equal([])

    # check term can be used after being empty
    conn.execute_command('HSET', 'doc4', 't', 'foo')
    conn.execute_command('HSET', 'doc5', 't', 'foo')
    env.expect('FT.SEARCH', 'idx', '@t:{foo}') \
        .equal([2, 'doc4', ['t', 'foo'], 'doc5', ['t', 'foo']])


def testTagGCClearEmptyWithCursor(env):
    """Read from an open cursor after GC has removed the tag it iterates.

    Skipped on cluster.
    """
    env.skipOnCluster()

    conn = getConnectionByEnv(env)
    conn.execute_command('FT.CONFIG', 'SET', 'FORK_GC_CLEAN_THRESHOLD', '0')
    conn.execute_command('FT.CREATE', 'idx', 'SCHEMA', 't', 'TAG')
    conn.execute_command('HSET', 'doc1', 't', 'foo')
    conn.execute_command('HSET', 'doc2', 't', 'foo')
    env.expect('FT.DEBUG', 'DUMP_TAGIDX', 'idx', 't').equal([['foo', [1, 2]]])

    # COUNT 1 returns a single row and leaves the cursor open.
    res, cursor = env.cmd('FT.AGGREGATE', 'idx', '@t:{foo}', 'WITHCURSOR', 'COUNT', '1')
    env.assertEqual(res, [1, []])

    # delete both documents and run the GC to clean 'foo' inverted index
    env.expect('DEL', 'doc1').equal(1)
    env.expect('DEL', 'doc2').equal(1)

    forceInvokeGC(env, 'idx')

    # make sure the inverted index was cleaned
    env.expect('FT.DEBUG', 'DUMP_TAGIDX', 'idx', 't').equal([])

    # read from the cursor
    res, cursor = env.cmd('FT.CURSOR', 'READ', 'idx', cursor)
    env.assertEqual(res, [0])
    env.assertEqual(cursor, 0)


def testTagGCClearEmptyWithCursorAndMoreData(env):
    """Like the cursor/GC test, but re-add documents before reading the cursor.

    Skipped on cluster.
    """
    env.skipOnCluster()

    conn = getConnectionByEnv(env)
    conn.execute_command('FT.CONFIG', 'SET', 'FORK_GC_CLEAN_THRESHOLD', '0')
    conn.execute_command('FT.CREATE', 'idx', 'SCHEMA', 't', 'TAG')
    conn.execute_command('HSET', 'doc1', 't', 'foo')
    conn.execute_command('HSET', 'doc2', 't', 'foo')
    env.expect('FT.DEBUG', 'DUMP_TAGIDX', 'idx', 't').equal([['foo', [1, 2]]])

    # COUNT 1 returns a single row and leaves the cursor open.
    res, cursor = env.cmd('FT.AGGREGATE', 'idx', '@t:{foo}', 'WITHCURSOR', 'COUNT', '1')
    env.assertEqual(res, [1, []])

    # delete both documents and run the GC to clean 'foo' inverted index
    env.expect('DEL', 'doc1').equal(1)
    env.expect('DEL', 'doc2').equal(1)

    forceInvokeGC(env, 'idx')

    # make sure the inverted index was cleaned
    env.expect('FT.DEBUG', 'DUMP_TAGIDX', 'idx', 't').equal([])

    # add data
    conn.execute_command('HSET', 'doc3', 't', 'foo')
    conn.execute_command('HSET', 'doc4', 't', 'foo')
    env.expect('FT.DEBUG', 'DUMP_TAGIDX', 'idx', 't').equal([['foo', [3, 4]]])

    # read from the cursor
    res, cursor = conn.execute_command('FT.CURSOR', 'READ', 'idx', cursor)
    env.assertEqual(res, [0])
    env.assertEqual(cursor, 0)

    # ensure later documents with same tag are read
    res = conn.execute_command('FT.AGGREGATE', 'idx', '@t:{foo}')
    env.assertEqual(res, [1, [], []])


@unstable
def testEmptyTagLeak(env):
    """Add and delete one-tag documents, force GC, and expect an empty tag index.

    Skipped on cluster.
    """
    env.skipOnCluster()

    cycles = 1
    tags = 30

    conn = getConnectionByEnv(env)
    conn.execute_command('FT.CONFIG', 'SET', 'FORK_GC_CLEAN_THRESHOLD', '0')
    conn.execute_command('FT.CREATE', 'idx', 'SCHEMA', 't', 'TAG')
    pl = conn.pipeline()

    for i in range(cycles):
        # Each document in a cycle gets its own unique tag.
        for j in range(tags):
            x = j + i * tags
            pl.execute_command('HSET', 'doc{}'.format(x), 't', 'tag{}'.format(x))
        pl.execute()
        for j in range(tags):
            pl.execute_command('DEL', 'doc{}'.format(j + i * tags))
        pl.execute()
    forceInvokeGC(env, 'idx')
    env.expect('FT.DEBUG', 'DUMP_TAGIDX', 'idx', 't').equal([])