1# -*- coding: utf-8 -*-
2
3from includes import *
4from common import *
5
def search(env, r, *args):
    """Send ``ft.search`` followed by ``args`` through ``r`` and return the reply.

    ``env`` is accepted for signature compatibility but is not used here.
    """
    command = ('ft.search',) + args
    return r.execute_command(*command)
8
def testTagIndex(env):
    """Index ten documents with a TAG field and query them by tag.

    Every document gets the tags 'foo bar', 'xxx' and a per-document
    'tag <n>'.  All assertions are repeated on each pass yielded by
    ``env.retry_with_rdb_reload()``.
    """
    total = 10
    env.assertOk(env.execute_command(
        'ft.create', 'idx', 'ON', 'HASH', 'schema', 'title', 'text', 'tags', 'tag'))

    for doc_num in range(total):
        title = 'hello world term%d' % doc_num
        tag_value = 'foo bar,xxx,tag %d' % doc_num
        reply = env.execute_command('ft.add', 'idx', 'doc%d' % doc_num, 1.0, 'fields',
                                    'title', title, 'tags', tag_value)
        env.assertOk(reply)

    for _ in env.retry_with_rdb_reload():
        waitForIndex(env, 'idx')

        # The title text matches every document.
        found = env.cmd('ft.search', 'idx', 'hello world')
        env.assertEqual(10, found[0])

        # 'foo bar' as a plain query (no @tags: clause) is expected to match nothing.
        found = env.cmd('ft.search', 'idx', 'foo bar')
        env.assertEqual(0, found[0])

        # The same value as a tag clause matches every document.
        found = env.cmd('ft.search', 'idx', '@tags:{foo bar}')
        env.assertEqual(total, found[0])

        # inorder should not affect tags
        found = env.cmd('ft.search', 'idx', '@tags:{tag 1} @tags:{foo bar}',
                        'slop', '0', 'inorder')
        env.assertEqual(1, found[0])

        # Stops one short of the last document because each pass also looks up doc n + 1.
        for doc_num in range(total - 1):
            doc_id = 'doc%d' % doc_num
            next_id = 'doc%d' % (doc_num + 1)

            # Unescaped space inside the tag clause.
            found = env.cmd('ft.search', 'idx', '@tags:{tag %d}' % doc_num, 'nocontent')
            env.assertEqual(1, found[0])
            env.assertEqual(doc_id, found[1])

            # Escaped space inside the tag clause.
            found = env.cmd('ft.search', 'idx', '@tags:{tag\\ %d}' % doc_num, 'nocontent')
            env.assertEqual(1, found[0])

            # Union of two tags combined with a text query.
            union_query = 'hello world @tags:{tag\\ %d|tag %d}' % (doc_num, doc_num + 1)
            found = env.cmd('ft.search', 'idx', union_query, 'nocontent')
            env.assertEqual(2, found[0])
            doc_ids = sorted(found[1:])
            env.assertEqual(doc_id, doc_ids[0])
            env.assertEqual(next_id, doc_ids[1])

            # Text term intersected with the matching tag.
            found = env.cmd('ft.search', 'idx',
                            'term%d @tags:{tag %d}' % (doc_num, doc_num), 'nocontent')
            env.assertEqual(1, found[0])
            env.assertEqual(doc_id, found[1])
54
def testSeparator(env):
    """Check a TAG field declared with ':' as its separator.

    One document is added whose tags value is split on ':'; each query below
    is expected to return exactly that document, on every pass yielded by
    ``env.retry_with_rdb_reload()``.
    """
    env.assertOk(env.execute_command(
        'ft.create', 'idx', 'ON', 'HASH',
        'schema', 'title', 'text', 'tags', 'tag', 'separator', ':'))

    raw_tags = 'x:hello world: fooz bar:foo,bar:BOO FAR'
    env.assertOk(env.execute_command('ft.add', 'idx', 'doc1', 1.0, 'fields',
                                     'title', 'hello world', 'tags', raw_tags))

    queries = (
        '@tags:{hello world}',
        '@tags:{fooz bar}',
        '@tags:{foo\\,bar}',
        '@tags:{boo\\ far}',
        '@tags:{x}',
    )
    for _ in env.retry_with_rdb_reload():
        waitForIndex(env, 'idx')
        for query in queries:
            reply = env.cmd('ft.search', 'idx', query)
            env.assertEqual(1, reply[0])
68
def testTagPrefix(env):
    """Check exact and prefix (``*``) queries against a TAG field.

    Skipped on cluster.  One document is indexed with four comma-separated
    tags; the tag-index dump is asserted once, and each query is then expected
    to return one result on every pass yielded by
    ``env.retry_with_rdb_reload()``.

    Expected doc ids are plain ints rather than Python 2 ``1L`` literals so the
    module also parses under Python 3 (``1 == 1L`` in Python 2, so the
    comparison is unchanged there).
    """
    env.skipOnCluster()
    r = env
    env.assertOk(r.execute_command(
        'ft.create', 'idx', 'ON', 'HASH',
        'schema', 'title', 'text', 'tags', 'tag', 'separator', ','))

    env.assertOk(r.execute_command('ft.add', 'idx', 'doc1', 1.0, 'fields',
                                   'title', 'hello world',
                                   'tags', 'hello world,hello-world,hell,jell'))
    env.expect('FT.DEBUG', 'dump_tagidx', 'idx', 'tags')    \
        .equal([['hell', [1]], ['hello world', [1]], ['hello-world', [1]], ['jell', [1]]])

    for _ in r.retry_with_rdb_reload():
        waitForIndex(r, 'idx')
        for q in ('@tags:{hello world}', '@tags:{hel*}', '@tags:{hello\\-*}', '@tags:{he*}'):
            res = env.cmd('ft.search', 'idx', q)
            env.assertEqual(res[0], 1)
87
def testTagFieldCase(env):
    """Check how the letter case of a TAG field *name* affects queries.

    The field is declared as 'TAgs'.  Queries that spell the name exactly that
    way are expected to find doc1 (for either casing of the tag value), while
    '@tags' and '@TAGS' are expected to return nothing.
    """
    env.assertOk(env.execute_command(
        'ft.create', 'idx', 'ON', 'HASH',
        'schema', 'title', 'text', 'TAgs', 'tag'))
    env.assertOk(env.execute_command('ft.add', 'idx', 'doc1', 1.0, 'fields',
                                     'title', 'hello world', 'TAgs', 'HELLO WORLD,FOO BAR'))

    # (expected reply, arguments following the index name), in the order they are issued
    checks = (
        ([0], ('@tags:{HELLO WORLD}',)),
        ([1, 'doc1'], ('@TAgs:{HELLO WORLD}', 'NOCONTENT')),
        ([1, 'doc1'], ('@TAgs:{foo bar}', 'NOCONTENT')),
        ([0], ('@TAGS:{foo bar}', 'NOCONTENT')),
    )
    for _ in env.retry_with_rdb_reload():
        waitForIndex(env, 'idx')
        for expected, query_args in checks:
            reply = env.execute_command('FT.SEARCH', 'idx', *query_args)
            env.assertListEqual(expected, reply)
106
def testInvalidSyntax(env):
    """Check that ft.create rejects malformed TAG ``separator`` arguments.

    Three variants are tried, each expected to raise a response error: the
    keyword with no value, a multi-character value, and an empty string.
    """
    create_prefix = ('ft.create', 'idx', 'ON', 'HASH',
                     'schema', 'title', 'text', 'tags', 'tag', 'separator')
    # invalid syntax
    for separator_tail in ((), ("foo",), ("",)):
        with env.assertResponseError():
            env.execute_command(*(create_prefix + separator_tail))
122
def testTagVals(env):
    """Check ``ft.tagvals`` on two TAG fields and on invalid arguments.

    N documents are added.  Each has the tags 'foo <n>', 'bar <n>' and the
    shared tag 'x', so the 'tags' field holds 2 * N + 1 distinct values.
    'othertags' gets 'baz <n // 2>', so consecutive pairs of documents share a
    value and N // 2 distinct values are expected.

    The expected 'othertags' count uses floor division so it stays an int on
    Python 3, matching the ``n // 2`` used when the values are generated.
    """
    r = env
    r.execute_command(
        'ft.create', 'idx', 'ON', 'HASH',
        'schema', 'title', 'text', 'tags', 'tag', 'othertags', 'tag')

    N = 100
    alltags = set()
    for n in range(N):
        tags = ('foo %d' % n, 'bar %d' % n, 'x')
        alltags.update(tags)

        env.assertOk(r.execute_command('ft.add', 'idx', 'doc%d' % n, 1.0, 'fields',
                                       'tags', ','.join(tags), 'othertags', 'baz %d' % (n // 2)))
    for _ in r.retry_with_rdb_reload():
        waitForIndex(r, 'idx')
        res = r.execute_command('ft.tagvals', 'idx', 'tags')
        env.assertEqual(N * 2 + 1, len(res))

        env.assertEqual(alltags, set(res))

        res = r.execute_command('ft.tagvals', 'idx', 'othertags')
        env.assertEqual(N // 2, len(res))

        # Each of these calls is expected to fail: missing field argument, an
        # extra argument, an unknown index, an unknown field, and 'title',
        # which the schema declares as text rather than tag.
        env.expect('ft.tagvals', 'idx').raiseError()
        env.expect('ft.tagvals', 'idx', 'idx', 'idx').raiseError()
        env.expect('ft.tagvals', 'fake_idx', 'tags').raiseError()
        env.expect('ft.tagvals', 'idx', 'fake_tags').raiseError()
        env.expect('ft.tagvals', 'idx', 'title').raiseError()
154
def testSearchNotExistsTagValue(env):
    """Search a sortable TAG field for a value that was never indexed.

    The index is created with no documents, so the search is expected to
    return ``[0]``.
    """
    # this test basically makes sure we are not leaking
    create_reply = env.expect('FT.CREATE idx ON HASH SCHEMA t TAG SORTABLE')
    create_reply.ok()

    search_reply = env.expect('FT.SEARCH idx @t:{val}')
    search_reply.equal([0])
159
def testIssue1305(env):
    """Regression test for issue 1305: optional (``~``) tag clauses with WITHSCORES.

    Three documents are added to a TAG index (a second add of doc2 is expected
    to error), then a query made of two optional tag clauses is run and the
    reply is compared, keyed by document id, with the expected score/fields.
    """
    env.expect('FT.CREATE myIdx ON HASH SCHEMA title TAG').ok()

    # (command, whether it is expected to succeed), in execution order
    add_steps = (
        ('FT.ADD myIdx doc2 1.0 FIELDS title "work"', True),
        ('FT.ADD myIdx doc2 1.0 FIELDS title "hello"', False),
        ('FT.ADD myIdx doc3 1.0 FIELDS title "hello"', True),
        ('FT.ADD myIdx doc1 1.0 FIELDS title "hello,work"', True),
    )
    for command, should_succeed in add_steps:
        outcome = env.expect(command)
        if should_succeed:
            outcome.ok()
        else:
            outcome.error()

    expectedRes = {
        'doc1': ['inf', ['title', '"hello,work"']],
        'doc3': ['inf', ['title', '"hello"']],
        'doc2': ['inf', ['title', '"work"']],
    }

    # Drop the leading count; the remainder is read as (id, score, fields) triples.
    flat = env.cmd('ft.search', 'myIdx', '~@title:{wor} ~@title:{hell}', 'WITHSCORES')[1:]
    actual = {}
    for pos in range(0, len(flat), 3):
        actual[flat[pos]] = flat[pos + 1: pos + 3]
    env.assertEqual(actual, expectedRes)
170
def testTagCaseSensitive(env):
    """Compare default and CASESENSITIVE TAG fields, with and without a custom separator.

    Five indexes are created over the same hash field ``t``: idx1 (plain TAG),
    idx2 (CASESENSITIVE), idx3 (SEPARATOR .), idx4 (SEPARATOR . CASESENSITIVE)
    and idx5 (CASESENSITIVE SEPARATOR .).  The test asserts the tag-index
    dumps (only when not running on a cluster) and FT.SEARCH replies, first
    for 'foo'/'FOO' values and then after the same keys are overwritten with
    values containing a space ('f o'/'F O').

    Expected integers are written without the Python 2 ``L`` suffix so the
    module also parses under Python 3; ``1 == 1L`` holds in Python 2, so the
    comparisons are unchanged there.
    """
    conn = getConnectionByEnv(env)

    env.expect('FT.CREATE idx1 SCHEMA t TAG').ok()
    env.expect('FT.CREATE idx2 SCHEMA t TAG CASESENSITIVE').ok()
    env.expect('FT.CREATE idx3 SCHEMA t TAG SEPARATOR .').ok()
    env.expect('FT.CREATE idx4 SCHEMA t TAG SEPARATOR . CASESENSITIVE').ok()
    env.expect('FT.CREATE idx5 SCHEMA t TAG CASESENSITIVE SEPARATOR .').ok()

    conn.execute_command('HSET', 'doc1', 't', 'foo,FOO')
    conn.execute_command('HSET', 'doc2', 't', 'FOO')
    conn.execute_command('HSET', 'doc3', 't', 'foo')

    if not env.is_cluster():
        conn.execute_command('FT.CONFIG', 'SET', 'FORK_GC_CLEAN_THRESHOLD', '0')
        env.expect('FT.DEBUG', 'dump_tagidx', 'idx1', 't').equal([['foo', [1, 2, 3]]])
        env.expect('FT.DEBUG', 'dump_tagidx', 'idx2', 't').equal([['foo', [1, 3]], ['FOO', [1, 2]]])
        # With '.' as the separator, 'foo,FOO' is indexed as one tag.
        env.expect('FT.DEBUG', 'dump_tagidx', 'idx3', 't').equal([['foo', [2, 3]], ['foo,foo', [1]]])
        env.expect('FT.DEBUG', 'dump_tagidx', 'idx4', 't').equal([['foo', [3]], ['foo,FOO', [1]], ['FOO', [2]]])
        env.expect('FT.DEBUG', 'dump_tagidx', 'idx5', 't').equal([['foo', [3]], ['foo,FOO', [1]], ['FOO', [2]]])

    env.expect('FT.SEARCH', 'idx1', '@t:{FOO}')         \
        .equal([3, 'doc1', ['t', 'foo,FOO'], 'doc2', ['t', 'FOO'], 'doc3', ['t', 'foo']])
    env.expect('FT.SEARCH', 'idx1', '@t:{foo}')         \
        .equal([3, 'doc1', ['t', 'foo,FOO'], 'doc2', ['t', 'FOO'], 'doc3', ['t', 'foo']])

    env.expect('FT.SEARCH', 'idx2', '@t:{FOO}')         \
        .equal([2, 'doc1', ['t', 'foo,FOO'], 'doc2', ['t', 'FOO']])
    env.expect('FT.SEARCH', 'idx2', '@t:{foo}')         \
        .equal([2, 'doc1', ['t', 'foo,FOO'], 'doc3', ['t', 'foo']])

    # Overwrite the same keys with tag values that contain a space.
    conn.execute_command('HSET', 'doc1', 't', 'f o,F O')
    conn.execute_command('HSET', 'doc2', 't', 'F O')
    conn.execute_command('HSET', 'doc3', 't', 'f o')

    if not env.is_cluster():
        # Run the GC helper on every index before dumping; the dumps below
        # are expected to list only the new doc ids (4-6).
        forceInvokeGC(env, 'idx1')
        forceInvokeGC(env, 'idx2')
        forceInvokeGC(env, 'idx3')
        forceInvokeGC(env, 'idx4')
        forceInvokeGC(env, 'idx5')

        env.expect('FT.DEBUG', 'dump_tagidx', 'idx1', 't').equal([['f o', [4, 5, 6]]])
        env.expect('FT.DEBUG', 'dump_tagidx', 'idx2', 't').equal([['f o', [4, 6]], ['F O', [4, 5]]])
        env.expect('FT.DEBUG', 'dump_tagidx', 'idx3', 't').equal([['f o', [5, 6]], ['f o,f o', [4]]])
        env.expect('FT.DEBUG', 'dump_tagidx', 'idx4', 't').equal([['f o', [6]], ['f o,F O', [4]], ['F O', [5]]])
        env.expect('FT.DEBUG', 'dump_tagidx', 'idx5', 't').equal([['f o', [6]], ['f o,F O', [4]], ['F O', [5]]])

    # not casesensitive
    env.expect('FT.SEARCH', 'idx1', '@t:{F\\ O}')         \
        .equal([3, 'doc1', ['t', 'f o,F O'], 'doc2', ['t', 'F O'], 'doc3', ['t', 'f o']])
    env.expect('FT.SEARCH', 'idx1', '@t:{f\\ o}')         \
        .equal([3, 'doc1', ['t', 'f o,F O'], 'doc2', ['t', 'F O'], 'doc3', ['t', 'f o']])

    # casesensitive
    env.expect('FT.SEARCH', 'idx2', '@t:{F\\ O}')         \
        .equal([2, 'doc1', ['t', 'f o,F O'], 'doc2', ['t', 'F O']])
    env.expect('FT.SEARCH', 'idx2', '@t:{f\\ o}')         \
        .equal([2, 'doc1', ['t', 'f o,F O'], 'doc3', ['t', 'f o']])

    # not casesensitive
    env.expect('FT.SEARCH', 'idx3', '@t:{f\\ o\\,f\\ o}')         \
        .equal([1, 'doc1', ['t', 'f o,F O']])
    env.expect('FT.SEARCH', 'idx3', '@t:{f\\ o\\,F\\ O}')         \
        .equal([1, 'doc1', ['t', 'f o,F O']])
    env.expect('FT.SEARCH', 'idx3', '@t:{F\\ O\\,F\\ O}')         \
        .equal([1, 'doc1', ['t', 'f o,F O']])
    env.expect('FT.SEARCH', 'idx3', '@t:{F\\ O}')         \
        .equal([2, 'doc2', ['t', 'F O'], 'doc3', ['t', 'f o']])
    env.expect('FT.SEARCH', 'idx3', '@t:{f\\ o}')         \
        .equal([2, 'doc2', ['t', 'F O'], 'doc3', ['t', 'f o']])

    # casesensitive
    env.expect('FT.SEARCH', 'idx4', '@t:{f\\ o\\,f\\ o}')         \
        .equal([0])
    env.expect('FT.SEARCH', 'idx4', '@t:{f\\ o\\,F\\ O}')         \
        .equal([1, 'doc1', ['t', 'f o,F O']])
    env.expect('FT.SEARCH', 'idx4', '@t:{F\\ O\\,F\\ O}')         \
        .equal([0])
    env.expect('FT.SEARCH', 'idx4', '@t:{F\\ O}')         \
        .equal([1, 'doc2', ['t', 'F O']])
    env.expect('FT.SEARCH', 'idx4', '@t:{f\\ o}')         \
        .equal([1, 'doc3', ['t', 'f o']])
254
def testTagGCClearEmpty(env):
    """Check that tags whose documents were deleted disappear from the tag index after GC.

    Skipped on cluster.  Three documents with distinct tags are indexed, then
    deleted in two steps with ``forceInvokeGC`` called after each; the
    tag-index dump is expected to shrink accordingly.  Finally a previously
    removed tag is added again and must be searchable.

    Expected integers are written without the Python 2 ``L`` suffix so the
    module also parses under Python 3 (they compare equal in Python 2).
    """
    env.skipOnCluster()

    conn = getConnectionByEnv(env)
    conn.execute_command('FT.CONFIG', 'SET', 'FORK_GC_CLEAN_THRESHOLD', '0')
    conn.execute_command('FT.CREATE', 'idx', 'SCHEMA', 't', 'TAG')
    conn.execute_command('HSET', 'doc1', 't', 'foo')
    conn.execute_command('HSET', 'doc2', 't', 'bar')
    conn.execute_command('HSET', 'doc3', 't', 'baz')
    env.expect('FT.DEBUG', 'DUMP_TAGIDX', 'idx', 't').equal([['foo', [1]], ['bar', [2]], ['baz', [3]]])
    env.expect('FT.SEARCH', 'idx', '@t:{foo}').equal([1, 'doc1', ['t', 'foo']])

    # delete two tags
    conn.execute_command('DEL', 'doc1')
    conn.execute_command('DEL', 'doc2')
    forceInvokeGC(env, 'idx')
    env.expect('FT.DEBUG', 'DUMP_TAGIDX', 'idx', 't').equal([['baz', [3]]])
    env.expect('FT.SEARCH', 'idx', '@t:{foo}').equal([0])

    # delete last tag
    conn.execute_command('DEL', 'doc3')
    forceInvokeGC(env, 'idx')
    env.expect('FT.DEBUG', 'DUMP_TAGIDX', 'idx', 't').equal([])

    # check term can be used after being empty
    conn.execute_command('HSET', 'doc4', 't', 'foo')
    conn.execute_command('HSET', 'doc5', 't', 'foo')
    env.expect('FT.SEARCH', 'idx', '@t:{foo}')  \
        .equal([2, 'doc4', ['t', 'foo'], 'doc5', ['t', 'foo']])
284
def testTagGCClearEmptyWithCursor(env):
    """Read from an open aggregate cursor after GC removed the tag it was iterating.

    Skipped on cluster.  Two documents share the tag 'foo'.  A cursor is
    opened with COUNT 1, both documents are deleted, ``forceInvokeGC`` is
    called and the tag-index dump is asserted empty.  The following cursor
    read is expected to return no rows and a cursor id of 0.

    Expected integers are written without the Python 2 ``L`` suffix so the
    module also parses under Python 3 (they compare equal in Python 2).
    """
    env.skipOnCluster()

    conn = getConnectionByEnv(env)
    conn.execute_command('FT.CONFIG', 'SET', 'FORK_GC_CLEAN_THRESHOLD', '0')
    conn.execute_command('FT.CREATE', 'idx', 'SCHEMA', 't', 'TAG')
    conn.execute_command('HSET', 'doc1', 't', 'foo')
    conn.execute_command('HSET', 'doc2', 't', 'foo')
    env.expect('FT.DEBUG', 'DUMP_TAGIDX', 'idx', 't').equal([['foo', [1, 2]]])

    res, cursor = env.cmd('FT.AGGREGATE', 'idx', '@t:{foo}', 'WITHCURSOR', 'COUNT', '1')
    env.assertEqual(res, [1, []])

    # delete both documents and run the GC to clean 'foo' inverted index
    env.expect('DEL', 'doc1').equal(1)
    env.expect('DEL', 'doc2').equal(1)

    forceInvokeGC(env, 'idx')

    # make sure the inverted index was cleaned
    env.expect('FT.DEBUG', 'DUMP_TAGIDX', 'idx', 't').equal([])

    # read from the cursor
    res, cursor = env.cmd('FT.CURSOR', 'READ', 'idx', cursor)
    env.assertEqual(res, [0])
    env.assertEqual(cursor, 0)
311
def testTagGCClearEmptyWithCursorAndMoreData(env):
    """Like the cursor-after-GC test, but the same tag is re-added before the cursor read.

    Skipped on cluster.  After the original two 'foo' documents are deleted
    and ``forceInvokeGC`` has been called, two new documents with the same tag
    are added.  The pre-existing cursor is expected to return no rows and a
    cursor id of 0, while a fresh aggregate over the tag returns one (empty)
    row per new document.

    Expected integers are written without the Python 2 ``L`` suffix so the
    module also parses under Python 3 (they compare equal in Python 2).
    """
    env.skipOnCluster()

    conn = getConnectionByEnv(env)
    conn.execute_command('FT.CONFIG', 'SET', 'FORK_GC_CLEAN_THRESHOLD', '0')
    conn.execute_command('FT.CREATE', 'idx', 'SCHEMA', 't', 'TAG')
    conn.execute_command('HSET', 'doc1', 't', 'foo')
    conn.execute_command('HSET', 'doc2', 't', 'foo')
    env.expect('FT.DEBUG', 'DUMP_TAGIDX', 'idx', 't').equal([['foo', [1, 2]]])

    res, cursor = env.cmd('FT.AGGREGATE', 'idx', '@t:{foo}', 'WITHCURSOR', 'COUNT', '1')
    env.assertEqual(res, [1, []])

    # delete both documents and run the GC to clean 'foo' inverted index
    env.expect('DEL', 'doc1').equal(1)
    env.expect('DEL', 'doc2').equal(1)

    forceInvokeGC(env, 'idx')

    # make sure the inverted index was cleaned
    env.expect('FT.DEBUG', 'DUMP_TAGIDX', 'idx', 't').equal([])

    # add data
    conn.execute_command('HSET', 'doc3', 't', 'foo')
    conn.execute_command('HSET', 'doc4', 't', 'foo')
    env.expect('FT.DEBUG', 'DUMP_TAGIDX', 'idx', 't').equal([['foo', [3, 4]]])

    # read from the cursor
    res, cursor = conn.execute_command('FT.CURSOR', 'READ', 'idx', cursor)
    env.assertEqual(res, [0])
    env.assertEqual(cursor, 0)

    # ensure later documents with same tag are read
    res = conn.execute_command('FT.AGGREGATE', 'idx', '@t:{foo}')
    env.assertEqual(res, [1, [], []])
347
@unstable
def testEmptyTagLeak(env):
    """Add and delete a batch of uniquely tagged documents, then expect an empty tag index.

    Skipped on cluster.  Each cycle pipelines one HSET per document, executes
    the pipeline, then pipelines and executes the matching DELs.
    ``forceInvokeGC`` is called once after all cycles, after which the
    tag-index dump is expected to be empty.
    """
    env.skipOnCluster()

    num_cycles = 1
    tags_per_cycle = 30

    conn = getConnectionByEnv(env)
    conn.execute_command('FT.CONFIG', 'SET', 'FORK_GC_CLEAN_THRESHOLD', '0')
    conn.execute_command('FT.CREATE', 'idx', 'SCHEMA', 't', 'TAG')
    pipe = conn.pipeline()

    for cycle in range(num_cycles):
        # Document numbers are unique across cycles.
        first = cycle * tags_per_cycle
        doc_nums = range(first, first + tags_per_cycle)

        for doc_num in doc_nums:
            pipe.execute_command('HSET', 'doc{}'.format(doc_num), 't', 'tag{}'.format(doc_num))
        pipe.execute()

        for doc_num in doc_nums:
            pipe.execute_command('DEL', 'doc{}'.format(doc_num))
        pipe.execute()

    forceInvokeGC(env, 'idx')
    env.expect('FT.DEBUG', 'DUMP_TAGIDX', 'idx', 't').equal([])
370