1
2import unittest
3from RLTest import Env
4import platform
5from time import sleep
6from includes import *
7from common import *
8
9
def testBasicGC(env):
    """Check that GC removes a deleted document from text, numeric and tag indexes.

    Adds 101 documents (doc0..doc100) with a text, a numeric and a tag field,
    verifies that the debug dumps of the three indexes list ids 1..101,
    deletes doc0, forces GC and verifies that the dumps now list ids 2..101.

    Skipped when running against a cluster.
    """
    if env.isCluster():
        raise unittest.SkipTest()
    env.expect('ft.config', 'set', 'FORK_GC_CLEAN_THRESHOLD', 0).equal('OK')
    env.assertOk(env.cmd('ft.create', 'idx', 'ON', 'HASH',
                         'schema', 'title', 'text', 'id', 'numeric', 't', 'tag'))
    waitForIndex(env, 'idx')
    for i in range(101):
        env.assertOk(env.cmd('ft.add', 'idx', 'doc%d' % i, 1.0, 'fields',
                             'title', 'hello world',
                             'id', '5',
                             't', 'tag1'))

    # Plain ints are used instead of long(): ints and longs compare equal on
    # Python 2, and `long` does not exist on Python 3.
    ids_before = list(range(1, 102))
    env.assertEqual(env.cmd('ft.debug', 'DUMP_INVIDX', 'idx', 'world'), ids_before)
    env.assertEqual(env.cmd('ft.debug', 'DUMP_NUMIDX', 'idx', 'id'), [ids_before])
    env.assertEqual(env.cmd('ft.debug', 'DUMP_TAGIDX', 'idx', 't'), [['tag1', ids_before]])

    env.assertEqual(env.cmd('ft.del', 'idx', 'doc0'), 1)

    for _ in range(100):
        # gc is random so we need to invoke it enough times for it to work
        forceInvokeGC(env, 'idx')

    # check that the gc collected the deleted doc (id 1 is gone from every dump)
    ids_after = list(range(2, 102))
    env.assertEqual(env.cmd('ft.debug', 'DUMP_INVIDX', 'idx', 'world'), ids_after)
    env.assertEqual(env.cmd('ft.debug', 'DUMP_NUMIDX', 'idx', 'id'), [ids_after])
    env.assertEqual(env.cmd('ft.debug', 'DUMP_TAGIDX', 'idx', 't'), [['tag1', ids_after]])
37
def testBasicGCWithEmptyInvIdx(env):
    """Delete the only document of a term and expect GC to drop its inverted index.

    After the single document is deleted and GC is forced, dumping the
    inverted index of 'world' must fail with 'Can not find the inverted index'.

    Skipped on cluster and when the module args contain 'GC_POLICY LEGACY'.
    """
    if env.isCluster():
        raise unittest.SkipTest()
    module_args = env.moduleArgs
    if module_args is not None and 'GC_POLICY LEGACY' in module_args:
        # this test is not relevant for the legacy gc because it does not
        # squash the inverted index
        raise unittest.SkipTest()

    dump_world = ('ft.debug', 'DUMP_INVIDX', 'idx', 'world')

    env.expect('ft.config', 'set', 'FORK_GC_CLEAN_THRESHOLD', 0).equal('OK')
    created = env.cmd('ft.create', 'idx', 'ON', 'HASH', 'schema', 'title', 'text')
    env.assertOk(created)
    waitForIndex(env, 'idx')
    added = env.cmd('ft.add', 'idx', 'doc1', 1.0, 'fields', 'title', 'hello world')
    env.assertOk(added)

    # the single document is listed under id 1 before the delete
    env.assertEqual(env.cmd(*dump_world), [1])

    deleted = env.cmd('ft.del', 'idx', 'doc1')
    env.assertEqual(deleted, 1)

    forceInvokeGC(env, 'idx')

    # check that the gc collected the deleted doc: the index itself is gone
    env.expect(*dump_world).error().contains('Can not find the inverted index')
58
def testNumericGCIntensive(env):
    """Delete every other document of a numeric index and check the GC result.

    1000 documents are added with the same numeric value, doc0, doc2, ... are
    deleted and GC is forced 100 times. Every id left in the DUMP_NUMIDX reply
    must then be even or greater than 900.

    Skipped when running against a cluster.
    """
    if env.isCluster():
        raise unittest.SkipTest()

    doc_count = 1000
    env.expect('ft.config', 'set', 'FORK_GC_CLEAN_THRESHOLD', 0).equal('OK')
    env.assertOk(env.cmd('ft.create', 'idx', 'ON', 'HASH', 'schema', 'id', 'numeric'))
    waitForIndex(env, 'idx')

    for n in range(doc_count):
        reply = env.cmd('ft.add', 'idx', 'doc%d' % n, 1.0, 'fields', 'id', '1')
        env.assertOk(reply)

    for n in range(0, doc_count, 2):
        reply = env.cmd('ft.del', 'idx', 'doc%d' % n)
        env.assertEqual(reply, 1)

    for _ in range(100):
        forceInvokeGC(env, 'idx')

    dump = env.cmd('ft.debug', 'DUMP_NUMIDX', 'idx', 'id')
    for id_list in dump:
        for doc_id in id_list:
            # an id greater than 900 is on the last block, and fork GC does
            # not clean the last block
            is_even = doc_id % 2 == 0
            env.assertTrue(is_even or doc_id > 900)
81
def testGeoGCIntensive(env):
    """Delete every other document of a geo index and check the GC result.

    1000 documents are added with the same geo value, doc0, doc2, ... are
    deleted and GC is forced 100 times. Every id left in the DUMP_NUMIDX reply
    for the geo field must then be even or greater than 900.

    Skipped when running against a cluster.
    """
    if env.isCluster():
        raise unittest.SkipTest()

    doc_count = 1000
    env.expect('ft.config', 'set', 'FORK_GC_CLEAN_THRESHOLD', 0).equal('OK')
    env.assertOk(env.cmd('ft.create', 'idx', 'ON', 'HASH', 'schema', 'g', 'geo'))
    waitForIndex(env, 'idx')

    for n in range(doc_count):
        reply = env.cmd('ft.add', 'idx', 'doc%d' % n, 1.0, 'fields', 'g', '12.34,56.78')
        env.assertOk(reply)

    for n in range(0, doc_count, 2):
        reply = env.cmd('ft.del', 'idx', 'doc%d' % n)
        env.assertEqual(reply, 1)

    for _ in range(100):
        forceInvokeGC(env, 'idx')

    # the geo field is dumped through the numeric index debug command
    dump = env.cmd('ft.debug', 'DUMP_NUMIDX', 'idx', 'g')
    for id_list in dump:
        for doc_id in id_list:
            # an id greater than 900 is on the last block, and fork GC does
            # not clean the last block
            is_even = doc_id % 2 == 0
            env.assertTrue(is_even or doc_id > 900)
104
def testTagGC(env):
    """Delete every other document of a tag index and check the GC result.

    101 documents are added with the same tag, doc0, doc2, ... are deleted and
    GC is forced 100 times. For each entry of the DUMP_TAGIDX reply, every id
    in its second element must then be even or greater than 100.

    Skipped when running against a cluster.
    """
    if env.isCluster():
        raise unittest.SkipTest()

    doc_count = 101
    env.expect('ft.config', 'set', 'FORK_GC_CLEAN_THRESHOLD', 0).equal('OK')
    env.assertOk(env.cmd('ft.create', 'idx', 'ON', 'HASH', 'schema', 't', 'tag'))
    waitForIndex(env, 'idx')

    for n in range(doc_count):
        reply = env.cmd('ft.add', 'idx', 'doc%d' % n, 1.0, 'fields', 't', '1')
        env.assertOk(reply)

    for n in range(0, doc_count, 2):
        reply = env.cmd('ft.del', 'idx', 'doc%d' % n)
        env.assertEqual(reply, 1)

    for _ in range(100):
        # gc is random so we need to invoke it enough times for it to work
        forceInvokeGC(env, 'idx')

    dump = env.cmd('ft.debug', 'DUMP_TAGIDX', 'idx', 't')
    for tag_entry in dump:
        doc_ids = tag_entry[1]
        for doc_id in doc_ids:
            # an id greater than 100 is on the last block, and fork GC does
            # not clean the last block
            is_even = doc_id % 2 == 0
            env.assertTrue(is_even or doc_id > 100)
128
def testDeleteEntireBlock(env):
    """Check that search still works after a range of documents is deleted and collected.

    Adds 700 documents sharing the term 'checking' in field 'test', deletes
    doc400..doc500, and verifies that a query for doc250 returns exactly that
    document: once right after the deletes, and again after GC was forced,
    on every iteration of env.reloading_iterator().

    Skipped when running against a cluster.
    """
    if env.isCluster():
        raise unittest.SkipTest()
    env.expect('ft.config', 'set', 'FORK_GC_CLEAN_THRESHOLD', 0).equal('OK')
    env.expect('FT.CREATE', 'idx', 'ON', 'HASH',
               'SCHEMA', 'test', 'TEXT', 'SORTABLE', 'test2', 'TEXT', 'SORTABLE', ).ok()
    waitForIndex(env, 'idx')
    # original note: this creates 5 blocks on the 'checking' inverted index
    # (block count is not verifiable from this file -- TODO confirm)
    for i in range(700):
        env.expect('FT.ADD', 'idx', 'doc%d' % i, '1.0', 'FIELDS', 'test', 'checking', 'test2', 'checking%d' % i).ok()

    # The expected count is a plain int: `1L` is a syntax error on Python 3,
    # and on Python 2 ints and longs compare equal.
    expected_head = [1, 'doc250']
    expected_fields = set(['test', 'checking', 'test2', 'checking250'])

    # delete docs in the middle of the inverted index, make sure the binary search is not broken
    for i in range(400, 501):
        env.expect('FT.DEL', 'idx', 'doc%d' % i).equal(1)
    res = env.cmd('FT.SEARCH', 'idx', '@test:checking @test2:checking250')
    env.assertEqual(res[0:2], expected_head)
    env.assertEqual(set(res[2]), expected_fields)

    # actually clean the inverted index, make sure the binary search is not broken,
    # check also after rdb reload
    for _ in range(100):
        # gc is random so we need to invoke it enough times for it to work
        forceInvokeGC(env, 'idx')
    for _ in env.reloading_iterator():
        waitForIndex(env, 'idx')
        res = env.cmd('FT.SEARCH', 'idx', '@test:checking @test2:checking250')
        env.assertEqual(res[0:2], expected_head)
        env.assertEqual(set(res[2]), expected_fields)
156
def testGCIntegrationWithRedisFork(env):
    """Check that BGSAVE succeeds both before and after a forced GC run.

    Sets FORKGC_SLEEP_BEFORE_EXIT to 4 for the duration of the test
    (presumably so the GC fork lingers while the second BGSAVE is issued --
    confirm against the module's config docs) and restores it to 0 afterwards.

    Skipped for 'existing-env' and 'enterprise' environments, on cluster, and
    when the configured GC_POLICY is not 'fork'.
    """
    if env.env == 'existing-env':
        env.skip()
    if env.env == 'enterprise':
        env.skip()
    if env.isCluster():
        raise unittest.SkipTest()
    if env.cmd('FT.CONFIG', 'GET', 'GC_POLICY')[0][1] != 'fork':
        raise unittest.SkipTest()
    # The clean threshold is set once; the original issued the same command twice.
    env.expect('ft.config', 'set', 'FORK_GC_CLEAN_THRESHOLD', 0).equal('OK')
    env.expect('FT.CONFIG', 'SET', 'FORKGC_SLEEP_BEFORE_EXIT', '4').ok()
    try:
        env.expect('FT.CREATE', 'idx', 'ON', 'HASH', 'SCHEMA', 'title', 'TEXT', 'SORTABLE').ok()
        waitForIndex(env, 'idx')
        env.expect('FT.ADD', 'idx', 'doc1', 1.0, 'FIELDS', 'title', 'hello world').ok()
        env.expect('bgsave').true()
        forceInvokeGC(env, 'idx')
        env.expect('bgsave').true()
    finally:
        # Always restore the sleep setting so an exception raised above does
        # not leave it at 4 on this env.
        env.cmd('FT.CONFIG', 'SET', 'FORKGC_SLEEP_BEFORE_EXIT', '0')
176
def testGCThreshold(env):
    """Exercise FORK_GC_CLEAN_THRESHOLD 1000 with deletes, REPLACE and REPLACE PARTIAL.

    A fresh Env is started with 'GC_POLICY FORK FORK_GC_CLEAN_THRESHOLD 1000'.
    In each of the three phases the test expects that, after 999 documents
    were removed from a term and GC was forced, the term's DUMP_INVIDX reply
    is unchanged, and that after the 1000th document is removed and GC is
    forced again the dump fails with 'Can not find the inverted index'.

    Skipped for 'existing-env' and on cluster.
    """
    if env.env == 'existing-env':
        env.skip()
    if env.isCluster():
        raise unittest.SkipTest()

    env = Env(moduleArgs='GC_POLICY FORK FORK_GC_CLEAN_THRESHOLD 1000')
    env.expect('FT.CREATE', 'idx', 'ON', 'HASH', 'SCHEMA', 'title', 'TEXT', 'SORTABLE').ok()
    waitForIndex(env, 'idx')

    doc_total = 1000
    all_but_last = doc_total - 1
    last_doc = 'doc%d' % all_but_last
    index_gone = 'Can not find the inverted index'

    def dump_cmd(term):
        # Build the debug command that dumps the inverted index of `term`.
        return ('FT.DEBUG', 'DUMP_INVIDX', 'idx', term)

    # phase 1: plain deletes
    for n in range(doc_total):
        env.expect('FT.ADD', 'idx', 'doc%d' % n, '1.0', 'FIELDS', 'title', 'foo').ok()

    snapshot = env.cmd(*dump_cmd('foo'))

    for n in range(all_but_last):
        env.expect('FT.DEL', 'idx', 'doc%d' % n).equal(1)

    forceInvokeGC(env, 'idx')
    env.expect(*dump_cmd('foo')).equal(snapshot)

    env.expect('FT.DEL', 'idx', last_doc).equal(1)

    forceInvokeGC(env, 'idx')
    env.expect(*dump_cmd('foo')).error().contains(index_gone)

    # phase 2: retry with replace
    for n in range(doc_total):
        env.expect('FT.ADD', 'idx', 'doc%d' % n, '1.0', 'FIELDS', 'title', 'foo').ok()

    snapshot = env.cmd(*dump_cmd('foo'))

    for n in range(all_but_last):
        env.expect('FT.ADD', 'idx', 'doc%d' % n, '1.0', 'REPLACE', 'FIELDS', 'title', 'foo1').ok()

    forceInvokeGC(env, 'idx')
    env.expect(*dump_cmd('foo')).equal(snapshot)

    env.expect('FT.ADD', 'idx', last_doc, '1.0', 'REPLACE', 'FIELDS', 'title', 'foo1').ok()

    forceInvokeGC(env, 'idx')
    env.expect(*dump_cmd('foo')).error().contains(index_gone)

    # phase 3: retry with replace partial, starting from the 'foo1' docs of phase 2
    snapshot = env.cmd(*dump_cmd('foo1'))

    for n in range(all_but_last):
        env.expect('FT.ADD', 'idx', 'doc%d' % n, '1.0', 'REPLACE', 'PARTIAL', 'FIELDS', 'title', 'foo2').ok()

    forceInvokeGC(env, 'idx')
    env.expect(*dump_cmd('foo1')).equal(snapshot)

    env.expect('FT.ADD', 'idx', last_doc, '1.0', 'REPLACE', 'PARTIAL', 'FIELDS', 'title', 'foo2').ok()

    forceInvokeGC(env, 'idx')
    env.expect(*dump_cmd('foo1')).error().contains(index_gone)
239
def testGCShutDownOnExit(env):
    """Stop and restart the server right after a background GC was triggered.

    A fresh Env is started with 'GC_POLICY FORK FORKGC_SLEEP_BEFORE_EXIT 20',
    a background GC is invoked, and the env is stopped and started again.
    The test then flushes the data and re-creates the index to confirm the
    server came back up.

    Skipped for 'existing-env' and 'enterprise' environments, on cluster, and
    on Darwin.
    """
    unsupported = (
        env.env in ('existing-env', 'enterprise')
        or env.isCluster()
        or platform.system() == 'Darwin'
    )
    if unsupported:
        env.skip()

    threshold_cmd = ('ft.config', 'set', 'FORK_GC_CLEAN_THRESHOLD', 0)
    create_cmd = ('FT.CREATE', 'idx', 'ON', 'HASH', 'SCHEMA', 'title', 'TEXT', 'SORTABLE')

    # NOTE(review): this configures the incoming env, which is replaced by a
    # new Env on the next line -- confirm whether this call is still needed.
    env.expect(*threshold_cmd).equal('OK')
    env = Env(moduleArgs='GC_POLICY FORK FORKGC_SLEEP_BEFORE_EXIT 20')
    env.assertOk(env.execute_command(*threshold_cmd))
    env.expect(*create_cmd).ok()
    waitForIndex(env, 'idx')
    env.expect('FT.DEBUG', 'GC_FORCEBGINVOKE', 'idx').ok()
    env.stop()
    env.start()

    # make sure server started successfully
    env.cmd('flushall')
    env.expect(*create_cmd).ok()
    waitForIndex(env, 'idx')
256
def testGFreeEmpryTerms(env):
    """Check that GC removes a term once all documents containing it are deleted.

    A fresh Env is started with 'GC_POLICY FORK'. 200 hashes with the term
    'foo' are written and then deleted; DUMP_TERMS must list ['foo'] before a
    forced GC and [] after it.

    Skipped for 'existing-env' and 'enterprise' environments and on cluster.
    """
    if env.env == 'existing-env' or env.env == 'enterprise' or env.isCluster():
        env.skip()

    env = Env(moduleArgs='GC_POLICY FORK')
    env.expect('FT.CREATE', 'idx', 'ON', 'HASH', 'SCHEMA', 't', 'TEXT').ok()

    # The replies are asserted explicitly: an expect() without a matcher
    # checks nothing, so a failing HSET/DEL would go unnoticed.
    for i in range(200):
        # one new field is added to a new hash
        env.expect('hset', 'doc%d' % i, 't', 'foo').equal(1)

    for i in range(200):
        # exactly one key is removed
        env.expect('del', 'doc%d' % i).equal(1)

    env.expect('FT.DEBUG', 'DUMP_TERMS', 'idx').equal(['foo'])
    forceInvokeGC(env, 'idx')
    env.expect('FT.DEBUG', 'DUMP_TERMS', 'idx').equal([])
273
274