1#!/usr/bin/env python
2#
3# Public Domain 2014-2018 MongoDB, Inc.
4# Public Domain 2008-2014 WiredTiger, Inc.
5#
6# This is free and unencumbered software released into the public domain.
7#
8# Anyone is free to copy, modify, publish, use, compile, sell, or
9# distribute this software, either in source code form or as a compiled
10# binary, for any purpose, commercial or non-commercial, and by any
11# means.
12#
13# In jurisdictions that recognize copyright laws, the author or authors
14# of this software dedicate any and all copyright interest in the
15# software to the public domain. We make this dedication for the benefit
16# of the public at large and to the detriment of our heirs and
17# successors. We intend this dedication to be an overt act of
18# relinquishment in perpetuity of all present and future rights to this
19# software under copyright law.
20#
21# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
24# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
25# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
26# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
27# OTHER DEALINGS IN THE SOFTWARE.
28
29import wiredtiger, wttest
30from wtscenario import make_scenarios
31
32# test_join01.py
33#    Join operations
34# Basic tests for join
class test_join01(wttest.WiredTigerTestCase):
    # Number of rows test_join() inserts into the table; also used by
    # gen_values() to spread the third value column into three bands.
    nentries = 100

    # Each *_scen list is one scenario dimension made of (name, values)
    # pairs.  The values are read back in this class as attributes of
    # self (self.ref, self.joincfg0, ...).

    # What the "ge" reference cursor is opened on: the table or index0.
    type_scen = [
        ('table', {'ref': 'table'}),
        ('index', {'ref': 'index'}),
    ]

    # Extra configuration appended to the joins that use joincfg0.
    bloom0_scen = [
        ('bloom0=0', {'joincfg0': ''}),
        ('bloom0=1000', {'joincfg0': ',strategy=bloom,count=1000'}),
        ('bloom0=10000', {'joincfg0': ',strategy=bloom,count=10000'}),
    ]

    # Extra configuration appended to the two joins on index1.
    bloom1_scen = [
        ('bloom1=0', {'joincfg1': ''}),
        ('bloom1=1000', {'joincfg1': ',strategy=bloom,count=1000'}),
        ('bloom1=10000', {'joincfg1': ',strategy=bloom,count=10000'}),
    ]

    # Whether the join cursor is opened with a '(v2,v1,v0,k)' projection.
    projection_scen = [
        ('no-projection', {'do_proj': False}),
        ('projection', {'do_proj': True}),
    ]

    # Whether an additional nested join is AND-ed into the main join.
    nested_scen = [
        ('simple', {'do_nested': False}),
        ('nested', {'do_nested': True}),
    ]

    # Whether the join statistics cursor is checked between passes.
    stats_scen = [
        ('no-stats', {'do_stats': False}),
        ('stats', {'do_stats': True}),
    ]

    # Which recorded join is applied first (see session_play_joins()).
    order_scen = [
        ('order=0', {'join_order': 0}),
        ('order=1', {'join_order': 1}),
        ('order=2', {'join_order': 2}),
        ('order=3', {'join_order': 3}),
    ]

    scenarios = make_scenarios(
        type_scen, bloom0_scen, bloom1_scen, projection_scen,
        nested_scen, stats_scen, order_scen,
        prune=50, prunelong=1000)

    # We need statistics for these tests.
    conn_config = 'statistics=(all)'
76
77    def gen_key(self, i):
78        return [ i + 1 ]
79
80    def gen_values(self, i):
81        s = str(i)
82        rs = s[::-1]
83        sort3 = (self.nentries * (i % 3)) + i    # multiples of 3 sort first
84        return [s, rs, sort3]
85
86    # Common function for testing iteration of join cursors
87    def iter_common(self, jc, do_proj, do_nested, join_order):
88        # See comments in join_common()
89        # The order that the results are seen depends on
90        # the ordering of the joins.  Specifically, the first
91        # join drives the order that results are seen.
92        if do_nested:
93            if join_order == 0:
94                expect = [73, 82, 83, 92]
95            elif join_order == 1:
96                expect = [73, 82, 83, 92]
97            elif join_order == 2:
98                expect = [82, 92, 73, 83]
99            elif join_order == 3:
100                expect = [92, 73, 82, 83]
101        else:
102            if join_order == 0:
103                expect = [73, 82, 62, 83, 92]
104            elif join_order == 1:
105                expect = [62, 73, 82, 83, 92]
106            elif join_order == 2:
107                expect = [62, 82, 92, 73, 83]
108            elif join_order == 3:
109                expect = [73, 82, 62, 83, 92]
110        while jc.next() == 0:
111            [k] = jc.get_keys()
112            i = k - 1
113            if do_proj:  # our projection reverses the values and adds the key
114                [v2,v1,v0,kproj] = jc.get_values()
115                self.assertEquals(k, kproj)
116            else:
117                [v0,v1,v2] = jc.get_values()
118            self.assertEquals(self.gen_values(i), [v0,v1,v2])
119            if len(expect) == 0 or i != expect[0]:
120                self.tty('ERROR: ' + str(i) + ' is not next in: ' +
121                         str(expect))
122                self.tty('JOIN ORDER=' + str(join_order) + ', NESTED=' + str(do_nested))
123            self.assertTrue(i == expect[0])
124            expect.remove(i)
125        self.assertEquals(0, len(expect))
126
    # Stats are checked after each of the first three passes over the
    # join cursor in test_join(); the join cursor is reset and iterated
    # again between checks.
130    def stats(self, jc, which):
131        statcur = self.session.open_cursor('statistics:join', jc, None)
132        # pick a stat we always expect to see
133        statdesc = 'bloom filter false positives'
134        expectstats = [
135            'join: index:join01:index1: ' + statdesc,
136            'join: index:join01:index2: ' + statdesc ]
137        if self.ref == 'index':
138            expectstats.append('join: index:join01:index0: ' + statdesc)
139        elif self.do_proj:
140            expectstats.append('join: table:join01(v2,v1,v0,k): ' + statdesc)
141        else:
142            expectstats.append('join: table:join01: ' + statdesc)
143        self.check_stats(statcur, expectstats)
144        statcur.reset()
145        self.check_stats(statcur, expectstats)
146        statcur.close()
147
148    def statstr_to_int(self, str):
149        """
150        Convert a statistics value string, which may be in either form:
151        '12345' or '33M (33604836)'
152        """
153        parts = str.rpartition('(')
154        return int(parts[2].rstrip(')'))
155
156    # All of the expect strings should appear
157    def check_stats(self, statcursor, expectstats):
158        stringclass = ''.__class__
159        intclass = (0).__class__
160
161        # Reset the cursor, we're called multiple times.
162        statcursor.reset()
163
164        self.printVerbose(3, 'statistics:')
165        for id, desc, valstr, val in statcursor:
166            self.assertEqual(type(desc), stringclass)
167            self.assertEqual(type(valstr), stringclass)
168            self.assertEqual(type(val), intclass)
169            self.assertEqual(val, self.statstr_to_int(valstr))
170            self.printVerbose(3, '  stat: \'' + desc + '\', \'' +
171                              valstr + '\', ' + str(val))
172            if desc in expectstats:
173                expectstats.remove(desc)
174
175        self.assertTrue(len(expectstats) == 0,
176                        'missing expected values in stats: ' + str(expectstats))
177
178    def session_record_join(self, jc, refc, config, order, joins):
179        joins.append([order, [jc, refc, config]])
180
181    def session_play_one_join(self, firsturi, jc, refc, config):
182        if refc.uri == firsturi and config != None:
183            config = config.replace('strategy=bloom','')
184        #self.tty('->join(jc, uri="' + refc.uri +
185        #         '", config="' + str(config) + '"')
186        self.session.join(jc, refc, config)
187
188    def session_play_joins(self, joins, join_order):
189        #self.tty('->')
190        firsturi = None
191        for [i, joinargs] in joins:
192            if i >= join_order:
193                if firsturi == None:
194                    firsturi = joinargs[1].uri
195                self.session_play_one_join(firsturi, *joinargs)
196        for [i, joinargs] in joins:
197            if i < join_order:
198                if firsturi == None:
199                    firsturi = joinargs[1].uri
200                self.session_play_one_join(firsturi, *joinargs)
201
202    # Common function for testing the most basic functionality
203    # of joins
204    def test_join(self):
205        joincfg0 = self.joincfg0
206        joincfg1 = self.joincfg1
207        do_proj = self.do_proj
208        do_nested = self.do_nested
209        do_stats = self.do_stats
210        join_order = self.join_order
211        #self.tty('join_common(' + joincfg0 + ',' + joincfg1 + ',' +
212        #         str(do_proj) + ',' + str(do_nested) + ',' +
213        #         str(do_stats) + ',' + str(join_order) + ')')
214
215        closeme = []
216        joins = []   # cursors to be joined
217
218        self.session.create('table:join01', 'key_format=r' +
219                            ',value_format=SSi,columns=(k,v0,v1,v2)')
220        self.session.create('index:join01:index0','columns=(v0)')
221        self.session.create('index:join01:index1','columns=(v1)')
222        self.session.create('index:join01:index2','columns=(v2)')
223
224        c = self.session.open_cursor('table:join01', None, None)
225        for i in range(0, self.nentries):
226            c.set_key(*self.gen_key(i))
227            c.set_value(*self.gen_values(i))
228            c.insert()
229        c.close()
230
231        if do_proj:
232            proj_suffix = '(v2,v1,v0,k)'  # Reversed values plus key
233        else:
234            proj_suffix = ''            # Default projection (v0,v1,v2)
235
236        # We join on index2 first, not using bloom indices.
237        # This defines the order that items are returned.
238        # index2 sorts multiples of 3 first (see gen_values())
239        # and by using 'gt' and key 99, we'll skip multiples of 3,
240        # and examine primary keys 2,5,8,...,95,98,1,4,7,...,94,97.
241        jc = self.session.open_cursor('join:table:join01' + proj_suffix,
242                                      None, None)
243        # Adding a projection to a reference cursor should be allowed.
244        c2 = self.session.open_cursor('index:join01:index2(v1)', None, None)
245        c2.set_key(99)   # skips all entries w/ primary key divisible by three
246        self.assertEquals(0, c2.search())
247        self.session_record_join(jc, c2, 'compare=gt', 0, joins)
248
249        # Then select all the numbers 0-99 whose string representation
250        # sort >= '60'.
251        if self.ref == 'index':
252            c0 = self.session.open_cursor('index:join01:index0', None, None)
253            c0.set_key('60')
254        else:
255            c0 = self.session.open_cursor('table:join01', None, None)
256            c0.set_key(60)
257        self.assertEquals(0, c0.search())
258        self.session_record_join(jc, c0, 'compare=ge' + joincfg0, 1, joins)
259
260        # Then select all numbers whose reverse string representation
261        # is in '20' < x < '40'.
262        c1a = self.session.open_cursor('index:join01:index1(v1)', None, None)
263        c1a.set_key('21')
264        self.assertEquals(0, c1a.search())
265        self.session_record_join(jc, c1a, 'compare=gt' + joincfg1, 2, joins)
266
267        c1b = self.session.open_cursor('index:join01:index1(v1)', None, None)
268        c1b.set_key('41')
269        self.assertEquals(0, c1b.search())
270        self.session_record_join(jc, c1b, 'compare=lt' + joincfg1, 2, joins)
271
272        # Numbers that satisfy these 3 conditions (with ordering implied by c2):
273        #    [73, 82, 62, 83, 92].
274        #
275        # After iterating, we should be able to reset and iterate again.
276        if do_nested:
277            # To test nesting, we create two new levels of conditions:
278            #
279            #     x == 72 or x == 73 or x == 82 or x == 83 or
280            #       (x >= 90 and x <= 99)
281            #
282            # that will get AND-ed into our existing join.  The expected
283            # result is   [73, 82, 83, 92].
284            #
285            # We don't specify the projection here, it should be picked up
286            # from the 'enclosing' join.
287            nest1 = self.session.open_cursor('join:table:join01', None, None)
288            nest2 = self.session.open_cursor('join:table:join01', None, None)
289
290            nc = self.session.open_cursor('index:join01:index0', None, None)
291            nc.set_key('90')
292            self.assertEquals(0, nc.search())
293            self.session.join(nest2, nc, 'compare=ge')  # joincfg left out
294            closeme.append(nc)
295
296            nc = self.session.open_cursor('index:join01:index0', None, None)
297            nc.set_key('99')
298            self.assertEquals(0, nc.search())
299            self.session.join(nest2, nc, 'compare=le')
300            closeme.append(nc)
301
302            self.session.join(nest1, nest2, "operation=or")
303
304            for val in [ '72', '73', '82', '83' ]:
305                nc = self.session.open_cursor('index:join01:index0', None, None)
306                nc.set_key(val)
307                self.assertEquals(0, nc.search())
308                self.session.join(nest1, nc, 'compare=eq,operation=or' +
309                                  joincfg0)
310                closeme.append(nc)
311            self.session_record_join(jc, nest1, None, 3, joins)
312
313        self.session_play_joins(joins, join_order)
314        self.iter_common(jc, do_proj, do_nested, join_order)
315        if do_stats:
316            self.stats(jc, 0)
317        jc.reset()
318        self.iter_common(jc, do_proj, do_nested, join_order)
319        if do_stats:
320            self.stats(jc, 1)
321        jc.reset()
322        self.iter_common(jc, do_proj, do_nested, join_order)
323        if do_stats:
324            self.stats(jc, 2)
325        jc.reset()
326        self.iter_common(jc, do_proj, do_nested, join_order)
327
328        jc.close()
329        c2.close()
330        c1a.close()
331        c1b.close()
332        c0.close()
333        if do_nested:
334            nest1.close()
335            nest2.close()
336            for c in closeme:
337                c.close()
338        self.session.drop('table:join01')
339
# When executed directly, hand control to wttest.run().
if __name__ == "__main__":
    wttest.run()
342