#!/usr/bin/env python
#
# Public Domain 2014-2018 MongoDB, Inc.
# Public Domain 2008-2014 WiredTiger, Inc.
#
# This is free and unencumbered software released into the public domain.
#
# Anyone is free to copy, modify, publish, use, compile, sell, or
# distribute this software, either in source code form or as a compiled
# binary, for any purpose, commercial or non-commercial, and by any
# means.
#
# In jurisdictions that recognize copyright laws, the author or authors
# of this software dedicate any and all copyright interest in the
# software to the public domain. We make this dedication for the benefit
# of the public at large and to the detriment of our heirs and
# successors. We intend this dedication to be an overt act of
# relinquishment in perpetuity of all present and future rights to this
# software under copyright law.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
import wiredtiger, wttest
from wtscenario import make_scenarios

# test_join01.py
#    Join operations
# Basic tests for join
class test_join01(wttest.WiredTigerTestCase):
    # Number of records inserted into the table; record i (0-based) is
    # stored under primary key i + 1 (see gen_key()).
    nentries = 100

    # Whether the ">= 60" condition is joined via index0 or the main table.
    type_scen = [
        ('table', dict(ref='table')),
        ('index', dict(ref='index'))
    ]
    # Extra join configuration appended to the c0 join (and to the nested
    # equality joins).
    bloom0_scen = [
        ('bloom0=0', dict(joincfg0='')),
        ('bloom0=1000', dict(joincfg0=',strategy=bloom,count=1000')),
        ('bloom0=10000', dict(joincfg0=',strategy=bloom,count=10000')),
    ]
    # Extra join configuration appended to both index1 range joins.
    bloom1_scen = [
        ('bloom1=0', dict(joincfg1='')),
        ('bloom1=1000', dict(joincfg1=',strategy=bloom,count=1000')),
        ('bloom1=10000', dict(joincfg1=',strategy=bloom,count=10000')),
    ]
    projection_scen = [
        ('no-projection', dict(do_proj=False)),
        ('projection', dict(do_proj=True))
    ]
    nested_scen = [
        ('simple', dict(do_nested=False)),
        ('nested', dict(do_nested=True))
    ]
    stats_scen = [
        ('no-stats', dict(do_stats=False)),
        ('stats', dict(do_stats=True))
    ]
    # Which recorded join is applied first (see session_play_joins()).
    order_scen = [
        ('order=0', dict(join_order=0)),
        ('order=1', dict(join_order=1)),
        ('order=2', dict(join_order=2)),
        ('order=3', dict(join_order=3)),
    ]
    scenarios = make_scenarios(type_scen, bloom0_scen, bloom1_scen,
                               projection_scen, nested_scen, stats_scen,
                               order_scen, prune=50, prunelong=1000)

    # We need statistics for these tests.
    conn_config = 'statistics=(all)'

    # Return the key (as an argument list for set_key) for record i.
    def gen_key(self, i):
        return [ i + 1 ]

    # Return the three column values [v0, v1, v2] for record i:
    # the decimal string, the reversed decimal string, and an integer
    # that sorts multiples of 3 ahead of everything else.
    def gen_values(self, i):
        s = str(i)
        rs = s[::-1]
        sort3 = (self.nentries * (i % 3)) + i    # multiples of 3 sort first
        return [s, rs, sort3]

    # Common function for testing iteration of join cursors.
    #
    # Walks the join cursor 'jc' to the end, checking that every returned
    # record has the values gen_values() produced for it, and that the
    # records arrive in exactly the expected order.
    def iter_common(self, jc, do_proj, do_nested, join_order):
        # See comments in test_join()
        # The order that the results are seen depends on
        # the ordering of the joins.  Specifically, the first
        # join drives the order that results are seen.
        if do_nested:
            expect_by_order = {
                0: [73, 82, 83, 92],
                1: [73, 82, 83, 92],
                2: [82, 92, 73, 83],
                3: [92, 73, 82, 83],
            }
        else:
            expect_by_order = {
                0: [73, 82, 62, 83, 92],
                1: [62, 73, 82, 83, 92],
                2: [62, 82, 92, 73, 83],
                3: [73, 82, 62, 83, 92],
            }
        expect = expect_by_order[join_order]
        pos = 0     # index of the next expected entry
        while jc.next() == 0:
            [k] = jc.get_keys()
            i = k - 1
            if do_proj:  # our projection reverses the values and adds the key
                [v2,v1,v0,kproj] = jc.get_values()
                self.assertEqual(k, kproj)
            else:
                [v0,v1,v2] = jc.get_values()
            self.assertEqual(self.gen_values(i), [v0,v1,v2])
            if pos >= len(expect) or i != expect[pos]:
                # Report the mismatch as a test failure, including the case
                # where the cursor returns more results than expected.
                remaining = str(expect[pos:])
                self.tty('ERROR: ' + str(i) + ' is not next in: ' +
                         remaining)
                self.tty('JOIN ORDER=' + str(join_order) + ', NESTED=' +
                         str(do_nested))
                self.fail(str(i) + ' is not next in: ' + remaining)
            pos += 1
        # Every expected entry must have been seen.
        self.assertEqual(len(expect), pos)

    # Stats are collected twice: after iterating
    # through the join cursor once, and secondly after resetting
    # the join cursor and iterating again.
    #
    # 'which' identifies the calling pass; it is currently unused.
    def stats(self, jc, which):
        statcur = self.session.open_cursor('statistics:join', jc, None)
        # pick a stat we always expect to see
        statdesc = 'bloom filter false positives'
        expectstats = [
            'join: index:join01:index1: ' + statdesc,
            'join: index:join01:index2: ' + statdesc ]
        if self.ref == 'index':
            expectstats.append('join: index:join01:index0: ' + statdesc)
        elif self.do_proj:
            expectstats.append('join: table:join01(v2,v1,v0,k): ' + statdesc)
        else:
            expectstats.append('join: table:join01: ' + statdesc)
        # The same expectations are verified before and after a reset of
        # the statistics cursor.
        self.check_stats(statcur, expectstats)
        statcur.reset()
        self.check_stats(statcur, expectstats)
        statcur.close()

    # NOTE: the parameter name shadows the builtin 'str'; it is kept
    # unchanged so existing callers are unaffected.
    def statstr_to_int(self, str):
        """
        Convert a statistics value string, which may be in either form:
        '12345' or '33M (33604836)'
        """
        # Everything after the last '(' (or the whole string when there is
        # none), minus any trailing ')'.
        parts = str.rpartition('(')
        return int(parts[2].rstrip(')'))

    # All of the expect strings should appear.
    #
    # Iterates the statistics cursor, type-checking every entry, and fails
    # if any description in 'expectstats' is not found.  The caller's list
    # is not modified, so it can be reused for a later check.
    def check_stats(self, statcursor, expectstats):
        stringclass = ''.__class__
        intclass = (0).__class__

        # Work on a copy: removing entries from the caller's list would
        # turn any subsequent check with the same list into a no-op.
        missing = list(expectstats)

        # Reset the cursor, we're called multiple times.
        statcursor.reset()

        self.printVerbose(3, 'statistics:')
        for _statid, desc, valstr, val in statcursor:
            self.assertEqual(type(desc), stringclass)
            self.assertEqual(type(valstr), stringclass)
            self.assertEqual(type(val), intclass)
            self.assertEqual(val, self.statstr_to_int(valstr))
            self.printVerbose(3, ' stat: \'' + desc + '\', \'' +
                              valstr + '\', ' + str(val))
            if desc in missing:
                missing.remove(desc)

        self.assertEqual(len(missing), 0,
                         'missing expected values in stats: ' + str(missing))

    # Remember a join to be issued later; 'order' is the position used by
    # session_play_joins() to decide which join goes first.
    def session_record_join(self, jc, refc, config, order, joins):
        joins.append([order, [jc, refc, config]])

    # Issue one recorded join.  Joins whose reference cursor has the same
    # URI as the first join played have 'strategy=bloom' stripped from
    # their configuration.
    def session_play_one_join(self, firsturi, jc, refc, config):
        if refc.uri == firsturi and config is not None:
            config = config.replace('strategy=bloom','')
        #self.tty('->join(jc, uri="' + refc.uri +
        #         '", config="' + str(config) + '"')
        self.session.join(jc, refc, config)

    # Issue all recorded joins, rotated so that those recorded with an
    # order >= join_order come first, followed by the remaining ones; the
    # recorded sequence is otherwise preserved.
    def session_play_joins(self, joins, join_order):
        #self.tty('->')
        rotated = ([entry for entry in joins if entry[0] >= join_order] +
                   [entry for entry in joins if entry[0] < join_order])
        firsturi = None
        for _order, joinargs in rotated:
            if firsturi is None:
                firsturi = joinargs[1].uri
            self.session_play_one_join(firsturi, *joinargs)

    # Common function for testing the most basic functionality
    # of joins
    def test_join(self):
        joincfg0 = self.joincfg0
        joincfg1 = self.joincfg1
        do_proj = self.do_proj
        do_nested = self.do_nested
        do_stats = self.do_stats
        join_order = self.join_order
        #self.tty('join_common(' + joincfg0 + ',' + joincfg1 + ',' +
        #         str(do_proj) + ',' + str(do_nested) + ',' +
        #         str(do_stats) + ',' + str(join_order) + ')')

        closeme = []
        joins = []   # cursors to be joined

        self.session.create('table:join01', 'key_format=r' +
                            ',value_format=SSi,columns=(k,v0,v1,v2)')
        self.session.create('index:join01:index0','columns=(v0)')
        self.session.create('index:join01:index1','columns=(v1)')
        self.session.create('index:join01:index2','columns=(v2)')

        c = self.session.open_cursor('table:join01', None, None)
        for i in range(0, self.nentries):
            c.set_key(*self.gen_key(i))
            c.set_value(*self.gen_values(i))
            c.insert()
        c.close()

        if do_proj:
            proj_suffix = '(v2,v1,v0,k)'  # Reversed values plus key
        else:
            proj_suffix = ''              # Default projection (v0,v1,v2)

        # We join on index2 first, not using bloom indices.
        # This defines the order that items are returned.
        # index2 sorts multiples of 3 first (see gen_values())
        # and by using 'gt' and key 99, we'll skip multiples of 3,
        # and examine records i = 1,4,7,...,94,97,2,5,8,...,95,98
        # (stored under primary key i + 1).
        jc = self.session.open_cursor('join:table:join01' + proj_suffix,
                                      None, None)
        # Adding a projection to a reference cursor should be allowed.
        c2 = self.session.open_cursor('index:join01:index2(v1)', None, None)
        c2.set_key(99)   # skips all records whose number i is divisible by three
        self.assertEqual(0, c2.search())
        self.session_record_join(jc, c2, 'compare=gt', 0, joins)

        # Then select all the numbers 0-99 whose string representation
        # sort >= '60' (index case), or whose primary key is >= 60
        # (table case).
        if self.ref == 'index':
            c0 = self.session.open_cursor('index:join01:index0', None, None)
            c0.set_key('60')
        else:
            c0 = self.session.open_cursor('table:join01', None, None)
            c0.set_key(60)
        self.assertEqual(0, c0.search())
        self.session_record_join(jc, c0, 'compare=ge' + joincfg0, 1, joins)

        # Then select all numbers whose reverse string representation
        # x satisfies '21' < x < '41'.
        c1a = self.session.open_cursor('index:join01:index1(v1)', None, None)
        c1a.set_key('21')
        self.assertEqual(0, c1a.search())
        self.session_record_join(jc, c1a, 'compare=gt' + joincfg1, 2, joins)

        c1b = self.session.open_cursor('index:join01:index1(v1)', None, None)
        c1b.set_key('41')
        self.assertEqual(0, c1b.search())
        self.session_record_join(jc, c1b, 'compare=lt' + joincfg1, 2, joins)

        # Numbers that satisfy these 3 conditions (with ordering implied by c2):
        #    [73, 82, 62, 83, 92].
        #
        # After iterating, we should be able to reset and iterate again.
        if do_nested:
            # To test nesting, we create two new levels of conditions:
            #
            #     x == 72 or x == 73 or x == 82 or x == 83 or
            #       (x >= 90 and x <= 99)
            #
            # that will get AND-ed into our existing join.  The expected
            # result is  [73, 82, 83, 92].
            #
            # We don't specify the projection here, it should be picked up
            # from the 'enclosing' join.
            nest1 = self.session.open_cursor('join:table:join01', None, None)
            nest2 = self.session.open_cursor('join:table:join01', None, None)

            nc = self.session.open_cursor('index:join01:index0', None, None)
            nc.set_key('90')
            self.assertEqual(0, nc.search())
            self.session.join(nest2, nc, 'compare=ge')  # joincfg left out
            closeme.append(nc)

            nc = self.session.open_cursor('index:join01:index0', None, None)
            nc.set_key('99')
            self.assertEqual(0, nc.search())
            self.session.join(nest2, nc, 'compare=le')
            closeme.append(nc)

            self.session.join(nest1, nest2, "operation=or")

            for val in [ '72', '73', '82', '83' ]:
                nc = self.session.open_cursor('index:join01:index0', None, None)
                nc.set_key(val)
                self.assertEqual(0, nc.search())
                self.session.join(nest1, nc, 'compare=eq,operation=or' +
                                  joincfg0)
                closeme.append(nc)
            self.session_record_join(jc, nest1, None, 3, joins)

        self.session_play_joins(joins, join_order)

        # Iterate, optionally check statistics, and reset: three times over,
        # then iterate once more to confirm the final reset worked.
        for which in range(3):
            self.iter_common(jc, do_proj, do_nested, join_order)
            if do_stats:
                self.stats(jc, which)
            jc.reset()
        self.iter_common(jc, do_proj, do_nested, join_order)

        jc.close()
        c2.close()
        c1a.close()
        c1b.close()
        c0.close()
        if do_nested:
            nest1.close()
            nest2.close()
            for c in closeme:
                c.close()
        self.session.drop('table:join01')

if __name__ == '__main__':
    wttest.run()