1#The MIT License (MIT)
2#Copyright (c) 2014 Microsoft Corporation
3
4#Permission is hereby granted, free of charge, to any person obtaining a copy
5#of this software and associated documentation files (the "Software"), to deal
6#in the Software without restriction, including without limitation the rights
7#to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8#copies of the Software, and to permit persons to whom the Software is
9#furnished to do so, subject to the following conditions:
10
11#The above copyright notice and this permission notice shall be included in all
12#copies or substantial portions of the Software.
13
14#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15#IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16#FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17#AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18#LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19#OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20#SOFTWARE.
21
22import unittest
23import uuid
24import pytest
25import azure.cosmos.documents as documents
26import azure.cosmos.cosmos_client as cosmos_client
27from azure.cosmos import query_iterable
28import azure.cosmos.base as base
29from six.moves import xrange
30import test.test_config as test_config
31
32#IMPORTANT NOTES:
33
34#      Most test cases in this file create collections in your Azure Cosmos account.
35#      Collections are billing entities.  By running these test cases, you may incur monetary costs on your account.
36
#      To run the tests, replace the two member fields (masterKey and host) with the values
#   associated with your Azure Cosmos account.
39
@pytest.mark.usefixtures("teardown")
class CrossPartitionTopOrderByTest(unittest.TestCase):
    """Tests for TOP and ORDER BY queries that span several partitions.

    A partitioned collection and a fixed set of 20 documents are created once
    per class (see ``setUpClass``).  Every test then issues a cross-partition
    query with a page size of 2 and validates the ids, their order and the
    paging behaviour of the returned iterable.
    """

    host = test_config._test_config.host
    masterKey = test_config._test_config.masterKey
    connectionPolicy = test_config._test_config.connectionPolicy

    @classmethod
    def setUpClass(cls):
        # Creating the database, the collection and the documents only once
        # per class (instead of once per test) speeds up the test run.
        if (cls.masterKey == '[YOUR_KEY_HERE]' or
                cls.host == '[YOUR_ENDPOINT_HERE]'):
            raise Exception(
                "You must specify your Azure Cosmos account values for "
                "'masterKey' and 'host' at the top of this class to run the "
                "tests.")

        cls.client = cosmos_client.CosmosClient(cls.host, {'masterKey': cls.masterKey}, cls.connectionPolicy)
        cls.created_db = test_config._test_config.create_database_if_not_exist(cls.client)
        cls.created_collection = cls.create_collection(cls.client, cls.created_db)
        cls.collection_link = cls.GetDocumentCollectionLink(cls.created_db, cls.created_collection)

        # The documents every test queries against.  'spam2' deliberately
        # mixes types (a string for i == 3, a number otherwise) so that
        # ordering by it yields non-comparable order-by items.
        cls.document_definitions = [
            {
                'id': str(i),
                'name': 'sample document',
                'spam': 'eggs' + str(i),
                'cnt': i,
                'key': 'value',
                'spam2': ('eggs' + str(i)) if i == 3 else i,
                'boolVar': (i % 2 == 0),
                'number': 1.1 * i,
            }
            for i in xrange(20)
        ]

        cls.insert_doc()

    @classmethod
    def tearDownClass(cls):
        # Remove the collection created in setUpClass.
        cls.client.DeleteContainer(cls.collection_link)

    def setUp(self):
        # sanity check: the tests expect the collection to be spread over
        # at least 5 partition key ranges
        partition_key_ranges = list(self.client._ReadPartitionKeyRanges(self.collection_link))
        self.assertGreaterEqual(len(partition_key_ranges), 5)

        # sanity check: read documents after creation
        queried_docs = list(self.client.ReadItems(self.collection_link))
        self.assertEqual(
            len(queried_docs),
            len(self.document_definitions),
            'create should increase the number of documents')

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _query_options():
        """Return a fresh options dict: cross-partition query, page size 2."""
        return {'enableCrossPartitionQuery': True, 'maxItemCount': 2}

    def _docs_sorted_by(self, field, reverse=False):
        """Return the document definitions sorted locally by ``field``."""
        return sorted(self.document_definitions, key=lambda doc: doc[field], reverse=reverse)

    def _ids_sorted_by(self, field, reverse=False):
        """Return the ids of the document definitions sorted by ``field``."""
        return [doc['id'] for doc in self._docs_sorted_by(field, reverse)]

    def _ids_in_first_two_nonempty_ranges(self):
        """Return the ids of the docs held by the first two non-empty ranges.

        Walks the partition key ranges in the order the service lists them,
        collects the documents of the first two ranges that contain any, and
        sanity-checks that this is a strict, non-trivial subset of all the
        documents.
        """
        partition_key_ranges = list(self.client._ReadPartitionKeyRanges(self.collection_link))
        docs_by_range_id = self.find_docs_by_partition_key_range_id()

        nonempty_ranges = 0
        collected_docs = []
        for pk_range in partition_key_ranges:
            if nonempty_ranges >= 2:
                break
            range_docs = docs_by_range_id[pk_range['id']]
            if range_docs:
                collected_docs.extend(range_docs)
                nonempty_ranges += 1

        # sanity checks
        self.assertEqual(nonempty_ranges, 2)
        self.assertLess(2, len(partition_key_ranges))
        self.assertLess(len(collected_docs), len(self.document_definitions))
        self.assertGreater(len(collected_docs), 1)

        return [doc['id'] for doc in collected_docs]

    # ------------------------------------------------------------------
    # order by tests
    # ------------------------------------------------------------------
    def test_orderby_query(self):
        # a simple order by query, passed as a query spec dict
        query = {
            'query': 'SELECT * FROM root r order by r.spam',
        }

        # validates the results size and order
        self.execute_query_and_validate_results(
            query, self._query_options(), self._ids_sorted_by('spam'))

    def test_orderby_query_as_string(self):
        # a simple order by query, passed as a plain string
        query = 'SELECT * FROM root r order by r.spam'

        # validates the results size and order
        self.execute_query_and_validate_results(
            query, self._query_options(), self._ids_sorted_by('spam'))

    def test_orderby_asc_query(self):
        # an order by query with ascending ordering explicitly mentioned
        query = {
            'query': 'SELECT * FROM root r order by r.spam ASC',
        }

        # validates the results size and order
        self.execute_query_and_validate_results(
            query, self._query_options(), self._ids_sorted_by('spam'))

    def test_orderby_desc_query(self):
        # an order by query with explicit descending ordering
        query = {
            'query': 'SELECT * FROM root r order by r.spam DESC',
        }

        # validates the results size and order
        self.execute_query_and_validate_results(
            query, self._query_options(), self._ids_sorted_by('spam', reverse=True))

    def test_orderby_top_query(self):
        # an order by query combined with top, where more docs exist than
        # the requested top count
        top_count = 9
        # sanity check
        self.assertLess(top_count, len(self.document_definitions))

        query = {
            'query': 'SELECT top %d * FROM root r order by r.spam' % top_count
        }
        expected_ordered_ids = self._ids_sorted_by('spam')[:top_count]

        self.execute_query_and_validate_results(query, self._query_options(), expected_ordered_ids)

    def test_orderby_top_query_less_results_than_top_counts(self):
        # an order by query combined with top, where top is greater than the
        # total number of docs
        top_count = 30
        # sanity check
        self.assertGreater(top_count, len(self.document_definitions))

        query = {
            'query': 'SELECT top %d * FROM root r order by r.spam' % top_count
        }

        self.execute_query_and_validate_results(
            query, self._query_options(), self._ids_sorted_by('spam'))

    # ------------------------------------------------------------------
    # top tests (no order by)
    # ------------------------------------------------------------------
    def test_top_query(self):
        # test a simple top query without order by.
        # The rewrittenQuery in the query execution info responded by backend will be empty
        expected_ordered_ids = self._ids_in_first_two_nonempty_ranges()

        # a top query, the results will be sorted based on the target partition key range
        query = {
            'query': 'SELECT top %d * FROM root r' % len(expected_ordered_ids)
        }
        self.execute_query_and_validate_results(query, self._query_options(), expected_ordered_ids)

    def test_top_query_as_string(self):
        # test a simple top query without order by, passed as a plain string.
        # The rewrittenQuery in the query execution info responded by backend will be empty
        expected_ordered_ids = self._ids_in_first_two_nonempty_ranges()

        # a top query, the results will be sorted based on the target partition key range
        query = 'SELECT top %d * FROM root r' % len(expected_ordered_ids)
        self.execute_query_and_validate_results(query, self._query_options(), expected_ordered_ids)

    def test_parametrized_top_query(self):
        # test a simple parameterized top query without order by.
        # The rewrittenQuery in the query execution info responded by backend will be empty
        expected_ordered_ids = self._ids_in_first_two_nonempty_ranges()

        # a top query, the results will be sorted based on the target partition key range
        query = {
            'query': 'SELECT top @n * FROM root r',
            "parameters": [
                {"name": "@n", "value": len(expected_ordered_ids)}
            ]
        }
        self.execute_query_and_validate_results(query, self._query_options(), expected_ordered_ids)

    # ------------------------------------------------------------------
    # parametrized order by tests
    # ------------------------------------------------------------------
    def test_orderby_query_with_parametrized_top(self):
        # an order by query combined with parametrized top
        top_count = 9
        # sanity check
        self.assertLess(top_count, len(self.document_definitions))

        expected_ordered_ids = self._ids_sorted_by('spam')[:top_count]

        query = {
            'query': 'SELECT top @n * FROM root r order by r.spam',
            "parameters": [
                {"name": "@n", "value": top_count}
            ]
        }

        self.execute_query_and_validate_results(query, self._query_options(), expected_ordered_ids)

    def test_orderby_query_with_parametrized_predicate(self):
        # an order by query combined with parametrized predicate
        query = {
            'query': 'SELECT * FROM root r where r.cnt > @cnt order by r.spam',
            "parameters": [
                {"name": "@cnt", "value": 5}
            ]
        }

        expected_ordered_ids = [doc['id'] for doc in self._docs_sorted_by('spam') if doc['cnt'] > 5]

        self.execute_query_and_validate_results(query, self._query_options(), expected_ordered_ids)

    def test_orderby_query_noncomparable_orderby_item(self):
        # order by a field whose values have different types across docs;
        # the query is expected to fail with a ValueError
        query = {
            'query': 'SELECT * FROM root r order by r.spam2 DESC',
        }

        expected_ordered_ids = self._ids_sorted_by('id')

        try:
            self.execute_query_and_validate_results(query, self._query_options(), expected_ordered_ids)
        except ValueError as e:
            # Use e.args[0] for both alternatives: exceptions have no
            # ``message`` attribute on Python 3.
            self.assertIn(
                e.args[0],
                ("Expected String, but got Number.", "Expected Number, but got String."))
        else:
            self.fail('non comparable order by items did not result in failure.')

    # ------------------------------------------------------------------
    # order by on other data types
    # ------------------------------------------------------------------
    def test_orderby_integer_query(self):
        # an order by integer query
        query = {
            'query': 'SELECT * FROM root r order by r.cnt',
        }

        # validates the results size and order
        self.execute_query_and_validate_results(
            query, self._query_options(), self._ids_sorted_by('cnt'))

    def test_orderby_floating_point_number_query(self):
        # an order by floating point number query
        query = {
            'query': 'SELECT * FROM root r order by r.number',
        }

        # validates the results size and order
        self.execute_query_and_validate_results(
            query, self._query_options(), self._ids_sorted_by('number'))

    def test_orderby_boolean_query(self):
        # an order by boolean query
        query = {
            'query': 'SELECT * FROM root r order by r.boolVar',
        }

        results = list(self.client.QueryItems(self.collection_link, query, self._query_options()))

        # validates the results size
        self.assertEqual(len(results), len(self.document_definitions))

        # validates the order: false values come before true values.  Docs
        # with an even id were created with boolVar == True, odd ids with
        # False, so once the first true value shows up every following doc
        # must be true and carry an even id.
        seen_true = False
        for doc in results:
            if doc['boolVar']:
                seen_true = True
            if seen_true:
                self.assertTrue(doc['boolVar'])
                self.assertEqual(int(doc['id']) % 2, 0)
            else:
                self.assertEqual(int(doc['id']) % 2, 1)

    # ------------------------------------------------------------------
    # shared query helpers
    # ------------------------------------------------------------------
    def find_docs_by_partition_key_range_id(self):
        """Query each partition key range separately.

        Returns:
            dict mapping every partition key range id of the collection to
            the list of documents returned by ``SELECT * FROM root r`` when
            it is targeted at that range only.
        """
        query = {
            'query': 'SELECT * FROM root r'
        }

        # these only depend on the collection, not on the individual range
        path = base.GetPathFromLink(self.collection_link, 'docs')
        collection_id = base.GetResourceIdOrFullNameFromLink(self.collection_link)

        docs_by_partition_key_range_id = {}
        for pk_range in self.client._ReadPartitionKeyRanges(self.collection_link):
            # Bind the range id as a default argument so the fetch function
            # does not depend on the loop variable's later values.
            def fetch_fn(options, range_id=pk_range['id']):
                return self.client.QueryFeed(path, collection_id, query, options, range_id)

            doc_results_iterable = query_iterable.QueryIterable(
                self.client, query, {}, fetch_fn, self.collection_link)

            self.assertNotIn(pk_range['id'], docs_by_partition_key_range_id)
            docs_by_partition_key_range_id[pk_range['id']] = list(doc_results_iterable)
        return docs_by_partition_key_range_id

    def execute_query_and_validate_results(self, query, options, expected_ordered_ids):
        """Run ``query`` and check its results against ``expected_ordered_ids``.

        The results are consumed twice: once item by item through the
        iterator protocol and once page by page through
        ``fetch_next_block()``.  Both passes must yield exactly the expected
        ids in the expected order; in the paged pass every page except the
        last must contain ``options['maxItemCount']`` items.

        Args:
            query: query string or query spec dict passed to ``QueryItems``.
            options: query options; must contain ``'maxItemCount'``.
            expected_ordered_ids: the document ids expected, in order.
        """
        page_size = options['maxItemCount']
        expected_count = len(expected_ordered_ids)

        result_iterable = self.client.QueryItems(self.collection_link, query, options)

        self.assertIsInstance(result_iterable, query_iterable.QueryIterable)

        ######################################
        # test next() behavior
        ######################################
        it = iter(result_iterable)

        # validate that invocations of next() produce the same results as expected_ordered_ids
        for expected_id in expected_ordered_ids:
            self.assertEqual(next(it)['id'], expected_id)

        # after the result set is exhausted, invoking next must raise a StopIteration exception
        self.assertRaises(StopIteration, next, it)

        ######################################
        # test fetch_next_block() behavior
        ######################################
        collected = []
        while True:
            fetched_res = result_iterable.fetch_next_block()
            fetched_size = len(fetched_res)

            for item in fetched_res:
                # check the bound first so that a surplus item is reported
                # as a test failure rather than as an IndexError
                self.assertLess(len(collected), expected_count, "more results than expected")
                self.assertEqual(item['id'], expected_ordered_ids[len(collected)])
                collected.append(item)

            if len(collected) < expected_count:
                # every page but the last one must be full
                self.assertEqual(fetched_size, page_size, "page size")
            else:
                self.assertLessEqual(fetched_size, page_size, "last page size")
                break

        # validate the number of collected results
        self.assertEqual(len(collected), expected_count)

        # no more results will be returned
        self.assertEqual(result_iterable.fetch_next_block(), [])

    # ------------------------------------------------------------------
    # resource creation and link helpers
    # ------------------------------------------------------------------
    @classmethod
    def create_collection(cls, client, created_db):
        """Create the partitioned collection used by the tests.

        The collection gets a uniquely named id, Range indexes for numbers
        and strings on every path, and a hash partition key on ``/id``.

        Args:
            client: the Cosmos client used to create the container.
            created_db: the database resource (dict) to create it in.

        Returns:
            The created collection resource as returned by the client.
        """
        collection_definition = {
            'id': 'orderby_tests collection ' + str(uuid.uuid4()),
            'indexingPolicy': {
                'includedPaths': [
                    {
                        'path': '/',
                        'indexes': [
                            {
                                'kind': 'Range',
                                'dataType': 'Number'
                            },
                            {
                                'kind': 'Range',
                                'dataType': 'String'
                            }
                        ]
                    }
                ]
            },
            'partitionKey': {
                'paths': [
                    '/id'
                ],
                'kind': documents.PartitionKind.Hash
            }
        }

        # NOTE(review): the high throughput is presumably what makes the
        # service split the collection into the >= 5 partition key ranges
        # that setUp checks for - confirm.
        collection_options = {'offerThroughput': 30000}

        return client.CreateContainer(cls.GetDatabaseLink(created_db),
                                      collection_definition,
                                      collection_options)

    @classmethod
    def insert_doc(cls):
        """Create every document of ``document_definitions`` in the collection.

        Returns:
            list of the created document resources, in definition order.
        """
        return [cls.client.CreateItem(cls.collection_link, definition)
                for definition in cls.document_definitions]

    @classmethod
    def GetDatabaseLink(cls, database, is_name_based=True):
        """Return the name-based link of ``database``, or its ``_self`` link."""
        if is_name_based:
            return 'dbs/' + database['id']
        return database['_self']

    @classmethod
    def GetDocumentCollectionLink(cls, database, document_collection, is_name_based=True):
        """Return the name-based link of the collection, or its ``_self`` link."""
        if is_name_based:
            return cls.GetDatabaseLink(database) + '/colls/' + document_collection['id']
        return document_collection['_self']

    @classmethod
    def GetDocumentLink(cls, database, document_collection, document, is_name_based=True):
        """Return the name-based link of the document, or its ``_self`` link."""
        if is_name_based:
            return cls.GetDocumentCollectionLink(database, document_collection) + '/docs/' + document['id']
        return document['_self']
614
if __name__ == "__main__":
    # Run the tests of this module when the file is executed directly.
    # To run a single test instead, uncomment and adjust the line below.
    #import sys;sys.argv = ['', 'Test.testName']
    unittest.main()