1# Copyright (c) 2012 Mitch Garnaat http://garnaat.org/
2# Copyright (c) 2012 Amazon.com, Inc. or its affiliates.  All Rights Reserved
3#
4# Permission is hereby granted, free of charge, to any person obtaining a
5# copy of this software and associated documentation files (the
6# "Software"), to deal in the Software without restriction, including
7# without limitation the rights to use, copy, modify, merge, publish, dis-
8# tribute, sublicense, and/or sell copies of the Software, and to permit
9# persons to whom the Software is furnished to do so, subject to the fol-
10# lowing conditions:
11#
12# The above copyright notice and this permission notice shall be included
13# in all copies or substantial portions of the Software.
14#
15# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
16# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
17# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
18# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
19# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21# IN THE SOFTWARE.
22#
23
24from boto.dynamodb.batch import BatchList
25from boto.dynamodb.schema import Schema
26from boto.dynamodb.item import Item
27from boto.dynamodb import exceptions as dynamodb_exceptions
28import time
29
30
class TableBatchGenerator(object):
    """
    A low-level generator used to page through results from
    batch_get_item operations.

    Keys are submitted in slices of at most 100 per request.  Keys that
    come back under ``UnprocessedKeys`` for this table are appended to
    the pending list and requested again by a later iteration.

    :ivar table: The table object the keys are fetched from.  Its
        ``name`` and ``layer2`` attributes are used.
    :ivar keys: The list of keys that still have to be requested.
    :ivar attributes_to_get: The attribute names handed to every batch,
        or None.
    :ivar consistent_read: Whether a consistent read is requested for
        every batch.
    :ivar consumed_units: An integer that holds the number of
        ConsumedCapacityUnits accumulated thus far for this
        generator.
    """

    def __init__(self, table, keys, attributes_to_get=None,
                 consistent_read=False):
        """
        :type table: :class:`boto.dynamodb.table.Table`
        :param table: The table to read from.

        :type keys: list
        :param keys: The keys to retrieve.  Any iterable is accepted; it
            is copied, so the caller's sequence is never modified.

        :type attributes_to_get: list
        :param attributes_to_get: A list of attribute names, or None.

        :type consistent_read: bool
        :param consistent_read: If True, a consistent read is requested
            for each batch.
        """
        self.table = table
        # Keep a private, mutable copy: unprocessed keys are appended to
        # this list, which would fail for a tuple and must not leak into
        # the sequence owned by the caller.
        self.keys = list(keys)
        self.consumed_units = 0
        self.attributes_to_get = attributes_to_get
        self.consistent_read = consistent_read

    def _queue_unprocessed(self, res):
        """
        Append the keys listed under ``UnprocessedKeys`` for this table
        in ``res`` to the pending key list.

        Each key is queued as a ``(hash, range)`` tuple; ``range`` is
        None when the key carries no ``RangeKeyElement``.

        :type res: dict
        :param res: The response of a batch request.
        """
        unprocessed = res.get(u'UnprocessedKeys') or {}
        table_name = self.table.name
        if table_name not in unprocessed:
            return

        for key in unprocessed[table_name][u'Keys']:
            self.keys.append((key[u'HashKeyElement'],
                              key.get(u'RangeKeyElement')))

    def __iter__(self):
        """
        Yield every item returned for the pending keys, issuing as many
        batch requests as needed and accumulating ``consumed_units``.
        """
        while self.keys:
            # Build the next batch
            batch = BatchList(self.table.layer2)
            # Only pass consistent_read along when it was actually asked
            # for, so the default call is exactly what it has always been.
            options = {}
            if self.consistent_read:
                options['consistent_read'] = self.consistent_read
            batch.add_batch(self.table, self.keys[:100],
                            self.attributes_to_get, **options)
            res = batch.submit()

            # parse the results
            table_name = self.table.name
            if table_name not in res[u'Responses']:
                # NOTE(review): the same slice of keys is submitted again
                # here, without any delay -- confirm this retry is intended.
                continue
            table_response = res[u'Responses'][table_name]
            self.consumed_units += table_response[u'ConsumedCapacityUnits']
            for elem in table_response[u'Items']:
                yield elem

            # drop the keys just requested, then re-queue unprocessed keys
            self.keys = self.keys[100:]
            self._queue_unprocessed(res)
80
81
class Table(object):
    """
    An Amazon DynamoDB table.

    :ivar name: The name of the table.
    :ivar create_time: The date and time that the table was created.
    :ivar status: The current status of the table.  One of:
        'ACTIVE', 'UPDATING', 'DELETING'.
    :ivar schema: A :class:`boto.dynamodb.schema.Schema` object representing
        the schema defined for the table, or None if no schema is known
        yet.
    :ivar item_count: The number of items in the table.  This value is
        set only when the Table object is created or refreshed and
        may not reflect the actual count.
    :ivar size_bytes: Total size of the specified table, in bytes.
        Amazon DynamoDB updates this value approximately every six hours.
        Recent changes might not be reflected in this value.
    :ivar read_units: The ReadCapacityUnits of the tables
        Provisioned Throughput.
    :ivar write_units: The WriteCapacityUnits of the tables
        Provisioned Throughput.
    """

    def __init__(self, layer2, response):
        """

        :type layer2: :class:`boto.dynamodb.layer2.Layer2`
        :param layer2: A `Layer2` api object.

        :type response: dict
        :param response: The output of
            `boto.dynamodb.layer1.Layer1.describe_table`.

        """
        self.layer2 = layer2
        self._dict = {}
        # Give the ``schema`` property something to return even when the
        # response carries no 'KeySchema' entry.
        self._schema = None
        self.update_from_response(response)

    @classmethod
    def create_from_schema(cls, layer2, name, schema):
        """Create a Table object.

        If you know the name and schema of your table, you can
        create a ``Table`` object without having to make any
        API calls (normally an API call is made to retrieve
        the schema of a table).

        Example usage::

            table = Table.create_from_schema(
                boto.connect_dynamodb(),
                'tablename',
                Schema.create(hash_key=('keyname', 'N')))

        :type layer2: :class:`boto.dynamodb.layer2.Layer2`
        :param layer2: A ``Layer2`` api object.

        :type name: str
        :param name: The name of the table.

        :type schema: :class:`boto.dynamodb.schema.Schema`
        :param schema: The schema associated with the table.

        :rtype: :class:`boto.dynamodb.table.Table`
        :return: A Table object representing the table.

        """
        # Build the table from a minimal describe_table-shaped response,
        # then attach the caller-supplied schema.
        table = cls(layer2, {'Table': {'TableName': name}})
        table._schema = schema
        return table

    def __repr__(self):
        return 'Table(%s)' % self.name

    @property
    def name(self):
        return self._dict['TableName']

    @property
    def create_time(self):
        return self._dict.get('CreationDateTime', None)

    @property
    def status(self):
        return self._dict.get('TableStatus', None)

    @property
    def item_count(self):
        return self._dict.get('ItemCount', 0)

    @property
    def size_bytes(self):
        return self._dict.get('TableSizeBytes', 0)

    @property
    def schema(self):
        return self._schema

    @property
    def read_units(self):
        # None when the table data holds no provisioned throughput.
        try:
            return self._dict['ProvisionedThroughput']['ReadCapacityUnits']
        except KeyError:
            return None

    @property
    def write_units(self):
        # None when the table data holds no provisioned throughput.
        try:
            return self._dict['ProvisionedThroughput']['WriteCapacityUnits']
        except KeyError:
            return None

    def update_from_response(self, response):
        """
        Update the state of the Table object based on the response
        data received from Amazon DynamoDB.

        :type response: dict
        :param response: A response holding the table data under either
            the 'Table' or the 'TableDescription' key.  A response with
            neither key leaves the stored data untouched.
        """
        # 'Table' is from a describe_table call.
        if 'Table' in response:
            self._dict.update(response['Table'])
        # 'TableDescription' is from a create_table call.
        elif 'TableDescription' in response:
            self._dict.update(response['TableDescription'])
        # Rebuild the schema whenever the table data carries a key schema.
        if 'KeySchema' in self._dict:
            self._schema = Schema(self._dict['KeySchema'])

    def refresh(self, wait_for_active=False, retry_seconds=5):
        """
        Refresh all of the fields of the Table object by calling
        the underlying DescribeTable request.

        :type wait_for_active: bool
        :param wait_for_active: If True, this command will not return
            until the table status, as returned from Amazon DynamoDB, is
            'ACTIVE'.

        :type retry_seconds: int
        :param retry_seconds: If wait_for_active is True, this
            parameter controls the number of seconds of delay between
            calls to describe_table in Amazon DynamoDB.  Default is
            5 seconds.
        """
        while True:
            response = self.layer2.describe_table(self.name)
            self.update_from_response(response)
            # A single describe is enough unless the caller wants to wait
            # for the table to become active.
            if not wait_for_active or self.status == 'ACTIVE':
                break
            time.sleep(retry_seconds)

    def update_throughput(self, read_units, write_units):
        """
        Update the ProvisionedThroughput for the Amazon DynamoDB Table.

        :type read_units: int
        :param read_units: The new value for ReadCapacityUnits.

        :type write_units: int
        :param write_units: The new value for WriteCapacityUnits.
        """
        self.layer2.update_throughput(self, read_units, write_units)

    def delete(self):
        """
        Delete this table and all items in it.  After calling this
        the Table objects status attribute will be set to 'DELETING'.
        """
        self.layer2.delete_table(self)

    def get_item(self, hash_key, range_key=None,
                 attributes_to_get=None, consistent_read=False,
                 item_class=Item):
        """
        Retrieve an existing item from the table.

        :type hash_key: int|long|float|str|unicode|Binary
        :param hash_key: The HashKey of the requested item.  The
            type of the value must match the type defined in the
            schema for the table.

        :type range_key: int|long|float|str|unicode|Binary
        :param range_key: The optional RangeKey of the requested item.
            The type of the value must match the type defined in the
            schema for the table.

        :type attributes_to_get: list
        :param attributes_to_get: A list of attribute names.
            If supplied, only the specified attribute names will
            be returned.  Otherwise, all attributes will be returned.

        :type consistent_read: bool
        :param consistent_read: If True, a consistent read
            request is issued.  Otherwise, an eventually consistent
            request is issued.

        :type item_class: Class
        :param item_class: Allows you to override the class used
            to generate the items. This should be a subclass of
            :class:`boto.dynamodb.item.Item`
        """
        return self.layer2.get_item(self, hash_key, range_key,
                                    attributes_to_get, consistent_read,
                                    item_class)
    # ``lookup`` is kept as an alias of ``get_item``.
    lookup = get_item

    def has_item(self, hash_key, range_key=None, consistent_read=False):
        """
        Checks the table to see if the Item with the specified ``hash_key``
        exists. This may save a tiny bit of time/bandwidth over a
        straight :py:meth:`get_item` if you have no intention to touch
        the data that is returned, since this method specifically tells
        Amazon not to return anything but the Item's hash key attribute.

        :type hash_key: int|long|float|str|unicode|Binary
        :param hash_key: The HashKey of the requested item.  The
            type of the value must match the type defined in the
            schema for the table.

        :type range_key: int|long|float|str|unicode|Binary
        :param range_key: The optional RangeKey of the requested item.
            The type of the value must match the type defined in the
            schema for the table.

        :type consistent_read: bool
        :param consistent_read: If True, a consistent read
            request is issued.  Otherwise, an eventually consistent
            request is issued.

        :rtype: bool
        :returns: ``True`` if the Item exists, ``False`` if not.
        """
        try:
            # Attempt to get the key. If it can't be found, it'll raise
            # an exception.
            self.get_item(hash_key, range_key=range_key,
                          # This minimizes the size of the response body.
                          # The attribute to fetch is the *name* of the
                          # hash key attribute, not the key value.
                          attributes_to_get=[self.schema.hash_key_name],
                          consistent_read=consistent_read)
        except dynamodb_exceptions.DynamoDBKeyNotFoundError:
            # Key doesn't exist.
            return False
        return True

    def new_item(self, hash_key=None, range_key=None, attrs=None,
                 item_class=Item):
        """
        Return a new, unsaved Item which can later be PUT to
        Amazon DynamoDB.

        This method has explicit (but optional) parameters for
        the hash_key and range_key values of the item.  You can use
        these explicit parameters when calling the method, such as::

            >>> my_item = my_table.new_item(hash_key='a', range_key=1,
                                        attrs={'key1': 'val1', 'key2': 'val2'})
            >>> my_item
            {u'bar': 1, u'foo': 'a', 'key1': 'val1', 'key2': 'val2'}

        Or, if you prefer, you can simply put the hash_key and range_key
        in the attrs dictionary itself, like this::

            >>> attrs = {'foo': 'a', 'bar': 1, 'key1': 'val1', 'key2': 'val2'}
            >>> my_item = my_table.new_item(attrs=attrs)
            >>> my_item
            {u'bar': 1, u'foo': 'a', 'key1': 'val1', 'key2': 'val2'}

        The effect is the same.

        .. note::
           The explicit parameters take priority over the values in
           the attrs dict.  So, if you have a hash_key or range_key
           in the attrs dict and you also supply either or both using
           the explicit parameters, the values in the attrs will be
           ignored.

        :type hash_key: int|long|float|str|unicode|Binary
        :param hash_key: The HashKey of the new item.  The
            type of the value must match the type defined in the
            schema for the table.

        :type range_key: int|long|float|str|unicode|Binary
        :param range_key: The optional RangeKey of the new item.
            The type of the value must match the type defined in the
            schema for the table.

        :type attrs: dict
        :param attrs: A dictionary of key value pairs used to
            populate the new item.

        :type item_class: Class
        :param item_class: Allows you to override the class used
            to generate the items. This should be a subclass of
            :class:`boto.dynamodb.item.Item`
        """
        return item_class(self, hash_key, range_key, attrs)

    def query(self, hash_key, *args, **kw):
        """
        Perform a query on the table.

        All positional and keyword arguments after ``hash_key`` are
        handed unchanged to the ``query`` method of the Layer2 object.

        :type hash_key: int|long|float|str|unicode|Binary
        :param hash_key: The HashKey of the requested item.  The
            type of the value must match the type defined in the
            schema for the table.

        :type range_key_condition: :class:`boto.dynamodb.condition.Condition`
        :param range_key_condition: A Condition object.
            Condition object can be one of the following types:

            EQ|LE|LT|GE|GT|BEGINS_WITH|BETWEEN

            The only condition which expects or will accept two
            values is 'BETWEEN', otherwise a single value should
            be passed to the Condition constructor.

        :type attributes_to_get: list
        :param attributes_to_get: A list of attribute names.
            If supplied, only the specified attribute names will
            be returned.  Otherwise, all attributes will be returned.

        :type request_limit: int
        :param request_limit: The maximum number of items to retrieve
            from Amazon DynamoDB on each request.  You may want to set
            a specific request_limit based on the provisioned throughput
            of your table.  The default behavior is to retrieve as many
            results as possible per request.

        :type max_results: int
        :param max_results: The maximum number of results that will
            be retrieved from Amazon DynamoDB in total.  For example,
            if you only wanted to see the first 100 results from the
            query, regardless of how many were actually available, you
            could set max_results to 100 and the generator returned
            from the query method will only yield 100 results max.

        :type consistent_read: bool
        :param consistent_read: If True, a consistent read
            request is issued.  Otherwise, an eventually consistent
            request is issued.

        :type scan_index_forward: bool
        :param scan_index_forward: Specified forward or backward
            traversal of the index.  Default is forward (True).

        :type exclusive_start_key: list or tuple
        :param exclusive_start_key: Primary key of the item from
            which to continue an earlier query.  This would be
            provided as the LastEvaluatedKey in that query.

        :type count: bool
        :param count: If True, Amazon DynamoDB returns a total
            number of items for the Query operation, even if the
            operation has no matching items for the assigned filter.
            If count is True, the actual items are not returned and
            the count is accessible as the ``count`` attribute of
            the returned object.


        :type item_class: Class
        :param item_class: Allows you to override the class used
            to generate the items. This should be a subclass of
            :class:`boto.dynamodb.item.Item`
        """
        return self.layer2.query(self, hash_key, *args, **kw)

    def scan(self, *args, **kw):
        """
        Scan through this table, this is a very long
        and expensive operation, and should be avoided if
        at all possible.

        All positional and keyword arguments are handed unchanged to
        the ``scan`` method of the Layer2 object.

        :type scan_filter: A dict
        :param scan_filter: A dictionary where the key is the
            attribute name and the value is a
            :class:`boto.dynamodb.condition.Condition` object.
            Valid Condition objects include:

             * EQ - equal (1)
             * NE - not equal (1)
             * LE - less than or equal (1)
             * LT - less than (1)
             * GE - greater than or equal (1)
             * GT - greater than (1)
             * NOT_NULL - attribute exists (0, use None)
             * NULL - attribute does not exist (0, use None)
             * CONTAINS - substring or value in list (1)
             * NOT_CONTAINS - absence of substring or value in list (1)
             * BEGINS_WITH - substring prefix (1)
             * IN - exact match in list (N)
             * BETWEEN - >= first value, <= second value (2)

        :type attributes_to_get: list
        :param attributes_to_get: A list of attribute names.
            If supplied, only the specified attribute names will
            be returned.  Otherwise, all attributes will be returned.

        :type request_limit: int
        :param request_limit: The maximum number of items to retrieve
            from Amazon DynamoDB on each request.  You may want to set
            a specific request_limit based on the provisioned throughput
            of your table.  The default behavior is to retrieve as many
            results as possible per request.

        :type max_results: int
        :param max_results: The maximum number of results that will
            be retrieved from Amazon DynamoDB in total.  For example,
            if you only wanted to see the first 100 results from the
            scan, regardless of how many were actually available, you
            could set max_results to 100 and the generator returned
            from the scan method will only yield 100 results max.

        :type count: bool
        :param count: If True, Amazon DynamoDB returns a total
            number of items for the Scan operation, even if the
            operation has no matching items for the assigned filter.
            If count is True, the actual items are not returned and
            the count is accessible as the ``count`` attribute of
            the returned object.

        :type exclusive_start_key: list or tuple
        :param exclusive_start_key: Primary key of the item from
            which to continue an earlier query.  This would be
            provided as the LastEvaluatedKey in that query.

        :type item_class: Class
        :param item_class: Allows you to override the class used
            to generate the items. This should be a subclass of
            :class:`boto.dynamodb.item.Item`

        :return: A TableGenerator (generator) object which will iterate
            over all results
        :rtype: :class:`boto.dynamodb.layer2.TableGenerator`
        """
        return self.layer2.scan(self, *args, **kw)

    def batch_get_item(self, keys, attributes_to_get=None):
        """
        Return a set of attributes for multiple items from a single table
        using their primary keys. This abstraction removes the 100 Items per
        batch limitations as well as the "UnprocessedKeys" logic.

        :type keys: list
        :param keys: A list of scalar or tuple values.  Each element in the
            list represents one Item to retrieve.  If the schema for the
            table has both a HashKey and a RangeKey, each element in the
            list should be a tuple consisting of (hash_key, range_key).  If
            the schema for the table contains only a HashKey, each element
            in the list should be a scalar value of the appropriate type
            for the table schema. NOTE: The maximum number of items that
            can be retrieved for a single operation is 100. Also, the
            number of items retrieved is constrained by a 1 MB size limit.

        :type attributes_to_get: list
        :param attributes_to_get: A list of attribute names.
            If supplied, only the specified attribute names will
            be returned.  Otherwise, all attributes will be returned.

        :return: A TableBatchGenerator (generator) object which will
            iterate over all results
        :rtype: :class:`boto.dynamodb.table.TableBatchGenerator`
        """
        return TableBatchGenerator(self, keys, attributes_to_get)
547