1#-------------------------------------------------------------------------
2# Copyright (c) Microsoft.  All rights reserved.
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#   http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14#--------------------------------------------------------------------------
15from .._error import (
16    _validate_not_none,
17    _validate_type_bytes,
18    _validate_encryption_unsupported,
19    _ERROR_VALUE_NEGATIVE,
20)
21from .._common_conversion import (
22    _to_str,
23    _int_to_str,
24    _datetime_to_utc_string,
25    _get_content_md5,
26)
27from .._serialization import (
28    _get_data_bytes_only,
29    _add_metadata_headers,
30)
31from .._http import HTTPRequest
32from ._upload_chunking import (
33    _AppendBlobChunkUploader,
34    _upload_blob_chunks,
35)
36from .models import _BlobTypes
37from .._constants import (
38    SERVICE_HOST_BASE,
39    DEFAULT_PROTOCOL,
40)
41from ._serialization import (
42    _get_path,
43)
44from ._deserialization import (
45    _parse_append_block,
46    _parse_base_properties,
47)
48from .baseblobservice import BaseBlobService
49from os import path
50import sys
51if sys.version_info >= (3,):
52    from io import BytesIO
53else:
54    from cStringIO import StringIO as BytesIO
55
56
57class AppendBlobService(BaseBlobService):
58    '''
59    An append blob is comprised of blocks and is optimized for append operations.
60    When you modify an append blob, blocks are added to the end of the blob only,
61    via the append_block operation. Updating or deleting of existing blocks is not
62    supported. Unlike a block blob, an append blob does not expose its block IDs.
63
64    Each block in an append blob can be a different size, up to a maximum of 4 MB,
65    and an append blob can include up to 50,000 blocks. The maximum size of an
66    append blob is therefore slightly more than 195 GB (4 MB X 50,000 blocks).
67
68    :ivar int MAX_BLOCK_SIZE:
69        The size of the blocks put by append_blob_from_* methods. Smaller blocks
70        may be put if there is less data provided. The maximum block size the service
71        supports is 4MB.
72    '''
73    MAX_BLOCK_SIZE = 4 * 1024 * 1024
74
75    def __init__(self, account_name=None, account_key=None, sas_token=None,
76                 is_emulated=False, protocol=DEFAULT_PROTOCOL, endpoint_suffix=SERVICE_HOST_BASE,
77                 custom_domain=None, request_session=None, connection_string=None, socket_timeout=None):
78        '''
79        :param str account_name:
80            The storage account name. This is used to authenticate requests
81            signed with an account key and to construct the storage endpoint. It
82            is required unless a connection string is given, or if a custom
83            domain is used with anonymous authentication.
84        :param str account_key:
85            The storage account key. This is used for shared key authentication.
86            If neither account key or sas token is specified, anonymous access
87            will be used.
88        :param str sas_token:
89             A shared access signature token to use to authenticate requests
90             instead of the account key. If account key and sas token are both
91             specified, account key will be used to sign. If neither are
92             specified, anonymous access will be used.
93        :param bool is_emulated:
94            Whether to use the emulator. Defaults to False. If specified, will
95            override all other parameters besides connection string and request
96            session.
97        :param str protocol:
98            The protocol to use for requests. Defaults to https.
99        :param str endpoint_suffix:
100            The host base component of the url, minus the account name. Defaults
101            to Azure (core.windows.net). Override this to use the China cloud
102            (core.chinacloudapi.cn).
103        :param str custom_domain:
104            The custom domain to use. This can be set in the Azure Portal. For
105            example, 'www.mydomain.com'.
106        :param requests.Session request_session:
107            The session object to use for http requests.
108        :param str connection_string:
109            If specified, this will override all other parameters besides
110            request session. See
111            http://azure.microsoft.com/en-us/documentation/articles/storage-configure-connection-string/
112            for the connection string format.
113        :param int socket_timeout:
114            If specified, this will override the default socket timeout. The timeout specified is in seconds.
115            See DEFAULT_SOCKET_TIMEOUT in _constants.py for the default value.
116        '''
117        self.blob_type = _BlobTypes.AppendBlob
118        super(AppendBlobService, self).__init__(
119            account_name, account_key, sas_token, is_emulated, protocol, endpoint_suffix,
120            custom_domain, request_session, connection_string, socket_timeout)
121
122    def create_blob(self, container_name, blob_name, content_settings=None,
123                    metadata=None, lease_id=None,
124                    if_modified_since=None, if_unmodified_since=None,
125                    if_match=None, if_none_match=None, timeout=None):
126        '''
127        Creates a blob or overrides an existing blob. Use if_match=* to
128        prevent overriding an existing blob.
129
130        See create_blob_from_* for high level
131        functions that handle the creation and upload of large blobs with
132        automatic chunking and progress notifications.
133
134        :param str container_name:
135            Name of existing container.
136        :param str blob_name:
137            Name of blob to create or update.
138        :param ~azure.storage.blob.models.ContentSettings content_settings:
139            ContentSettings object used to set blob properties.
140        :param metadata:
141            Name-value pairs associated with the blob as metadata.
142        :type metadata: a dict mapping str to str
143        :param str lease_id:
144            Required if the blob has an active lease.
145        :param datetime if_modified_since:
146            A DateTime value. Azure expects the date value passed in to be UTC.
147            If timezone is included, any non-UTC datetimes will be converted to UTC.
148            If a date is passed in without timezone info, it is assumed to be UTC.
149            Specify this header to perform the operation only
150            if the resource has been modified since the specified time.
151        :param datetime if_unmodified_since:
152            A DateTime value. Azure expects the date value passed in to be UTC.
153            If timezone is included, any non-UTC datetimes will be converted to UTC.
154            If a date is passed in without timezone info, it is assumed to be UTC.
155            Specify this header to perform the operation only if
156            the resource has not been modified since the specified date/time.
157        :param str if_match:
158            An ETag value, or the wildcard character (*). Specify this header to
159            perform the operation only if the resource's ETag matches the value specified.
160        :param str if_none_match:
161            An ETag value, or the wildcard character (*). Specify this header
162            to perform the operation only if the resource's ETag does not match
163            the value specified. Specify the wildcard character (*) to perform
164            the operation only if the resource does not exist, and fail the
165            operation if it does exist.
166        :param int timeout:
167            The timeout parameter is expressed in seconds.
168        :return: ETag and last modified properties for the updated Append Blob
169        :rtype: :class:`~azure.storage.blob.models.ResourceProperties`
170        '''
171        _validate_not_none('container_name', container_name)
172        _validate_not_none('blob_name', blob_name)
173        _validate_encryption_unsupported(self.require_encryption, self.key_encryption_key)
174
175        request = HTTPRequest()
176        request.method = 'PUT'
177        request.host_locations = self._get_host_locations()
178        request.path = _get_path(container_name, blob_name)
179        request.query = {'timeout': _int_to_str(timeout)}
180        request.headers = {
181            'x-ms-blob-type': _to_str(self.blob_type),
182            'x-ms-lease-id': _to_str(lease_id),
183            'If-Modified-Since': _datetime_to_utc_string(if_modified_since),
184            'If-Unmodified-Since': _datetime_to_utc_string(if_unmodified_since),
185            'If-Match': _to_str(if_match),
186            'If-None-Match': _to_str(if_none_match)
187        }
188        _add_metadata_headers(metadata, request)
189        if content_settings is not None:
190            request.headers.update(content_settings._to_headers())
191
192        return self._perform_request(request, _parse_base_properties)
193
194    def append_block(self, container_name, blob_name, block,
195                     validate_content=False, maxsize_condition=None,
196                     appendpos_condition=None,
197                     lease_id=None, if_modified_since=None,
198                     if_unmodified_since=None, if_match=None,
199                     if_none_match=None, timeout=None):
200        '''
201        Commits a new block of data to the end of an existing append blob.
202
203        :param str container_name:
204            Name of existing container.
205        :param str blob_name:
206            Name of existing blob.
207        :param bytes block:
208            Content of the block in bytes.
209        :param bool validate_content:
210            If true, calculates an MD5 hash of the block content. The storage
211            service checks the hash of the content that has arrived
212            with the hash that was sent. This is primarily valuable for detecting
213            bitflips on the wire if using http instead of https as https (the default)
214            will already validate. Note that this MD5 hash is not stored with the
215            blob.
216        :param int maxsize_condition:
217            Optional conditional header. The max length in bytes permitted for
218            the append blob. If the Append Block operation would cause the blob
219            to exceed that limit or if the blob size is already greater than the
220            value specified in this header, the request will fail with
221            MaxBlobSizeConditionNotMet error (HTTP status code 412 - Precondition Failed).
222        :param int appendpos_condition:
223            Optional conditional header, used only for the Append Block operation.
224            A number indicating the byte offset to compare. Append Block will
225            succeed only if the append position is equal to this number. If it
226            is not, the request will fail with the
227            AppendPositionConditionNotMet error
228            (HTTP status code 412 - Precondition Failed).
229        :param str lease_id:
230            Required if the blob has an active lease.
231        :param datetime if_modified_since:
232            A DateTime value. Azure expects the date value passed in to be UTC.
233            If timezone is included, any non-UTC datetimes will be converted to UTC.
234            If a date is passed in without timezone info, it is assumed to be UTC.
235            Specify this header to perform the operation only
236            if the resource has been modified since the specified time.
237        :param datetime if_unmodified_since:
238            A DateTime value. Azure expects the date value passed in to be UTC.
239            If timezone is included, any non-UTC datetimes will be converted to UTC.
240            If a date is passed in without timezone info, it is assumed to be UTC.
241            Specify this header to perform the operation only if
242            the resource has not been modified since the specified date/time.
243        :param str if_match:
244            An ETag value, or the wildcard character (*). Specify this header to perform
245            the operation only if the resource's ETag matches the value specified.
246        :param str if_none_match:
247            An ETag value, or the wildcard character (*). Specify this header
248            to perform the operation only if the resource's ETag does not match
249            the value specified. Specify the wildcard character (*) to perform
250            the operation only if the resource does not exist, and fail the
251            operation if it does exist.
252        :param int timeout:
253            The timeout parameter is expressed in seconds.
254        :return:
255            ETag, last modified, append offset, and committed block count
256            properties for the updated Append Blob
257        :rtype: :class:`~azure.storage.blob.models.AppendBlockProperties`
258        '''
259        _validate_not_none('container_name', container_name)
260        _validate_not_none('blob_name', blob_name)
261        _validate_not_none('block', block)
262        _validate_encryption_unsupported(self.require_encryption, self.key_encryption_key)
263
264        request = HTTPRequest()
265        request.method = 'PUT'
266        request.host_locations = self._get_host_locations()
267        request.path = _get_path(container_name, blob_name)
268        request.query = {
269            'comp': 'appendblock',
270            'timeout': _int_to_str(timeout),
271         }
272        request.headers = {
273            'x-ms-blob-condition-maxsize': _to_str(maxsize_condition),
274            'x-ms-blob-condition-appendpos': _to_str(appendpos_condition),
275            'x-ms-lease-id': _to_str(lease_id),
276            'If-Modified-Since': _datetime_to_utc_string(if_modified_since),
277            'If-Unmodified-Since': _datetime_to_utc_string(if_unmodified_since),
278            'If-Match': _to_str(if_match),
279            'If-None-Match': _to_str(if_none_match)
280        }
281        request.body = _get_data_bytes_only('block', block)
282
283        if validate_content:
284            computed_md5 = _get_content_md5(request.body)
285            request.headers['Content-MD5'] = _to_str(computed_md5)
286
287        return self._perform_request(request, _parse_append_block)
288
289    #----Convenience APIs----------------------------------------------
290
291    def append_blob_from_path(
292        self, container_name, blob_name, file_path, validate_content=False,
293        maxsize_condition=None, progress_callback=None, lease_id=None, timeout=None):
294        '''
295        Appends to the content of an existing blob from a file path, with automatic
296        chunking and progress notifications.
297
298        :param str container_name:
299            Name of existing container.
300        :param str blob_name:
301            Name of blob to create or update.
302        :param str file_path:
303            Path of the file to upload as the blob content.
304        :param bool validate_content:
305            If true, calculates an MD5 hash for each chunk of the blob. The storage
306            service checks the hash of the content that has arrived with the hash
307            that was sent. This is primarily valuable for detecting bitflips on
308            the wire if using http instead of https as https (the default) will
309            already validate. Note that this MD5 hash is not stored with the
310            blob.
311        :param int maxsize_condition:
312            Optional conditional header. The max length in bytes permitted for
313            the append blob. If the Append Block operation would cause the blob
314            to exceed that limit or if the blob size is already greater than the
315            value specified in this header, the request will fail with
316            MaxBlobSizeConditionNotMet error (HTTP status code 412 - Precondition Failed).
317        :param progress_callback:
318            Callback for progress with signature function(current, total) where
319            current is the number of bytes transfered so far, and total is the
320            size of the blob, or None if the total size is unknown.
321        :type progress_callback: callback function in format of func(current, total)
322        :param str lease_id:
323            Required if the blob has an active lease.
324        :param int timeout:
325            The timeout parameter is expressed in seconds. This method may make
326            multiple calls to the Azure service and the timeout will apply to
327            each call individually.
328        '''
329        _validate_not_none('container_name', container_name)
330        _validate_not_none('blob_name', blob_name)
331        _validate_not_none('file_path', file_path)
332        _validate_encryption_unsupported(self.require_encryption, self.key_encryption_key)
333
334        count = path.getsize(file_path)
335        with open(file_path, 'rb') as stream:
336            self.append_blob_from_stream(
337                container_name,
338                blob_name,
339                stream,
340                count=count,
341                validate_content=validate_content,
342                maxsize_condition=maxsize_condition,
343                progress_callback=progress_callback,
344                lease_id=lease_id,
345                timeout=timeout)
346
347    def append_blob_from_bytes(
348        self, container_name, blob_name, blob, index=0, count=None,
349        validate_content=False, maxsize_condition=None, progress_callback=None,
350        lease_id=None, timeout=None):
351        '''
352        Appends to the content of an existing blob from an array of bytes, with
353        automatic chunking and progress notifications.
354
355        :param str container_name:
356            Name of existing container.
357        :param str blob_name:
358            Name of blob to create or update.
359        :param bytes blob:
360            Content of blob as an array of bytes.
361        :param int index:
362            Start index in the array of bytes.
363        :param int count:
364            Number of bytes to upload. Set to None or negative value to upload
365            all bytes starting from index.
366        :param bool validate_content:
367            If true, calculates an MD5 hash for each chunk of the blob. The storage
368            service checks the hash of the content that has arrived with the hash
369            that was sent. This is primarily valuable for detecting bitflips on
370            the wire if using http instead of https as https (the default) will
371            already validate. Note that this MD5 hash is not stored with the
372            blob.
373        :param int maxsize_condition:
374            Optional conditional header. The max length in bytes permitted for
375            the append blob. If the Append Block operation would cause the blob
376            to exceed that limit or if the blob size is already greater than the
377            value specified in this header, the request will fail with
378            MaxBlobSizeConditionNotMet error (HTTP status code 412 - Precondition Failed).
379        :param progress_callback:
380            Callback for progress with signature function(current, total) where
381            current is the number of bytes transfered so far, and total is the
382            size of the blob, or None if the total size is unknown.
383        :type progress_callback: callback function in format of func(current, total)
384        :param str lease_id:
385            Required if the blob has an active lease.
386        :param int timeout:
387            The timeout parameter is expressed in seconds. This method may make
388            multiple calls to the Azure service and the timeout will apply to
389            each call individually.
390        '''
391        _validate_not_none('container_name', container_name)
392        _validate_not_none('blob_name', blob_name)
393        _validate_not_none('blob', blob)
394        _validate_not_none('index', index)
395        _validate_type_bytes('blob', blob)
396        _validate_encryption_unsupported(self.require_encryption, self.key_encryption_key)
397
398        if index < 0:
399            raise IndexError(_ERROR_VALUE_NEGATIVE.format('index'))
400
401        if count is None or count < 0:
402            count = len(blob) - index
403
404        stream = BytesIO(blob)
405        stream.seek(index)
406
407        self.append_blob_from_stream(
408            container_name,
409            blob_name,
410            stream,
411            count=count,
412            validate_content=validate_content,
413            maxsize_condition=maxsize_condition,
414            lease_id=lease_id,
415            progress_callback=progress_callback,
416            timeout=timeout)
417
418    def append_blob_from_text(
419        self, container_name, blob_name, text, encoding='utf-8',
420        validate_content=False, maxsize_condition=None, progress_callback=None,
421        lease_id=None, timeout=None):
422        '''
423        Appends to the content of an existing blob from str/unicode, with
424        automatic chunking and progress notifications.
425
426        :param str container_name:
427            Name of existing container.
428        :param str blob_name:
429            Name of blob to create or update.
430        :param str text:
431            Text to upload to the blob.
432        :param str encoding:
433            Python encoding to use to convert the text to bytes.
434        :param bool validate_content:
435            If true, calculates an MD5 hash for each chunk of the blob. The storage
436            service checks the hash of the content that has arrived with the hash
437            that was sent. This is primarily valuable for detecting bitflips on
438            the wire if using http instead of https as https (the default) will
439            already validate. Note that this MD5 hash is not stored with the
440            blob.
441        :param int maxsize_condition:
442            Optional conditional header. The max length in bytes permitted for
443            the append blob. If the Append Block operation would cause the blob
444            to exceed that limit or if the blob size is already greater than the
445            value specified in this header, the request will fail with
446            MaxBlobSizeConditionNotMet error (HTTP status code 412 - Precondition Failed).
447        :param progress_callback:
448            Callback for progress with signature function(current, total) where
449            current is the number of bytes transfered so far, and total is the
450            size of the blob, or None if the total size is unknown.
451        :type progress_callback: callback function in format of func(current, total)
452        :param str lease_id:
453            Required if the blob has an active lease.
454        :param int timeout:
455            The timeout parameter is expressed in seconds. This method may make
456            multiple calls to the Azure service and the timeout will apply to
457            each call individually.
458        '''
459        _validate_not_none('container_name', container_name)
460        _validate_not_none('blob_name', blob_name)
461        _validate_not_none('text', text)
462        _validate_encryption_unsupported(self.require_encryption, self.key_encryption_key)
463
464        if not isinstance(text, bytes):
465            _validate_not_none('encoding', encoding)
466            text = text.encode(encoding)
467
468        self.append_blob_from_bytes(
469            container_name,
470            blob_name,
471            text,
472            index=0,
473            count=len(text),
474            validate_content=validate_content,
475            maxsize_condition=maxsize_condition,
476            lease_id=lease_id,
477            progress_callback=progress_callback,
478            timeout=timeout)
479
480    def append_blob_from_stream(
481        self, container_name, blob_name, stream, count=None,
482        validate_content=False, maxsize_condition=None, progress_callback=None,
483        lease_id=None, timeout=None):
484        '''
485        Appends to the content of an existing blob from a file/stream, with
486        automatic chunking and progress notifications.
487
488        :param str container_name:
489            Name of existing container.
490        :param str blob_name:
491            Name of blob to create or update.
492        :param io.IOBase stream:
493            Opened stream to upload as the blob content.
494        :param int count:
495            Number of bytes to read from the stream. This is optional, but
496            should be supplied for optimal performance.
497        :param bool validate_content:
498            If true, calculates an MD5 hash for each chunk of the blob. The storage
499            service checks the hash of the content that has arrived with the hash
500            that was sent. This is primarily valuable for detecting bitflips on
501            the wire if using http instead of https as https (the default) will
502            already validate. Note that this MD5 hash is not stored with the
503            blob.
504        :param int maxsize_condition:
505            Conditional header. The max length in bytes permitted for
506            the append blob. If the Append Block operation would cause the blob
507            to exceed that limit or if the blob size is already greater than the
508            value specified in this header, the request will fail with
509            MaxBlobSizeConditionNotMet error (HTTP status code 412 - Precondition Failed).
510        :param progress_callback:
511            Callback for progress with signature function(current, total) where
512            current is the number of bytes transfered so far, and total is the
513            size of the blob, or None if the total size is unknown.
514        :type progress_callback: callback function in format of func(current, total)
515        :param str lease_id:
516            Required if the blob has an active lease.
517        :param int timeout:
518            The timeout parameter is expressed in seconds. This method may make
519            multiple calls to the Azure service and the timeout will apply to
520            each call individually.
521        '''
522        _validate_not_none('container_name', container_name)
523        _validate_not_none('blob_name', blob_name)
524        _validate_not_none('stream', stream)
525        _validate_encryption_unsupported(self.require_encryption, self.key_encryption_key)
526
527        _upload_blob_chunks(
528            blob_service=self,
529            container_name=container_name,
530            blob_name=blob_name,
531            blob_size=count,
532            block_size=self.MAX_BLOCK_SIZE,
533            stream=stream,
534            max_connections=1, # upload not easily parallelizable
535            progress_callback=progress_callback,
536            validate_content=validate_content,
537            lease_id=lease_id,
538            uploader_class=_AppendBlobChunkUploader,
539            maxsize_condition=maxsize_condition,
540            timeout=timeout
541        )