1# -------------------------------------------------------------------------
2# Copyright (c) Microsoft.  All rights reserved.
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#   http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# --------------------------------------------------------------------------
15import sys
16from os import path
17
18from ..common._common_conversion import (
19    _to_str,
20    _int_to_str,
21    _datetime_to_utc_string,
22    _get_content_md5,
23)
24from ..common._constants import (
25    SERVICE_HOST_BASE,
26    DEFAULT_PROTOCOL,
27)
28from ..common._error import (
29    _validate_not_none,
30    _validate_type_bytes,
31    _validate_encryption_unsupported,
32    _ERROR_VALUE_NEGATIVE,
33)
34from ..common._http import HTTPRequest
35from ..common._serialization import (
36    _get_data_bytes_only,
37    _add_metadata_headers,
38)
39from ._deserialization import (
40    _parse_append_block,
41    _parse_base_properties,
42)
43from ._serialization import (
44    _get_path,
45)
46from ._upload_chunking import (
47    _AppendBlobChunkUploader,
48    _upload_blob_chunks,
49)
50from .baseblobservice import BaseBlobService
51from .models import (
52    _BlobTypes,
53    ResourceProperties
54)
55
# Pick an in-memory byte buffer class under the single name BytesIO:
# cStringIO.StringIO on Python 2, io.BytesIO on Python 3 and later.
if sys.version_info < (3,):
    from cStringIO import StringIO as BytesIO
else:
    from io import BytesIO
60
61
62class AppendBlobService(BaseBlobService):
63    '''
64    An append blob is comprised of blocks and is optimized for append operations.
65    When you modify an append blob, blocks are added to the end of the blob only,
66    via the append_block operation. Updating or deleting of existing blocks is not
67    supported. Unlike a block blob, an append blob does not expose its block IDs.
68
69    Each block in an append blob can be a different size, up to a maximum of 4 MB,
70    and an append blob can include up to 50,000 blocks. The maximum size of an
71    append blob is therefore slightly more than 195 GB (4 MB X 50,000 blocks).
72
73    :ivar int MAX_BLOCK_SIZE:
74        The size of the blocks put by append_blob_from_* methods. Smaller blocks
75        may be put if there is less data provided. The maximum block size the service
76        supports is 4MB.
77    '''
78    MAX_BLOCK_SIZE = 4 * 1024 * 1024
79
80    def __init__(self, account_name=None, account_key=None, sas_token=None,
81                 is_emulated=False, protocol=DEFAULT_PROTOCOL, endpoint_suffix=SERVICE_HOST_BASE,
82                 custom_domain=None, request_session=None, connection_string=None, socket_timeout=None):
83        '''
84        :param str account_name:
85            The storage account name. This is used to authenticate requests
86            signed with an account key and to construct the storage endpoint. It
87            is required unless a connection string is given, or if a custom
88            domain is used with anonymous authentication.
89        :param str account_key:
90            The storage account key. This is used for shared key authentication.
91            If neither account key or sas token is specified, anonymous access
92            will be used.
93        :param str sas_token:
94             A shared access signature token to use to authenticate requests
95             instead of the account key. If account key and sas token are both
96             specified, account key will be used to sign. If neither are
97             specified, anonymous access will be used.
98        :param bool is_emulated:
99            Whether to use the emulator. Defaults to False. If specified, will
100            override all other parameters besides connection string and request
101            session.
102        :param str protocol:
103            The protocol to use for requests. Defaults to https.
104        :param str endpoint_suffix:
105            The host base component of the url, minus the account name. Defaults
106            to Azure (core.windows.net). Override this to use the China cloud
107            (core.chinacloudapi.cn).
108        :param str custom_domain:
109            The custom domain to use. This can be set in the Azure Portal. For
110            example, 'www.mydomain.com'.
111        :param requests.Session request_session:
112            The session object to use for http requests.
113        :param str connection_string:
114            If specified, this will override all other parameters besides
115            request session. See
116            http://azure.microsoft.com/en-us/documentation/articles/storage-configure-connection-string/
117            for the connection string format.
118        :param int socket_timeout:
119            If specified, this will override the default socket timeout. The timeout specified is in seconds.
120            See DEFAULT_SOCKET_TIMEOUT in _constants.py for the default value.
121        '''
122        self.blob_type = _BlobTypes.AppendBlob
123        super(AppendBlobService, self).__init__(
124            account_name, account_key, sas_token, is_emulated, protocol, endpoint_suffix,
125            custom_domain, request_session, connection_string, socket_timeout)
126
127    def create_blob(self, container_name, blob_name, content_settings=None,
128                    metadata=None, lease_id=None,
129                    if_modified_since=None, if_unmodified_since=None,
130                    if_match=None, if_none_match=None, timeout=None):
131        '''
132        Creates a blob or overrides an existing blob. Use if_match=* to
133        prevent overriding an existing blob.
134
135        See create_blob_from_* for high level
136        functions that handle the creation and upload of large blobs with
137        automatic chunking and progress notifications.
138
139        :param str container_name:
140            Name of existing container.
141        :param str blob_name:
142            Name of blob to create or update.
143        :param ~azure.storage.blob.models.ContentSettings content_settings:
144            ContentSettings object used to set blob properties.
145        :param metadata:
146            Name-value pairs associated with the blob as metadata.
147        :type metadata: dict(str, str)
148        :param str lease_id:
149            Required if the blob has an active lease.
150        :param datetime if_modified_since:
151            A DateTime value. Azure expects the date value passed in to be UTC.
152            If timezone is included, any non-UTC datetimes will be converted to UTC.
153            If a date is passed in without timezone info, it is assumed to be UTC.
154            Specify this header to perform the operation only
155            if the resource has been modified since the specified time.
156        :param datetime if_unmodified_since:
157            A DateTime value. Azure expects the date value passed in to be UTC.
158            If timezone is included, any non-UTC datetimes will be converted to UTC.
159            If a date is passed in without timezone info, it is assumed to be UTC.
160            Specify this header to perform the operation only if
161            the resource has not been modified since the specified date/time.
162        :param str if_match:
163            An ETag value, or the wildcard character (*). Specify this header to
164            perform the operation only if the resource's ETag matches the value specified.
165        :param str if_none_match:
166            An ETag value, or the wildcard character (*). Specify this header
167            to perform the operation only if the resource's ETag does not match
168            the value specified. Specify the wildcard character (*) to perform
169            the operation only if the resource does not exist, and fail the
170            operation if it does exist.
171        :param int timeout:
172            The timeout parameter is expressed in seconds.
173        :return: ETag and last modified properties for the updated Append Blob
174        :rtype: :class:`~azure.storage.blob.models.ResourceProperties`
175        '''
176        _validate_not_none('container_name', container_name)
177        _validate_not_none('blob_name', blob_name)
178        _validate_encryption_unsupported(self.require_encryption, self.key_encryption_key)
179
180        request = HTTPRequest()
181        request.method = 'PUT'
182        request.host_locations = self._get_host_locations()
183        request.path = _get_path(container_name, blob_name)
184        request.query = {'timeout': _int_to_str(timeout)}
185        request.headers = {
186            'x-ms-blob-type': _to_str(self.blob_type),
187            'x-ms-lease-id': _to_str(lease_id),
188            'If-Modified-Since': _datetime_to_utc_string(if_modified_since),
189            'If-Unmodified-Since': _datetime_to_utc_string(if_unmodified_since),
190            'If-Match': _to_str(if_match),
191            'If-None-Match': _to_str(if_none_match)
192        }
193        _add_metadata_headers(metadata, request)
194        if content_settings is not None:
195            request.headers.update(content_settings._to_headers())
196
197        return self._perform_request(request, _parse_base_properties)
198
199    def append_block(self, container_name, blob_name, block,
200                     validate_content=False, maxsize_condition=None,
201                     appendpos_condition=None,
202                     lease_id=None, if_modified_since=None,
203                     if_unmodified_since=None, if_match=None,
204                     if_none_match=None, timeout=None):
205        '''
206        Commits a new block of data to the end of an existing append blob.
207
208        :param str container_name:
209            Name of existing container.
210        :param str blob_name:
211            Name of existing blob.
212        :param bytes block:
213            Content of the block in bytes.
214        :param bool validate_content:
215            If true, calculates an MD5 hash of the block content. The storage
216            service checks the hash of the content that has arrived
217            with the hash that was sent. This is primarily valuable for detecting
218            bitflips on the wire if using http instead of https as https (the default)
219            will already validate. Note that this MD5 hash is not stored with the
220            blob.
221        :param int maxsize_condition:
222            Optional conditional header. The max length in bytes permitted for
223            the append blob. If the Append Block operation would cause the blob
224            to exceed that limit or if the blob size is already greater than the
225            value specified in this header, the request will fail with
226            MaxBlobSizeConditionNotMet error (HTTP status code 412 - Precondition Failed).
227        :param int appendpos_condition:
228            Optional conditional header, used only for the Append Block operation.
229            A number indicating the byte offset to compare. Append Block will
230            succeed only if the append position is equal to this number. If it
231            is not, the request will fail with the
232            AppendPositionConditionNotMet error
233            (HTTP status code 412 - Precondition Failed).
234        :param str lease_id:
235            Required if the blob has an active lease.
236        :param datetime if_modified_since:
237            A DateTime value. Azure expects the date value passed in to be UTC.
238            If timezone is included, any non-UTC datetimes will be converted to UTC.
239            If a date is passed in without timezone info, it is assumed to be UTC.
240            Specify this header to perform the operation only
241            if the resource has been modified since the specified time.
242        :param datetime if_unmodified_since:
243            A DateTime value. Azure expects the date value passed in to be UTC.
244            If timezone is included, any non-UTC datetimes will be converted to UTC.
245            If a date is passed in without timezone info, it is assumed to be UTC.
246            Specify this header to perform the operation only if
247            the resource has not been modified since the specified date/time.
248        :param str if_match:
249            An ETag value, or the wildcard character (*). Specify this header to perform
250            the operation only if the resource's ETag matches the value specified.
251        :param str if_none_match:
252            An ETag value, or the wildcard character (*). Specify this header
253            to perform the operation only if the resource's ETag does not match
254            the value specified. Specify the wildcard character (*) to perform
255            the operation only if the resource does not exist, and fail the
256            operation if it does exist.
257        :param int timeout:
258            The timeout parameter is expressed in seconds.
259        :return:
260            ETag, last modified, append offset, and committed block count
261            properties for the updated Append Blob
262        :rtype: :class:`~azure.storage.blob.models.AppendBlockProperties`
263        '''
264        _validate_not_none('container_name', container_name)
265        _validate_not_none('blob_name', blob_name)
266        _validate_not_none('block', block)
267        _validate_encryption_unsupported(self.require_encryption, self.key_encryption_key)
268
269        request = HTTPRequest()
270        request.method = 'PUT'
271        request.host_locations = self._get_host_locations()
272        request.path = _get_path(container_name, blob_name)
273        request.query = {
274            'comp': 'appendblock',
275            'timeout': _int_to_str(timeout),
276        }
277        request.headers = {
278            'x-ms-blob-condition-maxsize': _to_str(maxsize_condition),
279            'x-ms-blob-condition-appendpos': _to_str(appendpos_condition),
280            'x-ms-lease-id': _to_str(lease_id),
281            'If-Modified-Since': _datetime_to_utc_string(if_modified_since),
282            'If-Unmodified-Since': _datetime_to_utc_string(if_unmodified_since),
283            'If-Match': _to_str(if_match),
284            'If-None-Match': _to_str(if_none_match)
285        }
286        request.body = _get_data_bytes_only('block', block)
287
288        if validate_content:
289            computed_md5 = _get_content_md5(request.body)
290            request.headers['Content-MD5'] = _to_str(computed_md5)
291
292        return self._perform_request(request, _parse_append_block)
293
294    # ----Convenience APIs----------------------------------------------
295
296    def append_blob_from_path(
297            self, container_name, blob_name, file_path, validate_content=False,
298            maxsize_condition=None, progress_callback=None, lease_id=None, timeout=None):
299        '''
300        Appends to the content of an existing blob from a file path, with automatic
301        chunking and progress notifications.
302
303        :param str container_name:
304            Name of existing container.
305        :param str blob_name:
306            Name of blob to create or update.
307        :param str file_path:
308            Path of the file to upload as the blob content.
309        :param bool validate_content:
310            If true, calculates an MD5 hash for each chunk of the blob. The storage
311            service checks the hash of the content that has arrived with the hash
312            that was sent. This is primarily valuable for detecting bitflips on
313            the wire if using http instead of https as https (the default) will
314            already validate. Note that this MD5 hash is not stored with the
315            blob.
316        :param int maxsize_condition:
317            Optional conditional header. The max length in bytes permitted for
318            the append blob. If the Append Block operation would cause the blob
319            to exceed that limit or if the blob size is already greater than the
320            value specified in this header, the request will fail with
321            MaxBlobSizeConditionNotMet error (HTTP status code 412 - Precondition Failed).
322        :param progress_callback:
323            Callback for progress with signature function(current, total) where
324            current is the number of bytes transfered so far, and total is the
325            size of the blob, or None if the total size is unknown.
326        :type progress_callback: func(current, total)
327        :param str lease_id:
328            Required if the blob has an active lease.
329        :param int timeout:
330            The timeout parameter is expressed in seconds. This method may make
331            multiple calls to the Azure service and the timeout will apply to
332            each call individually.
333        :return: ETag and last modified properties for the Append Blob
334        :rtype: :class:`~azure.storage.blob.models.ResourceProperties`
335        '''
336        _validate_not_none('container_name', container_name)
337        _validate_not_none('blob_name', blob_name)
338        _validate_not_none('file_path', file_path)
339        _validate_encryption_unsupported(self.require_encryption, self.key_encryption_key)
340
341        count = path.getsize(file_path)
342        with open(file_path, 'rb') as stream:
343            return self.append_blob_from_stream(
344                container_name,
345                blob_name,
346                stream,
347                count=count,
348                validate_content=validate_content,
349                maxsize_condition=maxsize_condition,
350                progress_callback=progress_callback,
351                lease_id=lease_id,
352                timeout=timeout)
353
354    def append_blob_from_bytes(
355            self, container_name, blob_name, blob, index=0, count=None,
356            validate_content=False, maxsize_condition=None, progress_callback=None,
357            lease_id=None, timeout=None):
358        '''
359        Appends to the content of an existing blob from an array of bytes, with
360        automatic chunking and progress notifications.
361
362        :param str container_name:
363            Name of existing container.
364        :param str blob_name:
365            Name of blob to create or update.
366        :param bytes blob:
367            Content of blob as an array of bytes.
368        :param int index:
369            Start index in the array of bytes.
370        :param int count:
371            Number of bytes to upload. Set to None or negative value to upload
372            all bytes starting from index.
373        :param bool validate_content:
374            If true, calculates an MD5 hash for each chunk of the blob. The storage
375            service checks the hash of the content that has arrived with the hash
376            that was sent. This is primarily valuable for detecting bitflips on
377            the wire if using http instead of https as https (the default) will
378            already validate. Note that this MD5 hash is not stored with the
379            blob.
380        :param int maxsize_condition:
381            Optional conditional header. The max length in bytes permitted for
382            the append blob. If the Append Block operation would cause the blob
383            to exceed that limit or if the blob size is already greater than the
384            value specified in this header, the request will fail with
385            MaxBlobSizeConditionNotMet error (HTTP status code 412 - Precondition Failed).
386        :param progress_callback:
387            Callback for progress with signature function(current, total) where
388            current is the number of bytes transfered so far, and total is the
389            size of the blob, or None if the total size is unknown.
390        :type progress_callback: func(current, total)
391        :param str lease_id:
392            Required if the blob has an active lease.
393        :param int timeout:
394            The timeout parameter is expressed in seconds. This method may make
395            multiple calls to the Azure service and the timeout will apply to
396            each call individually.
397        :return: ETag and last modified properties for the Append Blob
398        :rtype: :class:`~azure.storage.blob.models.ResourceProperties`
399        '''
400        _validate_not_none('container_name', container_name)
401        _validate_not_none('blob_name', blob_name)
402        _validate_not_none('blob', blob)
403        _validate_not_none('index', index)
404        _validate_type_bytes('blob', blob)
405        _validate_encryption_unsupported(self.require_encryption, self.key_encryption_key)
406
407        if index < 0:
408            raise IndexError(_ERROR_VALUE_NEGATIVE.format('index'))
409
410        if count is None or count < 0:
411            count = len(blob) - index
412
413        stream = BytesIO(blob)
414        stream.seek(index)
415
416        return self.append_blob_from_stream(
417            container_name,
418            blob_name,
419            stream,
420            count=count,
421            validate_content=validate_content,
422            maxsize_condition=maxsize_condition,
423            lease_id=lease_id,
424            progress_callback=progress_callback,
425            timeout=timeout)
426
427    def append_blob_from_text(
428            self, container_name, blob_name, text, encoding='utf-8',
429            validate_content=False, maxsize_condition=None, progress_callback=None,
430            lease_id=None, timeout=None):
431        '''
432        Appends to the content of an existing blob from str/unicode, with
433        automatic chunking and progress notifications.
434
435        :param str container_name:
436            Name of existing container.
437        :param str blob_name:
438            Name of blob to create or update.
439        :param str text:
440            Text to upload to the blob.
441        :param str encoding:
442            Python encoding to use to convert the text to bytes.
443        :param bool validate_content:
444            If true, calculates an MD5 hash for each chunk of the blob. The storage
445            service checks the hash of the content that has arrived with the hash
446            that was sent. This is primarily valuable for detecting bitflips on
447            the wire if using http instead of https as https (the default) will
448            already validate. Note that this MD5 hash is not stored with the
449            blob.
450        :param int maxsize_condition:
451            Optional conditional header. The max length in bytes permitted for
452            the append blob. If the Append Block operation would cause the blob
453            to exceed that limit or if the blob size is already greater than the
454            value specified in this header, the request will fail with
455            MaxBlobSizeConditionNotMet error (HTTP status code 412 - Precondition Failed).
456        :param progress_callback:
457            Callback for progress with signature function(current, total) where
458            current is the number of bytes transfered so far, and total is the
459            size of the blob, or None if the total size is unknown.
460        :type progress_callback: func(current, total)
461        :param str lease_id:
462            Required if the blob has an active lease.
463        :param int timeout:
464            The timeout parameter is expressed in seconds. This method may make
465            multiple calls to the Azure service and the timeout will apply to
466            each call individually.
467        :return: ETag and last modified properties for the Append Blob
468        :rtype: :class:`~azure.storage.blob.models.ResourceProperties`
469        '''
470        _validate_not_none('container_name', container_name)
471        _validate_not_none('blob_name', blob_name)
472        _validate_not_none('text', text)
473        _validate_encryption_unsupported(self.require_encryption, self.key_encryption_key)
474
475        if not isinstance(text, bytes):
476            _validate_not_none('encoding', encoding)
477            text = text.encode(encoding)
478
479        return self.append_blob_from_bytes(
480            container_name,
481            blob_name,
482            text,
483            index=0,
484            count=len(text),
485            validate_content=validate_content,
486            maxsize_condition=maxsize_condition,
487            lease_id=lease_id,
488            progress_callback=progress_callback,
489            timeout=timeout)
490
491    def append_blob_from_stream(
492            self, container_name, blob_name, stream, count=None,
493            validate_content=False, maxsize_condition=None, progress_callback=None,
494            lease_id=None, timeout=None):
495        '''
496        Appends to the content of an existing blob from a file/stream, with
497        automatic chunking and progress notifications.
498
499        :param str container_name:
500            Name of existing container.
501        :param str blob_name:
502            Name of blob to create or update.
503        :param io.IOBase stream:
504            Opened stream to upload as the blob content.
505        :param int count:
506            Number of bytes to read from the stream. This is optional, but
507            should be supplied for optimal performance.
508        :param bool validate_content:
509            If true, calculates an MD5 hash for each chunk of the blob. The storage
510            service checks the hash of the content that has arrived with the hash
511            that was sent. This is primarily valuable for detecting bitflips on
512            the wire if using http instead of https as https (the default) will
513            already validate. Note that this MD5 hash is not stored with the
514            blob.
515        :param int maxsize_condition:
516            Conditional header. The max length in bytes permitted for
517            the append blob. If the Append Block operation would cause the blob
518            to exceed that limit or if the blob size is already greater than the
519            value specified in this header, the request will fail with
520            MaxBlobSizeConditionNotMet error (HTTP status code 412 - Precondition Failed).
521        :param progress_callback:
522            Callback for progress with signature function(current, total) where
523            current is the number of bytes transfered so far, and total is the
524            size of the blob, or None if the total size is unknown.
525        :type progress_callback: func(current, total)
526        :param str lease_id:
527            Required if the blob has an active lease.
528        :param int timeout:
529            The timeout parameter is expressed in seconds. This method may make
530            multiple calls to the Azure service and the timeout will apply to
531            each call individually.
532        :return: ETag and last modified properties for the Append Blob
533        :rtype: :class:`~azure.storage.blob.models.ResourceProperties`
534        '''
535        _validate_not_none('container_name', container_name)
536        _validate_not_none('blob_name', blob_name)
537        _validate_not_none('stream', stream)
538        _validate_encryption_unsupported(self.require_encryption, self.key_encryption_key)
539
540        # _upload_blob_chunks returns the block ids for block blobs so resource_properties
541        # is passed as a parameter to get the last_modified and etag for page and append blobs.
542        # this info is not needed for block_blobs since _put_block_list is called after which gets this info
543        resource_properties = ResourceProperties()
544        _upload_blob_chunks(
545            blob_service=self,
546            container_name=container_name,
547            blob_name=blob_name,
548            blob_size=count,
549            block_size=self.MAX_BLOCK_SIZE,
550            stream=stream,
551            max_connections=1,  # upload not easily parallelizable
552            progress_callback=progress_callback,
553            validate_content=validate_content,
554            lease_id=lease_id,
555            uploader_class=_AppendBlobChunkUploader,
556            maxsize_condition=maxsize_condition,
557            timeout=timeout,
558            resource_properties=resource_properties
559        )
560
561        return resource_properties
562