# -------------------------------------------------------------------------
# Copyright (c) Microsoft. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# --------------------------------------------------------------------------
import sys
from os import path

from ..common._common_conversion import (
    _to_str,
    _int_to_str,
    _datetime_to_utc_string,
    _get_content_md5,
)
from ..common._constants import (
    SERVICE_HOST_BASE,
    DEFAULT_PROTOCOL,
)
from ..common._error import (
    _validate_not_none,
    _validate_type_bytes,
    _validate_encryption_unsupported,
    _ERROR_VALUE_NEGATIVE,
)
from ..common._http import HTTPRequest
from ..common._serialization import (
    _get_data_bytes_only,
    _add_metadata_headers,
)
from ._deserialization import (
    _parse_append_block,
    _parse_base_properties,
)
from ._serialization import (
    _get_path,
)
from ._upload_chunking import (
    _AppendBlobChunkUploader,
    _upload_blob_chunks,
)
from .baseblobservice import BaseBlobService
from .models import (
    _BlobTypes,
    ResourceProperties
)

# On Python 2 there is no io.BytesIO-compatible fast path imported here, so the
# cStringIO implementation is aliased to the same name.
if sys.version_info >= (3,):
    from io import BytesIO
else:
    from cStringIO import StringIO as BytesIO


class AppendBlobService(BaseBlobService):
    '''
    An append blob is comprised of blocks and is optimized for append operations.
    When you modify an append blob, blocks are added to the end of the blob only,
    via the append_block operation. Updating or deleting of existing blocks is not
    supported. Unlike a block blob, an append blob does not expose its block IDs.

    Each block in an append blob can be a different size, up to a maximum of 4 MB,
    and an append blob can include up to 50,000 blocks. The maximum size of an
    append blob is therefore slightly more than 195 GB (4 MB X 50,000 blocks).

    :ivar int MAX_BLOCK_SIZE:
        The size of the blocks put by append_blob_from_* methods. Smaller blocks
        may be put if there is less data provided. The maximum block size the service
        supports is 4MB.
    '''
    MAX_BLOCK_SIZE = 4 * 1024 * 1024

    def __init__(self, account_name=None, account_key=None, sas_token=None,
                 is_emulated=False, protocol=DEFAULT_PROTOCOL, endpoint_suffix=SERVICE_HOST_BASE,
                 custom_domain=None, request_session=None, connection_string=None, socket_timeout=None):
        '''
        :param str account_name:
            The storage account name. This is used to authenticate requests
            signed with an account key and to construct the storage endpoint. It
            is required unless a connection string is given, or if a custom
            domain is used with anonymous authentication.
        :param str account_key:
            The storage account key. This is used for shared key authentication.
            If neither account key nor sas token is specified, anonymous access
            will be used.
        :param str sas_token:
            A shared access signature token to use to authenticate requests
            instead of the account key. If account key and sas token are both
            specified, account key will be used to sign. If neither are
            specified, anonymous access will be used.
        :param bool is_emulated:
            Whether to use the emulator. Defaults to False. If specified, will
            override all other parameters besides connection string and request
            session.
        :param str protocol:
            The protocol to use for requests. Defaults to https.
        :param str endpoint_suffix:
            The host base component of the url, minus the account name. Defaults
            to Azure (core.windows.net). Override this to use the China cloud
            (core.chinacloudapi.cn).
        :param str custom_domain:
            The custom domain to use. This can be set in the Azure Portal. For
            example, 'www.mydomain.com'.
        :param requests.Session request_session:
            The session object to use for http requests.
        :param str connection_string:
            If specified, this will override all other parameters besides
            request session. See
            http://azure.microsoft.com/en-us/documentation/articles/storage-configure-connection-string/
            for the connection string format.
        :param int socket_timeout:
            If specified, this will override the default socket timeout. The timeout specified is in seconds.
            See DEFAULT_SOCKET_TIMEOUT in _constants.py for the default value.
        '''
        # The blob type is assigned before delegating to the base initializer;
        # create_blob later sends it as the 'x-ms-blob-type' header.
        self.blob_type = _BlobTypes.AppendBlob
        super(AppendBlobService, self).__init__(
            account_name, account_key, sas_token, is_emulated, protocol, endpoint_suffix,
            custom_domain, request_session, connection_string, socket_timeout)

    def create_blob(self, container_name, blob_name, content_settings=None,
                    metadata=None, lease_id=None,
                    if_modified_since=None, if_unmodified_since=None,
                    if_match=None, if_none_match=None, timeout=None):
        '''
        Creates a blob or overwrites an existing blob. Use if_none_match=* to
        prevent overwriting an existing blob.

        See append_blob_from_* for high level functions that append data to
        an existing blob with automatic chunking and progress notifications.

        :param str container_name:
            Name of existing container.
        :param str blob_name:
            Name of blob to create or update.
        :param ~azure.storage.blob.models.ContentSettings content_settings:
            ContentSettings object used to set blob properties.
        :param metadata:
            Name-value pairs associated with the blob as metadata.
        :type metadata: dict(str, str)
        :param str lease_id:
            Required if the blob has an active lease.
        :param datetime if_modified_since:
            A DateTime value. Azure expects the date value passed in to be UTC.
            If timezone is included, any non-UTC datetimes will be converted to UTC.
            If a date is passed in without timezone info, it is assumed to be UTC.
            Specify this header to perform the operation only
            if the resource has been modified since the specified time.
        :param datetime if_unmodified_since:
            A DateTime value. Azure expects the date value passed in to be UTC.
            If timezone is included, any non-UTC datetimes will be converted to UTC.
            If a date is passed in without timezone info, it is assumed to be UTC.
            Specify this header to perform the operation only if
            the resource has not been modified since the specified date/time.
        :param str if_match:
            An ETag value, or the wildcard character (*). Specify this header to
            perform the operation only if the resource's ETag matches the value specified.
        :param str if_none_match:
            An ETag value, or the wildcard character (*). Specify this header
            to perform the operation only if the resource's ETag does not match
            the value specified. Specify the wildcard character (*) to perform
            the operation only if the resource does not exist, and fail the
            operation if it does exist.
        :param int timeout:
            The timeout parameter is expressed in seconds.
        :return: ETag and last modified properties for the updated Append Blob
        :rtype: :class:`~azure.storage.blob.models.ResourceProperties`
        '''
        _validate_not_none('container_name', container_name)
        _validate_not_none('blob_name', blob_name)
        _validate_encryption_unsupported(self.require_encryption, self.key_encryption_key)

        request = HTTPRequest()
        request.method = 'PUT'
        request.host_locations = self._get_host_locations()
        request.path = _get_path(container_name, blob_name)
        request.query = {'timeout': _int_to_str(timeout)}
        request.headers = {
            'x-ms-blob-type': _to_str(self.blob_type),
            'x-ms-lease-id': _to_str(lease_id),
            'If-Modified-Since': _datetime_to_utc_string(if_modified_since),
            'If-Unmodified-Since': _datetime_to_utc_string(if_unmodified_since),
            'If-Match': _to_str(if_match),
            'If-None-Match': _to_str(if_none_match)
        }
        _add_metadata_headers(metadata, request)
        if content_settings is not None:
            request.headers.update(content_settings._to_headers())

        return self._perform_request(request, _parse_base_properties)

    def append_block(self, container_name, blob_name, block,
                     validate_content=False, maxsize_condition=None,
                     appendpos_condition=None,
                     lease_id=None, if_modified_since=None,
                     if_unmodified_since=None, if_match=None,
                     if_none_match=None, timeout=None):
        '''
        Commits a new block of data to the end of an existing append blob.

        :param str container_name:
            Name of existing container.
        :param str blob_name:
            Name of existing blob.
        :param bytes block:
            Content of the block in bytes.
        :param bool validate_content:
            If true, calculates an MD5 hash of the block content. The storage
            service checks the hash of the content that has arrived
            with the hash that was sent. This is primarily valuable for detecting
            bitflips on the wire if using http instead of https as https (the default)
            will already validate. Note that this MD5 hash is not stored with the
            blob.
        :param int maxsize_condition:
            Optional conditional header. The max length in bytes permitted for
            the append blob. If the Append Block operation would cause the blob
            to exceed that limit or if the blob size is already greater than the
            value specified in this header, the request will fail with
            MaxBlobSizeConditionNotMet error (HTTP status code 412 - Precondition Failed).
        :param int appendpos_condition:
            Optional conditional header, used only for the Append Block operation.
            A number indicating the byte offset to compare. Append Block will
            succeed only if the append position is equal to this number. If it
            is not, the request will fail with the
            AppendPositionConditionNotMet error
            (HTTP status code 412 - Precondition Failed).
        :param str lease_id:
            Required if the blob has an active lease.
        :param datetime if_modified_since:
            A DateTime value. Azure expects the date value passed in to be UTC.
            If timezone is included, any non-UTC datetimes will be converted to UTC.
            If a date is passed in without timezone info, it is assumed to be UTC.
            Specify this header to perform the operation only
            if the resource has been modified since the specified time.
        :param datetime if_unmodified_since:
            A DateTime value. Azure expects the date value passed in to be UTC.
            If timezone is included, any non-UTC datetimes will be converted to UTC.
            If a date is passed in without timezone info, it is assumed to be UTC.
            Specify this header to perform the operation only if
            the resource has not been modified since the specified date/time.
        :param str if_match:
            An ETag value, or the wildcard character (*). Specify this header to perform
            the operation only if the resource's ETag matches the value specified.
        :param str if_none_match:
            An ETag value, or the wildcard character (*). Specify this header
            to perform the operation only if the resource's ETag does not match
            the value specified. Specify the wildcard character (*) to perform
            the operation only if the resource does not exist, and fail the
            operation if it does exist.
        :param int timeout:
            The timeout parameter is expressed in seconds.
        :return:
            ETag, last modified, append offset, and committed block count
            properties for the updated Append Blob
        :rtype: :class:`~azure.storage.blob.models.AppendBlockProperties`
        '''
        _validate_not_none('container_name', container_name)
        _validate_not_none('blob_name', blob_name)
        _validate_not_none('block', block)
        _validate_encryption_unsupported(self.require_encryption, self.key_encryption_key)

        request = HTTPRequest()
        request.method = 'PUT'
        request.host_locations = self._get_host_locations()
        request.path = _get_path(container_name, blob_name)
        request.query = {
            'comp': 'appendblock',
            'timeout': _int_to_str(timeout),
        }
        request.headers = {
            'x-ms-blob-condition-maxsize': _to_str(maxsize_condition),
            'x-ms-blob-condition-appendpos': _to_str(appendpos_condition),
            'x-ms-lease-id': _to_str(lease_id),
            'If-Modified-Since': _datetime_to_utc_string(if_modified_since),
            'If-Unmodified-Since': _datetime_to_utc_string(if_unmodified_since),
            'If-Match': _to_str(if_match),
            'If-None-Match': _to_str(if_none_match)
        }
        request.body = _get_data_bytes_only('block', block)

        # The hash is computed over the exact bytes placed in the request body.
        if validate_content:
            computed_md5 = _get_content_md5(request.body)
            request.headers['Content-MD5'] = _to_str(computed_md5)

        return self._perform_request(request, _parse_append_block)

    # ----Convenience APIs----------------------------------------------

    def append_blob_from_path(
            self, container_name, blob_name, file_path, validate_content=False,
            maxsize_condition=None, progress_callback=None, lease_id=None, timeout=None):
        '''
        Appends to the content of an existing blob from a file path, with automatic
        chunking and progress notifications.

        :param str container_name:
            Name of existing container.
        :param str blob_name:
            Name of existing blob to append to.
        :param str file_path:
            Path of the file to upload as the blob content.
        :param bool validate_content:
            If true, calculates an MD5 hash for each chunk of the blob. The storage
            service checks the hash of the content that has arrived with the hash
            that was sent. This is primarily valuable for detecting bitflips on
            the wire if using http instead of https as https (the default) will
            already validate. Note that this MD5 hash is not stored with the
            blob.
        :param int maxsize_condition:
            Optional conditional header. The max length in bytes permitted for
            the append blob. If the Append Block operation would cause the blob
            to exceed that limit or if the blob size is already greater than the
            value specified in this header, the request will fail with
            MaxBlobSizeConditionNotMet error (HTTP status code 412 - Precondition Failed).
        :param progress_callback:
            Callback for progress with signature function(current, total) where
            current is the number of bytes transferred so far, and total is the
            size of the blob, or None if the total size is unknown.
        :type progress_callback: func(current, total)
        :param str lease_id:
            Required if the blob has an active lease.
        :param int timeout:
            The timeout parameter is expressed in seconds. This method may make
            multiple calls to the Azure service and the timeout will apply to
            each call individually.
        :return: ETag and last modified properties for the Append Blob
        :rtype: :class:`~azure.storage.blob.models.ResourceProperties`
        '''
        _validate_not_none('container_name', container_name)
        _validate_not_none('blob_name', blob_name)
        _validate_not_none('file_path', file_path)
        _validate_encryption_unsupported(self.require_encryption, self.key_encryption_key)

        # The file size is passed along as the byte count for the stream upload.
        count = path.getsize(file_path)
        with open(file_path, 'rb') as stream:
            return self.append_blob_from_stream(
                container_name,
                blob_name,
                stream,
                count=count,
                validate_content=validate_content,
                maxsize_condition=maxsize_condition,
                progress_callback=progress_callback,
                lease_id=lease_id,
                timeout=timeout)

    def append_blob_from_bytes(
            self, container_name, blob_name, blob, index=0, count=None,
            validate_content=False, maxsize_condition=None, progress_callback=None,
            lease_id=None, timeout=None):
        '''
        Appends to the content of an existing blob from an array of bytes, with
        automatic chunking and progress notifications.

        :param str container_name:
            Name of existing container.
        :param str blob_name:
            Name of existing blob to append to.
        :param bytes blob:
            Content of blob as an array of bytes.
        :param int index:
            Start index in the array of bytes. A negative value raises IndexError.
        :param int count:
            Number of bytes to upload. Set to None or negative value to upload
            all bytes starting from index.
        :param bool validate_content:
            If true, calculates an MD5 hash for each chunk of the blob. The storage
            service checks the hash of the content that has arrived with the hash
            that was sent. This is primarily valuable for detecting bitflips on
            the wire if using http instead of https as https (the default) will
            already validate. Note that this MD5 hash is not stored with the
            blob.
        :param int maxsize_condition:
            Optional conditional header. The max length in bytes permitted for
            the append blob. If the Append Block operation would cause the blob
            to exceed that limit or if the blob size is already greater than the
            value specified in this header, the request will fail with
            MaxBlobSizeConditionNotMet error (HTTP status code 412 - Precondition Failed).
        :param progress_callback:
            Callback for progress with signature function(current, total) where
            current is the number of bytes transferred so far, and total is the
            size of the blob, or None if the total size is unknown.
        :type progress_callback: func(current, total)
        :param str lease_id:
            Required if the blob has an active lease.
        :param int timeout:
            The timeout parameter is expressed in seconds. This method may make
            multiple calls to the Azure service and the timeout will apply to
            each call individually.
        :return: ETag and last modified properties for the Append Blob
        :rtype: :class:`~azure.storage.blob.models.ResourceProperties`
        '''
        _validate_not_none('container_name', container_name)
        _validate_not_none('blob_name', blob_name)
        _validate_not_none('blob', blob)
        _validate_not_none('index', index)
        _validate_type_bytes('blob', blob)
        _validate_encryption_unsupported(self.require_encryption, self.key_encryption_key)

        if index < 0:
            raise IndexError(_ERROR_VALUE_NEGATIVE.format('index'))

        # None or a negative count means "everything from index to the end".
        if count is None or count < 0:
            count = len(blob) - index

        # Wrap the bytes in a stream positioned at index so the stream-based
        # upload path can be reused.
        stream = BytesIO(blob)
        stream.seek(index)

        return self.append_blob_from_stream(
            container_name,
            blob_name,
            stream,
            count=count,
            validate_content=validate_content,
            maxsize_condition=maxsize_condition,
            lease_id=lease_id,
            progress_callback=progress_callback,
            timeout=timeout)

    def append_blob_from_text(
            self, container_name, blob_name, text, encoding='utf-8',
            validate_content=False, maxsize_condition=None, progress_callback=None,
            lease_id=None, timeout=None):
        '''
        Appends to the content of an existing blob from str/unicode, with
        automatic chunking and progress notifications.

        :param str container_name:
            Name of existing container.
        :param str blob_name:
            Name of existing blob to append to.
        :param str text:
            Text to upload to the blob. A value that is already bytes is
            uploaded as-is without encoding.
        :param str encoding:
            Python encoding to use to convert the text to bytes.
        :param bool validate_content:
            If true, calculates an MD5 hash for each chunk of the blob. The storage
            service checks the hash of the content that has arrived with the hash
            that was sent. This is primarily valuable for detecting bitflips on
            the wire if using http instead of https as https (the default) will
            already validate. Note that this MD5 hash is not stored with the
            blob.
        :param int maxsize_condition:
            Optional conditional header. The max length in bytes permitted for
            the append blob. If the Append Block operation would cause the blob
            to exceed that limit or if the blob size is already greater than the
            value specified in this header, the request will fail with
            MaxBlobSizeConditionNotMet error (HTTP status code 412 - Precondition Failed).
        :param progress_callback:
            Callback for progress with signature function(current, total) where
            current is the number of bytes transferred so far, and total is the
            size of the blob, or None if the total size is unknown.
        :type progress_callback: func(current, total)
        :param str lease_id:
            Required if the blob has an active lease.
        :param int timeout:
            The timeout parameter is expressed in seconds. This method may make
            multiple calls to the Azure service and the timeout will apply to
            each call individually.
        :return: ETag and last modified properties for the Append Blob
        :rtype: :class:`~azure.storage.blob.models.ResourceProperties`
        '''
        _validate_not_none('container_name', container_name)
        _validate_not_none('blob_name', blob_name)
        _validate_not_none('text', text)
        _validate_encryption_unsupported(self.require_encryption, self.key_encryption_key)

        # Only non-bytes input needs (and requires) an encoding.
        if not isinstance(text, bytes):
            _validate_not_none('encoding', encoding)
            text = text.encode(encoding)

        return self.append_blob_from_bytes(
            container_name,
            blob_name,
            text,
            index=0,
            count=len(text),
            validate_content=validate_content,
            maxsize_condition=maxsize_condition,
            lease_id=lease_id,
            progress_callback=progress_callback,
            timeout=timeout)

    def append_blob_from_stream(
            self, container_name, blob_name, stream, count=None,
            validate_content=False, maxsize_condition=None, progress_callback=None,
            lease_id=None, timeout=None):
        '''
        Appends to the content of an existing blob from a file/stream, with
        automatic chunking and progress notifications.

        :param str container_name:
            Name of existing container.
        :param str blob_name:
            Name of existing blob to append to.
        :param io.IOBase stream:
            Opened stream to upload as the blob content.
        :param int count:
            Number of bytes to read from the stream. This is optional, but
            should be supplied for optimal performance.
        :param bool validate_content:
            If true, calculates an MD5 hash for each chunk of the blob. The storage
            service checks the hash of the content that has arrived with the hash
            that was sent. This is primarily valuable for detecting bitflips on
            the wire if using http instead of https as https (the default) will
            already validate. Note that this MD5 hash is not stored with the
            blob.
        :param int maxsize_condition:
            Conditional header. The max length in bytes permitted for
            the append blob. If the Append Block operation would cause the blob
            to exceed that limit or if the blob size is already greater than the
            value specified in this header, the request will fail with
            MaxBlobSizeConditionNotMet error (HTTP status code 412 - Precondition Failed).
        :param progress_callback:
            Callback for progress with signature function(current, total) where
            current is the number of bytes transferred so far, and total is the
            size of the blob, or None if the total size is unknown.
        :type progress_callback: func(current, total)
        :param str lease_id:
            Required if the blob has an active lease.
        :param int timeout:
            The timeout parameter is expressed in seconds. This method may make
            multiple calls to the Azure service and the timeout will apply to
            each call individually.
        :return: ETag and last modified properties for the Append Blob
        :rtype: :class:`~azure.storage.blob.models.ResourceProperties`
        '''
        _validate_not_none('container_name', container_name)
        _validate_not_none('blob_name', blob_name)
        _validate_not_none('stream', stream)
        _validate_encryption_unsupported(self.require_encryption, self.key_encryption_key)

        # _upload_blob_chunks returns the block ids for block blobs so resource_properties
        # is passed as a parameter to get the last_modified and etag for page and append blobs.
        # this info is not needed for block_blobs since _put_block_list is called after which gets this info
        resource_properties = ResourceProperties()
        _upload_blob_chunks(
            blob_service=self,
            container_name=container_name,
            blob_name=blob_name,
            blob_size=count,
            block_size=self.MAX_BLOCK_SIZE,
            stream=stream,
            max_connections=1,  # upload not easily parallelizable
            progress_callback=progress_callback,
            validate_content=validate_content,
            lease_id=lease_id,
            uploader_class=_AppendBlobChunkUploader,
            maxsize_condition=maxsize_condition,
            timeout=timeout,
            resource_properties=resource_properties
        )

        return resource_properties