#-------------------------------------------------------------------------
# Copyright (c) Microsoft.  All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#--------------------------------------------------------------------------
from .._error import (
    _validate_not_none,
    _validate_type_bytes,
    _validate_encryption_unsupported,
    _ERROR_VALUE_NEGATIVE,
)
from .._common_conversion import (
    _to_str,
    _int_to_str,
    _datetime_to_utc_string,
    _get_content_md5,
)
from .._serialization import (
    _get_data_bytes_only,
    _add_metadata_headers,
)
from .._http import HTTPRequest
from ._upload_chunking import (
    _AppendBlobChunkUploader,
    _upload_blob_chunks,
)
from .models import _BlobTypes
from .._constants import (
    SERVICE_HOST_BASE,
    DEFAULT_PROTOCOL,
)
from ._serialization import (
    _get_path,
)
from ._deserialization import (
    _parse_append_block,
    _parse_base_properties,
)
from .baseblobservice import BaseBlobService
from os import path
import sys
if sys.version_info >= (3,):
    from io import BytesIO
else:
    from cStringIO import StringIO as BytesIO


class AppendBlobService(BaseBlobService):
    '''
    An append blob is comprised of blocks and is optimized for append operations.
    When you modify an append blob, blocks are added to the end of the blob only,
    via the append_block operation. Updating or deleting of existing blocks is not
    supported. Unlike a block blob, an append blob does not expose its block IDs.

    Each block in an append blob can be a different size, up to a maximum of 4 MB,
    and an append blob can include up to 50,000 blocks. The maximum size of an
    append blob is therefore slightly more than 195 GB (4 MB X 50,000 blocks).

    :ivar int MAX_BLOCK_SIZE:
        The size of the blocks put by append_blob_from_* methods. Smaller blocks
        may be put if there is less data provided. The maximum block size the service
        supports is 4MB.
    '''
    MAX_BLOCK_SIZE = 4 * 1024 * 1024

    def __init__(self, account_name=None, account_key=None, sas_token=None,
                 is_emulated=False, protocol=DEFAULT_PROTOCOL, endpoint_suffix=SERVICE_HOST_BASE,
                 custom_domain=None, request_session=None, connection_string=None, socket_timeout=None):
        '''
        :param str account_name:
            The storage account name. This is used to authenticate requests
            signed with an account key and to construct the storage endpoint. It
            is required unless a connection string is given, or if a custom
            domain is used with anonymous authentication.
        :param str account_key:
            The storage account key. This is used for shared key authentication.
            If neither account key or sas token is specified, anonymous access
            will be used.
        :param str sas_token:
            A shared access signature token to use to authenticate requests
            instead of the account key. If account key and sas token are both
            specified, account key will be used to sign. If neither are
            specified, anonymous access will be used.
        :param bool is_emulated:
            Whether to use the emulator. Defaults to False. If specified, will
            override all other parameters besides connection string and request
            session.
        :param str protocol:
            The protocol to use for requests. Defaults to https.
        :param str endpoint_suffix:
            The host base component of the url, minus the account name. Defaults
            to Azure (core.windows.net). Override this to use the China cloud
            (core.chinacloudapi.cn).
        :param str custom_domain:
            The custom domain to use. This can be set in the Azure Portal. For
            example, 'www.mydomain.com'.
        :param requests.Session request_session:
            The session object to use for http requests.
        :param str connection_string:
            If specified, this will override all other parameters besides
            request session. See
            http://azure.microsoft.com/en-us/documentation/articles/storage-configure-connection-string/
            for the connection string format.
        :param int socket_timeout:
            If specified, this will override the default socket timeout. The timeout specified is in seconds.
            See DEFAULT_SOCKET_TIMEOUT in _constants.py for the default value.
        '''
        # blob_type is assigned before delegating to the base initializer;
        # create_blob later sends it as the x-ms-blob-type header.
        self.blob_type = _BlobTypes.AppendBlob
        super(AppendBlobService, self).__init__(
            account_name, account_key, sas_token, is_emulated, protocol, endpoint_suffix,
            custom_domain, request_session, connection_string, socket_timeout)

    def create_blob(self, container_name, blob_name, content_settings=None,
                    metadata=None, lease_id=None,
                    if_modified_since=None, if_unmodified_since=None,
                    if_match=None, if_none_match=None, timeout=None):
        '''
        Creates a blob or overrides an existing blob. Use if_none_match=* to
        prevent overriding an existing blob.

        See append_blob_from_* for high level functions that handle appending
        large amounts of data with automatic chunking and progress
        notifications.

        :param str container_name:
            Name of existing container.
        :param str blob_name:
            Name of blob to create or update.
        :param ~azure.storage.blob.models.ContentSettings content_settings:
            ContentSettings object used to set blob properties.
        :param metadata:
            Name-value pairs associated with the blob as metadata.
        :type metadata: a dict mapping str to str
        :param str lease_id:
            Required if the blob has an active lease.
        :param datetime if_modified_since:
            A DateTime value. Azure expects the date value passed in to be UTC.
            If timezone is included, any non-UTC datetimes will be converted to UTC.
            If a date is passed in without timezone info, it is assumed to be UTC.
            Specify this header to perform the operation only
            if the resource has been modified since the specified time.
        :param datetime if_unmodified_since:
            A DateTime value. Azure expects the date value passed in to be UTC.
            If timezone is included, any non-UTC datetimes will be converted to UTC.
            If a date is passed in without timezone info, it is assumed to be UTC.
            Specify this header to perform the operation only if
            the resource has not been modified since the specified date/time.
        :param str if_match:
            An ETag value, or the wildcard character (*). Specify this header to
            perform the operation only if the resource's ETag matches the value specified.
        :param str if_none_match:
            An ETag value, or the wildcard character (*). Specify this header
            to perform the operation only if the resource's ETag does not match
            the value specified. Specify the wildcard character (*) to perform
            the operation only if the resource does not exist, and fail the
            operation if it does exist.
        :param int timeout:
            The timeout parameter is expressed in seconds.
        :return: ETag and last modified properties for the updated Append Blob
        :rtype: :class:`~azure.storage.blob.models.ResourceProperties`
        '''
        _validate_not_none('container_name', container_name)
        _validate_not_none('blob_name', blob_name)
        _validate_encryption_unsupported(self.require_encryption, self.key_encryption_key)

        request = HTTPRequest()
        request.method = 'PUT'
        request.host_locations = self._get_host_locations()
        request.path = _get_path(container_name, blob_name)
        request.query = {'timeout': _int_to_str(timeout)}
        request.headers = {
            'x-ms-blob-type': _to_str(self.blob_type),
            'x-ms-lease-id': _to_str(lease_id),
            'If-Modified-Since': _datetime_to_utc_string(if_modified_since),
            'If-Unmodified-Since': _datetime_to_utc_string(if_unmodified_since),
            'If-Match': _to_str(if_match),
            'If-None-Match': _to_str(if_none_match)
        }
        _add_metadata_headers(metadata, request)
        if content_settings is not None:
            request.headers.update(content_settings._to_headers())

        return self._perform_request(request, _parse_base_properties)

    def append_block(self, container_name, blob_name, block,
                     validate_content=False, maxsize_condition=None,
                     appendpos_condition=None,
                     lease_id=None, if_modified_since=None,
                     if_unmodified_since=None, if_match=None,
                     if_none_match=None, timeout=None):
        '''
        Commits a new block of data to the end of an existing append blob.

        :param str container_name:
            Name of existing container.
        :param str blob_name:
            Name of existing blob.
        :param bytes block:
            Content of the block in bytes.
        :param bool validate_content:
            If true, calculates an MD5 hash of the block content. The storage
            service checks the hash of the content that has arrived
            with the hash that was sent. This is primarily valuable for detecting
            bitflips on the wire if using http instead of https as https (the default)
            will already validate. Note that this MD5 hash is not stored with the
            blob.
        :param int maxsize_condition:
            Optional conditional header. The max length in bytes permitted for
            the append blob. If the Append Block operation would cause the blob
            to exceed that limit or if the blob size is already greater than the
            value specified in this header, the request will fail with
            MaxBlobSizeConditionNotMet error (HTTP status code 412 - Precondition Failed).
        :param int appendpos_condition:
            Optional conditional header, used only for the Append Block operation.
            A number indicating the byte offset to compare. Append Block will
            succeed only if the append position is equal to this number. If it
            is not, the request will fail with the
            AppendPositionConditionNotMet error
            (HTTP status code 412 - Precondition Failed).
        :param str lease_id:
            Required if the blob has an active lease.
        :param datetime if_modified_since:
            A DateTime value. Azure expects the date value passed in to be UTC.
            If timezone is included, any non-UTC datetimes will be converted to UTC.
            If a date is passed in without timezone info, it is assumed to be UTC.
            Specify this header to perform the operation only
            if the resource has been modified since the specified time.
        :param datetime if_unmodified_since:
            A DateTime value. Azure expects the date value passed in to be UTC.
            If timezone is included, any non-UTC datetimes will be converted to UTC.
            If a date is passed in without timezone info, it is assumed to be UTC.
            Specify this header to perform the operation only if
            the resource has not been modified since the specified date/time.
        :param str if_match:
            An ETag value, or the wildcard character (*). Specify this header to perform
            the operation only if the resource's ETag matches the value specified.
        :param str if_none_match:
            An ETag value, or the wildcard character (*). Specify this header
            to perform the operation only if the resource's ETag does not match
            the value specified. Specify the wildcard character (*) to perform
            the operation only if the resource does not exist, and fail the
            operation if it does exist.
        :param int timeout:
            The timeout parameter is expressed in seconds.
        :return:
            ETag, last modified, append offset, and committed block count
            properties for the updated Append Blob
        :rtype: :class:`~azure.storage.blob.models.AppendBlockProperties`
        '''
        _validate_not_none('container_name', container_name)
        _validate_not_none('blob_name', blob_name)
        _validate_not_none('block', block)
        _validate_encryption_unsupported(self.require_encryption, self.key_encryption_key)

        request = HTTPRequest()
        request.method = 'PUT'
        request.host_locations = self._get_host_locations()
        request.path = _get_path(container_name, blob_name)
        request.query = {
            'comp': 'appendblock',
            'timeout': _int_to_str(timeout),
        }
        request.headers = {
            'x-ms-blob-condition-maxsize': _to_str(maxsize_condition),
            'x-ms-blob-condition-appendpos': _to_str(appendpos_condition),
            'x-ms-lease-id': _to_str(lease_id),
            'If-Modified-Since': _datetime_to_utc_string(if_modified_since),
            'If-Unmodified-Since': _datetime_to_utc_string(if_unmodified_since),
            'If-Match': _to_str(if_match),
            'If-None-Match': _to_str(if_none_match)
        }
        request.body = _get_data_bytes_only('block', block)

        # The MD5 is computed over the request body exactly as it will be sent.
        if validate_content:
            computed_md5 = _get_content_md5(request.body)
            request.headers['Content-MD5'] = _to_str(computed_md5)

        return self._perform_request(request, _parse_append_block)

    #----Convenience APIs----------------------------------------------

    def append_blob_from_path(
        self, container_name, blob_name, file_path, validate_content=False,
        maxsize_condition=None, progress_callback=None, lease_id=None, timeout=None):
        '''
        Appends to the content of an existing blob from a file path, with automatic
        chunking and progress notifications.

        :param str container_name:
            Name of existing container.
        :param str blob_name:
            Name of the existing blob to append to.
        :param str file_path:
            Path of the file to upload as the blob content.
        :param bool validate_content:
            If true, calculates an MD5 hash for each chunk of the blob. The storage
            service checks the hash of the content that has arrived with the hash
            that was sent. This is primarily valuable for detecting bitflips on
            the wire if using http instead of https as https (the default) will
            already validate. Note that this MD5 hash is not stored with the
            blob.
        :param int maxsize_condition:
            Optional conditional header. The max length in bytes permitted for
            the append blob. If the Append Block operation would cause the blob
            to exceed that limit or if the blob size is already greater than the
            value specified in this header, the request will fail with
            MaxBlobSizeConditionNotMet error (HTTP status code 412 - Precondition Failed).
        :param progress_callback:
            Callback for progress with signature function(current, total) where
            current is the number of bytes transferred so far, and total is the
            size of the blob, or None if the total size is unknown.
        :type progress_callback: callback function in format of func(current, total)
        :param str lease_id:
            Required if the blob has an active lease.
        :param int timeout:
            The timeout parameter is expressed in seconds. This method may make
            multiple calls to the Azure service and the timeout will apply to
            each call individually.
        '''
        _validate_not_none('container_name', container_name)
        _validate_not_none('blob_name', blob_name)
        _validate_not_none('file_path', file_path)
        _validate_encryption_unsupported(self.require_encryption, self.key_encryption_key)

        # The file size is passed along as the byte count for the stream upload.
        count = path.getsize(file_path)
        with open(file_path, 'rb') as stream:
            self.append_blob_from_stream(
                container_name,
                blob_name,
                stream,
                count=count,
                validate_content=validate_content,
                maxsize_condition=maxsize_condition,
                progress_callback=progress_callback,
                lease_id=lease_id,
                timeout=timeout)

    def append_blob_from_bytes(
        self, container_name, blob_name, blob, index=0, count=None,
        validate_content=False, maxsize_condition=None, progress_callback=None,
        lease_id=None, timeout=None):
        '''
        Appends to the content of an existing blob from an array of bytes, with
        automatic chunking and progress notifications.

        :param str container_name:
            Name of existing container.
        :param str blob_name:
            Name of the existing blob to append to.
        :param bytes blob:
            Content of blob as an array of bytes.
        :param int index:
            Start index in the array of bytes. A negative index raises
            IndexError.
        :param int count:
            Number of bytes to upload. Set to None or negative value to upload
            all bytes starting from index.
        :param bool validate_content:
            If true, calculates an MD5 hash for each chunk of the blob. The storage
            service checks the hash of the content that has arrived with the hash
            that was sent. This is primarily valuable for detecting bitflips on
            the wire if using http instead of https as https (the default) will
            already validate. Note that this MD5 hash is not stored with the
            blob.
        :param int maxsize_condition:
            Optional conditional header. The max length in bytes permitted for
            the append blob. If the Append Block operation would cause the blob
            to exceed that limit or if the blob size is already greater than the
            value specified in this header, the request will fail with
            MaxBlobSizeConditionNotMet error (HTTP status code 412 - Precondition Failed).
        :param progress_callback:
            Callback for progress with signature function(current, total) where
            current is the number of bytes transferred so far, and total is the
            size of the blob, or None if the total size is unknown.
        :type progress_callback: callback function in format of func(current, total)
        :param str lease_id:
            Required if the blob has an active lease.
        :param int timeout:
            The timeout parameter is expressed in seconds. This method may make
            multiple calls to the Azure service and the timeout will apply to
            each call individually.
        '''
        _validate_not_none('container_name', container_name)
        _validate_not_none('blob_name', blob_name)
        _validate_not_none('blob', blob)
        _validate_not_none('index', index)
        _validate_type_bytes('blob', blob)
        _validate_encryption_unsupported(self.require_encryption, self.key_encryption_key)

        if index < 0:
            raise IndexError(_ERROR_VALUE_NEGATIVE.format('index'))

        if count is None or count < 0:
            # Clamp at zero so that an index past the end of the buffer never
            # forwards a negative byte count to the stream upload.
            count = max(len(blob) - index, 0)

        stream = BytesIO(blob)
        stream.seek(index)

        self.append_blob_from_stream(
            container_name,
            blob_name,
            stream,
            count=count,
            validate_content=validate_content,
            maxsize_condition=maxsize_condition,
            lease_id=lease_id,
            progress_callback=progress_callback,
            timeout=timeout)

    def append_blob_from_text(
        self, container_name, blob_name, text, encoding='utf-8',
        validate_content=False, maxsize_condition=None, progress_callback=None,
        lease_id=None, timeout=None):
        '''
        Appends to the content of an existing blob from str/unicode, with
        automatic chunking and progress notifications.

        :param str container_name:
            Name of existing container.
        :param str blob_name:
            Name of the existing blob to append to.
        :param str text:
            Text to upload to the blob. A value that is already bytes is
            uploaded as-is, without being encoded.
        :param str encoding:
            Python encoding to use to convert the text to bytes.
        :param bool validate_content:
            If true, calculates an MD5 hash for each chunk of the blob. The storage
            service checks the hash of the content that has arrived with the hash
            that was sent. This is primarily valuable for detecting bitflips on
            the wire if using http instead of https as https (the default) will
            already validate. Note that this MD5 hash is not stored with the
            blob.
        :param int maxsize_condition:
            Optional conditional header. The max length in bytes permitted for
            the append blob. If the Append Block operation would cause the blob
            to exceed that limit or if the blob size is already greater than the
            value specified in this header, the request will fail with
            MaxBlobSizeConditionNotMet error (HTTP status code 412 - Precondition Failed).
        :param progress_callback:
            Callback for progress with signature function(current, total) where
            current is the number of bytes transferred so far, and total is the
            size of the blob, or None if the total size is unknown.
        :type progress_callback: callback function in format of func(current, total)
        :param str lease_id:
            Required if the blob has an active lease.
        :param int timeout:
            The timeout parameter is expressed in seconds. This method may make
            multiple calls to the Azure service and the timeout will apply to
            each call individually.
        '''
        _validate_not_none('container_name', container_name)
        _validate_not_none('blob_name', blob_name)
        _validate_not_none('text', text)
        _validate_encryption_unsupported(self.require_encryption, self.key_encryption_key)

        # The encoding is only required (and only used) when text is not bytes.
        if not isinstance(text, bytes):
            _validate_not_none('encoding', encoding)
            text = text.encode(encoding)

        self.append_blob_from_bytes(
            container_name,
            blob_name,
            text,
            index=0,
            count=len(text),
            validate_content=validate_content,
            maxsize_condition=maxsize_condition,
            lease_id=lease_id,
            progress_callback=progress_callback,
            timeout=timeout)

    def append_blob_from_stream(
        self, container_name, blob_name, stream, count=None,
        validate_content=False, maxsize_condition=None, progress_callback=None,
        lease_id=None, timeout=None):
        '''
        Appends to the content of an existing blob from a file/stream, with
        automatic chunking and progress notifications.

        :param str container_name:
            Name of existing container.
        :param str blob_name:
            Name of the existing blob to append to.
        :param io.IOBase stream:
            Opened stream to upload as the blob content.
        :param int count:
            Number of bytes to read from the stream. This is optional, but
            should be supplied for optimal performance.
        :param bool validate_content:
            If true, calculates an MD5 hash for each chunk of the blob. The storage
            service checks the hash of the content that has arrived with the hash
            that was sent. This is primarily valuable for detecting bitflips on
            the wire if using http instead of https as https (the default) will
            already validate. Note that this MD5 hash is not stored with the
            blob.
        :param int maxsize_condition:
            Conditional header. The max length in bytes permitted for
            the append blob. If the Append Block operation would cause the blob
            to exceed that limit or if the blob size is already greater than the
            value specified in this header, the request will fail with
            MaxBlobSizeConditionNotMet error (HTTP status code 412 - Precondition Failed).
        :param progress_callback:
            Callback for progress with signature function(current, total) where
            current is the number of bytes transferred so far, and total is the
            size of the blob, or None if the total size is unknown.
        :type progress_callback: callback function in format of func(current, total)
        :param str lease_id:
            Required if the blob has an active lease.
        :param int timeout:
            The timeout parameter is expressed in seconds. This method may make
            multiple calls to the Azure service and the timeout will apply to
            each call individually.
        '''
        _validate_not_none('container_name', container_name)
        _validate_not_none('blob_name', blob_name)
        _validate_not_none('stream', stream)
        _validate_encryption_unsupported(self.require_encryption, self.key_encryption_key)

        _upload_blob_chunks(
            blob_service=self,
            container_name=container_name,
            blob_name=blob_name,
            blob_size=count,
            block_size=self.MAX_BLOCK_SIZE,
            stream=stream,
            max_connections=1,  # upload not easily parallelizable
            progress_callback=progress_callback,
            validate_content=validate_content,
            lease_id=lease_id,
            uploader_class=_AppendBlobChunkUploader,
            maxsize_condition=maxsize_condition,
            timeout=timeout
        )