1# -*- coding: utf-8 -*- 2 3## Amazon S3 manager 4## Author: Michal Ludvig <michal@logix.cz> 5## http://www.logix.cz/michal 6## License: GPL Version 2 7## Copyright: TGRMN Software and contributors 8 9from __future__ import absolute_import 10 11import logging 12import datetime 13import locale 14import re 15import os 16import io 17import sys 18import json 19import time 20 21from logging import debug, warning 22 23from .ExitCodes import EX_OSFILE 24 25try: 26 import dateutil.parser 27 import dateutil.tz 28except ImportError: 29 sys.stderr.write(u""" 30!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 31ImportError trying to import dateutil.parser and dateutil.tz. 32Please install the python dateutil module: 33$ sudo apt-get install python-dateutil 34 or 35$ sudo yum install python-dateutil 36 or 37$ pip install python-dateutil 38!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 39""") 40 sys.stderr.flush() 41 sys.exit(EX_OSFILE) 42 43try: 44 # python 3 support 45 import httplib 46except ImportError: 47 import http.client as httplib 48 49try: 50 from configparser import (NoOptionError, NoSectionError, 51 MissingSectionHeaderError, ParsingError, 52 ConfigParser as PyConfigParser) 53except ImportError: 54 # Python2 fallback code 55 from ConfigParser import (NoOptionError, NoSectionError, 56 MissingSectionHeaderError, ParsingError, 57 ConfigParser as PyConfigParser) 58 59from . import Progress 60from .SortedDict import SortedDict 61from .BaseUtils import (s3_quote, getTreeFromXml, getDictFromTree, 62 base_unicodise, dateRFC822toPython) 63 64 65try: 66 unicode 67except NameError: 68 # python 3 support 69 # In python 3, unicode -> str, and str -> bytes 70 unicode = str 71 72 73def is_bool_true(value): 74 """Check to see if a string is true, yes, on, or 1 75 76 value may be a str, or unicode. 77 78 Return True if it is 79 """ 80 if type(value) == unicode: 81 return value.lower() in ["true", "yes", "on", "1"] 82 elif type(value) == bool and value == True: 83 return True 84 else: 85 return False 86 87 88def is_bool_false(value): 89 """Check to see if a string is false, no, off, or 0 90 91 value may be a str, or unicode. 92 93 Return True if it is 94 """ 95 if type(value) == unicode: 96 return value.lower() in ["false", "no", "off", "0"] 97 elif type(value) == bool and value == False: 98 return True 99 else: 100 return False 101 102 103def is_bool(value): 104 """Check a string value to see if it is bool""" 105 return is_bool_true(value) or is_bool_false(value) 106 107 108class Config(object): 109 _instance = None 110 _parsed_files = [] 111 _doc = {} 112 access_key = u"" 113 secret_key = u"" 114 access_token = u"" 115 _access_token_refresh = True 116 _access_token_expiration = None 117 _access_token_last_update = None 118 host_base = u"s3.amazonaws.com" 119 host_bucket = u"%(bucket)s.s3.amazonaws.com" 120 kms_key = u"" #can't set this and Server Side Encryption at the same time 121 # simpledb_host looks useless, legacy? to remove? 122 simpledb_host = u"sdb.amazonaws.com" 123 cloudfront_host = u"cloudfront.amazonaws.com" 124 verbosity = logging.WARNING 125 progress_meter = sys.stdout.isatty() 126 progress_class = Progress.ProgressCR 127 send_chunk = 64 * 1024 128 recv_chunk = 64 * 1024 129 list_md5 = False 130 long_listing = False 131 human_readable_sizes = False 132 extra_headers = SortedDict(ignore_case = True) 133 force = False 134 server_side_encryption = False 135 enable = None 136 get_continue = False 137 put_continue = False 138 upload_id = u"" 139 skip_existing = False 140 recursive = False 141 restore_days = 1 142 restore_priority = u"Standard" 143 acl_public = None 144 acl_grants = [] 145 acl_revokes = [] 146 proxy_host = u"" 147 proxy_port = 3128 148 encrypt = False 149 dry_run = False 150 add_encoding_exts = u"" 151 preserve_attrs = True 152 preserve_attrs_list = [ 153 u'uname', # Verbose owner Name (e.g. 'root') 154 u'uid', # Numeric user ID (e.g. 0) 155 u'gname', # Group name (e.g. 'users') 156 u'gid', # Numeric group ID (e.g. 100) 157 u'atime', # Last access timestamp 158 u'mtime', # Modification timestamp 159 u'ctime', # Creation timestamp 160 u'mode', # File mode (e.g. rwxr-xr-x = 755) 161 u'md5', # File MD5 (if known) 162 #u'acl', # Full ACL (not yet supported) 163 ] 164 delete_removed = False 165 delete_after = False 166 delete_after_fetch = False 167 max_delete = -1 168 limit = -1 169 _doc['delete_removed'] = u"[sync] Remove remote S3 objects when local file has been deleted" 170 delay_updates = False # OBSOLETE 171 gpg_passphrase = u"" 172 gpg_command = u"" 173 gpg_encrypt = u"%(gpg_command)s -c --verbose --no-use-agent --batch --yes --passphrase-fd %(passphrase_fd)s -o %(output_file)s %(input_file)s" 174 gpg_decrypt = u"%(gpg_command)s -d --verbose --no-use-agent --batch --yes --passphrase-fd %(passphrase_fd)s -o %(output_file)s %(input_file)s" 175 use_https = True 176 ca_certs_file = u"" 177 ssl_client_key_file = u"" 178 ssl_client_cert_file = u"" 179 check_ssl_certificate = True 180 check_ssl_hostname = True 181 bucket_location = u"US" 182 default_mime_type = u"binary/octet-stream" 183 guess_mime_type = True 184 use_mime_magic = True 185 mime_type = u"" 186 enable_multipart = True 187 # Chunk size is at the same time the chunk size and the threshold 188 multipart_chunk_size_mb = 15 # MiB 189 # Maximum chunk size for s3-to-s3 copy is 5 GiB. 190 # But, use a lot lower value by default (1GiB) 191 multipart_copy_chunk_size_mb = 1 * 1024 192 # Maximum chunks on AWS S3, could be different on other S3-compatible APIs 193 multipart_max_chunks = 10000 194 # List of checks to be performed for 'sync' 195 sync_checks = ['size', 'md5'] # 'weak-timestamp' 196 # List of compiled REGEXPs 197 exclude = [] 198 include = [] 199 # Dict mapping compiled REGEXPs back to their textual form 200 debug_exclude = {} 201 debug_include = {} 202 encoding = locale.getpreferredencoding() or "UTF-8" 203 urlencoding_mode = u"normal" 204 log_target_prefix = u"" 205 reduced_redundancy = False 206 storage_class = u"" 207 follow_symlinks = False 208 # If too big, this value can be overriden by the OS socket timeouts max values. 209 # For example, on Linux, a connection attempt will automatically timeout after 120s. 210 socket_timeout = 300 211 invalidate_on_cf = False 212 # joseprio: new flags for default index invalidation 213 invalidate_default_index_on_cf = False 214 invalidate_default_index_root_on_cf = True 215 website_index = u"index.html" 216 website_error = u"" 217 website_endpoint = u"http://%(bucket)s.s3-website-%(location)s.amazonaws.com/" 218 additional_destinations = [] 219 files_from = [] 220 cache_file = u"" 221 add_headers = u"" 222 remove_headers = [] 223 expiry_days = u"" 224 expiry_date = u"" 225 expiry_prefix = u"" 226 signature_v2 = False 227 limitrate = 0 228 requester_pays = False 229 stop_on_error = False 230 content_disposition = u"" 231 content_type = u"" 232 stats = False 233 # Disabled by default because can create a latency with a CONTINUE status reply 234 # expected for every send file requests. 235 use_http_expect = False 236 signurl_use_https = False 237 # Maximum sleep duration for throtte / limitrate. 238 # s3 will timeout if a request/transfer is stuck for more than a short time 239 throttle_max = 100 240 public_url_use_https = False 241 connection_pooling = True 242 # How long in seconds a connection can be kept idle in the pool and still 243 # be alive. AWS s3 is supposed to close connections that are idle for 20 244 # seconds or more, but in real life, undocumented, it closes https conns 245 # after around 6s of inactivity. 246 connection_max_age = 5 247 248 ## Creating a singleton 249 def __new__(self, configfile = None, access_key=None, secret_key=None, access_token=None): 250 if self._instance is None: 251 self._instance = object.__new__(self) 252 return self._instance 253 254 def __init__(self, configfile = None, access_key=None, secret_key=None, access_token=None): 255 if configfile: 256 try: 257 self.read_config_file(configfile) 258 except IOError: 259 if 'AWS_CREDENTIAL_FILE' in os.environ or 'AWS_PROFILE' in os.environ: 260 self.aws_credential_file() 261 262 # override these if passed on the command-line 263 if access_key and secret_key: 264 self.access_key = access_key 265 self.secret_key = secret_key 266 if access_token: 267 self.access_token = access_token 268 # Do not refresh the IAM role when an access token is provided. 269 self._access_token_refresh = False 270 271 if len(self.access_key) == 0: 272 env_access_key = os.getenv('AWS_ACCESS_KEY') or os.getenv('AWS_ACCESS_KEY_ID') 273 env_secret_key = os.getenv('AWS_SECRET_KEY') or os.getenv('AWS_SECRET_ACCESS_KEY') 274 env_access_token = os.getenv('AWS_SESSION_TOKEN') or os.getenv('AWS_SECURITY_TOKEN') 275 if env_access_key: 276 if not env_secret_key: 277 raise ValueError( 278 "AWS_ACCESS_KEY environment variable is used but" 279 " AWS_SECRET_KEY variable is missing" 280 ) 281 # py3 getenv returns unicode and py2 returns bytes. 282 self.access_key = base_unicodise(env_access_key) 283 self.secret_key = base_unicodise(env_secret_key) 284 if env_access_token: 285 # Do not refresh the IAM role when an access token is provided. 286 self._access_token_refresh = False 287 self.access_token = base_unicodise(env_access_token) 288 else: 289 self.role_config() 290 291 #TODO check KMS key is valid 292 if self.kms_key and self.server_side_encryption == True: 293 warning('Cannot have server_side_encryption (S3 SSE) and KMS_key set (S3 KMS). KMS encryption will be used. Please set server_side_encryption to False') 294 if self.kms_key and self.signature_v2 == True: 295 raise Exception('KMS encryption requires signature v4. Please set signature_v2 to False') 296 297 def role_config(self): 298 """ 299 Get credentials from IAM authentication and STS AssumeRole 300 """ 301 try: 302 role_arn = os.environ.get('AWS_ROLE_ARN') 303 if role_arn: 304 role_session_name = 'role-session-%s' % (int(time.time())) 305 params = { 306 'Action': 'AssumeRole', 307 'Version': '2011-06-15', 308 'RoleArn': role_arn, 309 'RoleSessionName': role_session_name, 310 } 311 web_identity_token_file = os.environ.get('AWS_WEB_IDENTITY_TOKEN_FILE') 312 if web_identity_token_file: 313 with open(web_identity_token_file) as f: 314 web_identity_token = f.read() 315 params['Action'] = 'AssumeRoleWithWebIdentity' 316 params['WebIdentityToken'] = web_identity_token 317 encoded_params = '&'.join([ 318 '%s=%s' % (k, s3_quote(v, unicode_output=True)) 319 for k, v in params.items() 320 ]) 321 conn = httplib.HTTPSConnection(host='sts.amazonaws.com', 322 timeout=2) 323 conn.request('POST', '/?' + encoded_params) 324 resp = conn.getresponse() 325 resp_content = resp.read() 326 if resp.status == 200 and len(resp_content) > 1: 327 tree = getTreeFromXml(resp_content) 328 result_dict = getDictFromTree(tree) 329 if tree.tag == "AssumeRoleResponse": 330 creds = result_dict['AssumeRoleResult']['Credentials'] 331 elif tree.tag == "AssumeRoleWithWebIdentityResponse": 332 creds = result_dict['AssumeRoleWithWebIdentityResult']['Credentials'] 333 else: 334 raise IOError("Unexpected XML message from STS server: <%s />" % tree.tag) 335 Config().update_option('access_key', creds['AccessKeyId']) 336 Config().update_option('secret_key', creds['SecretAccessKey']) 337 Config().update_option('access_token', creds['SessionToken']) 338 expiration = dateRFC822toPython(base_unicodise(creds['Expiration'])) 339 # Add a timedelta to prevent any expiration if the EC2 machine is not at the right date 340 self._access_token_expiration = expiration - datetime.timedelta(minutes=15) 341 # last update date is not provided in STS responses 342 self._access_token_last_update = datetime.datetime.now(dateutil.tz.tzutc()) 343 # Others variables : Code / Type 344 else: 345 raise IOError 346 else: 347 conn = httplib.HTTPConnection(host='169.254.169.254', 348 timeout=2) 349 conn.request('GET', "/latest/meta-data/iam/security-credentials/") 350 resp = conn.getresponse() 351 files = resp.read() 352 if resp.status == 200 and len(files) > 1: 353 conn.request('GET', "/latest/meta-data/iam/security-credentials/%s" % files.decode('utf-8')) 354 resp=conn.getresponse() 355 if resp.status == 200: 356 resp_content = base_unicodise(resp.read()) 357 creds=json.loads(resp_content) 358 Config().update_option('access_key', base_unicodise(creds['AccessKeyId'])) 359 Config().update_option('secret_key', base_unicodise(creds['SecretAccessKey'])) 360 Config().update_option('access_token', base_unicodise(creds['Token'])) 361 expiration = dateRFC822toPython(base_unicodise(creds['Expiration'])) 362 # Add a timedelta to prevent any expiration if the EC2 machine is not at the right date 363 self._access_token_expiration = expiration - datetime.timedelta(minutes=15) 364 self._access_token_last_update = dateRFC822toPython(base_unicodise(creds['LastUpdated'])) 365 # Others variables : Code / Type 366 else: 367 raise IOError 368 else: 369 raise IOError 370 except: 371 raise 372 373 def role_refresh(self): 374 if self._access_token_refresh: 375 now = datetime.datetime.now(dateutil.tz.tzutc()) 376 if self._access_token_expiration \ 377 and now < self._access_token_expiration \ 378 and self._access_token_last_update \ 379 and self._access_token_last_update <= now: 380 # current token is still valid. No need to refresh it 381 return 382 try: 383 self.role_config() 384 except Exception: 385 warning("Could not refresh role") 386 387 def aws_credential_file(self): 388 try: 389 aws_credential_file = os.path.expanduser('~/.aws/credentials') 390 credential_file_from_env = os.environ.get('AWS_CREDENTIAL_FILE') 391 if credential_file_from_env and \ 392 os.path.isfile(credential_file_from_env): 393 aws_credential_file = base_unicodise(credential_file_from_env) 394 elif not os.path.isfile(aws_credential_file): 395 return 396 397 config = PyConfigParser() 398 399 debug("Reading AWS credentials from %s" % (aws_credential_file)) 400 with io.open(aws_credential_file, "r", 401 encoding=getattr(self, 'encoding', 'UTF-8')) as fp: 402 config_string = fp.read() 403 try: 404 try: 405 # readfp is replaced by read_file in python3, 406 # but so far readfp it is still available. 407 config.readfp(io.StringIO(config_string)) 408 except MissingSectionHeaderError: 409 # if header is missing, this could be deprecated 410 # credentials file format as described here: 411 # https://blog.csanchez.org/2011/05/ 412 # then do the hacky-hack and add default header 413 # to be able to read the file with PyConfigParser() 414 config_string = u'[default]\n' + config_string 415 config.readfp(io.StringIO(config_string)) 416 except ParsingError as exc: 417 raise ValueError( 418 "Error reading aws_credential_file " 419 "(%s): %s" % (aws_credential_file, str(exc))) 420 421 profile = base_unicodise(os.environ.get('AWS_PROFILE', "default")) 422 debug("Using AWS profile '%s'" % (profile)) 423 424 # get_key - helper function to read the aws profile credentials 425 # including the legacy ones as described here: 426 # https://blog.csanchez.org/2011/05/ 427 def get_key(profile, key, legacy_key, print_warning=True): 428 result = None 429 430 try: 431 result = config.get(profile, key) 432 except NoOptionError as e: 433 # we may want to skip warning message for optional keys 434 if print_warning: 435 warning("Couldn't find key '%s' for the AWS Profile " 436 "'%s' in the credentials file '%s'", 437 e.option, e.section, aws_credential_file) 438 # if the legacy_key defined and original one wasn't found, 439 # try read the legacy_key 440 if legacy_key: 441 try: 442 key = legacy_key 443 profile = "default" 444 result = config.get(profile, key) 445 warning( 446 "Legacy configuratin key '%s' used, please use" 447 " the standardized config format as described " 448 "here: https://aws.amazon.com/blogs/security/a-new-and-standardized-way-to-manage-credentials-in-the-aws-sdks/", 449 key) 450 except NoOptionError as e: 451 pass 452 453 if result: 454 debug("Found the configuration option '%s' for the AWS " 455 "Profile '%s' in the credentials file %s", 456 key, profile, aws_credential_file) 457 return result 458 459 profile_access_key = get_key(profile, "aws_access_key_id", 460 "AWSAccessKeyId") 461 if profile_access_key: 462 Config().update_option('access_key', 463 base_unicodise(profile_access_key)) 464 465 profile_secret_key = get_key(profile, "aws_secret_access_key", 466 "AWSSecretKey") 467 if profile_secret_key: 468 Config().update_option('secret_key', 469 base_unicodise(profile_secret_key)) 470 471 profile_access_token = get_key(profile, "aws_session_token", None, 472 False) 473 if profile_access_token: 474 Config().update_option('access_token', 475 base_unicodise(profile_access_token)) 476 477 except IOError as e: 478 warning("Errno %d accessing credentials file %s", e.errno, 479 aws_credential_file) 480 except NoSectionError as e: 481 warning("Couldn't find AWS Profile '%s' in the credentials file " 482 "'%s'", profile, aws_credential_file) 483 484 def option_list(self): 485 retval = [] 486 for option in dir(self): 487 ## Skip attributes that start with underscore or are not string, int or bool 488 option_type = type(getattr(Config, option)) 489 if option.startswith("_") or \ 490 not (option_type in ( 491 type(u"string"), # str 492 type(42), # int 493 type(True))): # bool 494 continue 495 retval.append(option) 496 return retval 497 498 def read_config_file(self, configfile): 499 cp = ConfigParser(configfile) 500 for option in self.option_list(): 501 _option = cp.get(option) 502 if _option is not None: 503 _option = _option.strip() 504 self.update_option(option, _option) 505 506 # allow acl_public to be set from the config file too, even though by 507 # default it is set to None, and not present in the config file. 508 if cp.get('acl_public'): 509 self.update_option('acl_public', cp.get('acl_public')) 510 511 if cp.get('add_headers'): 512 for option in cp.get('add_headers').split(","): 513 (key, value) = option.split(':', 1) 514 self.extra_headers[key.strip()] = value.strip() 515 516 self._parsed_files.append(configfile) 517 518 def dump_config(self, stream): 519 ConfigDumper(stream).dump(u"default", self) 520 521 def update_option(self, option, value): 522 if value is None: 523 return 524 525 #### Handle environment reference 526 if unicode(value).startswith("$"): 527 return self.update_option(option, os.getenv(value[1:])) 528 529 #### Special treatment of some options 530 ## verbosity must be known to "logging" module 531 if option == "verbosity": 532 # support integer verboisities 533 try: 534 value = int(value) 535 except ValueError: 536 try: 537 # otherwise it must be a key known to the logging module 538 try: 539 # python 3 support 540 value = logging._levelNames[value] 541 except AttributeError: 542 value = logging._nameToLevel[value] 543 except KeyError: 544 raise ValueError("Config: verbosity level '%s' is not valid" % value) 545 546 elif option == "limitrate": 547 #convert kb,mb to bytes 548 if value.endswith("k") or value.endswith("K"): 549 shift = 10 550 elif value.endswith("m") or value.endswith("M"): 551 shift = 20 552 else: 553 shift = 0 554 try: 555 value = shift and int(value[:-1]) << shift or int(value) 556 except Exception: 557 raise ValueError("Config: value of option %s must have suffix m, k, or nothing, not '%s'" % (option, value)) 558 559 ## allow yes/no, true/false, on/off and 1/0 for boolean options 560 ## Some options default to None, if that's the case check the value to see if it is bool 561 elif (type(getattr(Config, option)) is type(True) or # Config is bool 562 (getattr(Config, option) is None and is_bool(value))): # Config is None and value is bool 563 if is_bool_true(value): 564 value = True 565 elif is_bool_false(value): 566 value = False 567 else: 568 raise ValueError("Config: value of option '%s' must be Yes or No, not '%s'" % (option, value)) 569 570 elif type(getattr(Config, option)) is type(42): # int 571 try: 572 value = int(value) 573 except ValueError: 574 raise ValueError("Config: value of option '%s' must be an integer, not '%s'" % (option, value)) 575 576 elif option in ["host_base", "host_bucket", "cloudfront_host"]: 577 if value.startswith("http://"): 578 value = value[7:] 579 elif value.startswith("https://"): 580 value = value[8:] 581 582 583 setattr(Config, option, value) 584 585class ConfigParser(object): 586 def __init__(self, file, sections = []): 587 self.cfg = {} 588 self.parse_file(file, sections) 589 590 def parse_file(self, file, sections = []): 591 debug("ConfigParser: Reading file '%s'" % file) 592 if type(sections) != type([]): 593 sections = [sections] 594 in_our_section = True 595 r_comment = re.compile(r'^\s*#.*') 596 r_empty = re.compile(r'^\s*$') 597 r_section = re.compile(r'^\[([^\]]+)\]') 598 r_data = re.compile(r'^\s*(?P<key>\w+)\s*=\s*(?P<value>.*)') 599 r_quotes = re.compile(r'^"(.*)"\s*$') 600 with io.open(file, "r", encoding=self.get('encoding', 'UTF-8')) as fp: 601 for line in fp: 602 if r_comment.match(line) or r_empty.match(line): 603 continue 604 is_section = r_section.match(line) 605 if is_section: 606 section = is_section.groups()[0] 607 in_our_section = (section in sections) or (len(sections) == 0) 608 continue 609 is_data = r_data.match(line) 610 if is_data and in_our_section: 611 data = is_data.groupdict() 612 if r_quotes.match(data["value"]): 613 data["value"] = data["value"][1:-1] 614 self.__setitem__(data["key"], data["value"]) 615 if data["key"] in ("access_key", "secret_key", "gpg_passphrase"): 616 print_value = ("%s...%d_chars...%s") % (data["value"][:2], len(data["value"]) - 3, data["value"][-1:]) 617 else: 618 print_value = data["value"] 619 debug("ConfigParser: %s->%s" % (data["key"], print_value)) 620 continue 621 warning("Ignoring invalid line in '%s': %s" % (file, line)) 622 623 def __getitem__(self, name): 624 return self.cfg[name] 625 626 def __setitem__(self, name, value): 627 self.cfg[name] = value 628 629 def get(self, name, default = None): 630 if name in self.cfg: 631 return self.cfg[name] 632 return default 633 634class ConfigDumper(object): 635 def __init__(self, stream): 636 self.stream = stream 637 638 def dump(self, section, config): 639 self.stream.write(u"[%s]\n" % section) 640 for option in config.option_list(): 641 value = getattr(config, option) 642 if option == "verbosity": 643 # we turn level numbers back into strings if possible 644 if isinstance(value, int): 645 try: 646 try: 647 # python 3 support 648 value = logging._levelNames[value] 649 except AttributeError: 650 value = logging._levelToName[value] 651 except KeyError: 652 pass 653 self.stream.write(u"%s = %s\n" % (option, value)) 654 655# vim:et:ts=4:sts=4:ai 656