1# -*- coding: utf-8 -*-
3## Amazon S3 manager
4## Author: Michal Ludvig <michal@logix.cz>
5##         http://www.logix.cz/michal
6## License: GPL Version 2
7## Copyright: TGRMN Software and contributors
9from __future__ import absolute_import
11import logging
12import datetime
13import locale
14import re
15import os
16import io
17import sys
18import json
19import time
21from logging import debug, warning
23from .ExitCodes import EX_OSFILE
26    import dateutil.parser
27    import dateutil.tz
28except ImportError:
29    sys.stderr.write(u"""
31ImportError trying to import dateutil.parser and dateutil.tz.
32Please install the python dateutil module:
33$ sudo apt-get install python-dateutil
34  or
35$ sudo yum install python-dateutil
36  or
37$ pip install python-dateutil
40    sys.stderr.flush()
41    sys.exit(EX_OSFILE)
44    # python 3 support
45    import httplib
46except ImportError:
47    import http.client as httplib
50    from configparser import (NoOptionError, NoSectionError,
51                              MissingSectionHeaderError, ParsingError,
52                              ConfigParser as PyConfigParser)
53except ImportError:
54    # Python2 fallback code
55    from ConfigParser import (NoOptionError, NoSectionError,
56                              MissingSectionHeaderError, ParsingError,
57                              ConfigParser as PyConfigParser)
59from . import Progress
60from .SortedDict import SortedDict
61from .BaseUtils import (s3_quote, getTreeFromXml, getDictFromTree,
62                        base_unicodise, dateRFC822toPython)
66    unicode
67except NameError:
68    # python 3 support
69    # In python 3, unicode -> str, and str -> bytes
70    unicode = str
def is_bool_true(value):
    """Tell whether ``value`` spells out a boolean "true".

    A text value counts as true when, compared case-insensitively, it is
    one of ``true``, ``yes``, ``on`` or ``1``. The bool ``True`` counts as
    true as well. Anything else (including the integer ``1``) does not.

    Returns True or False.
    """
    # isinstance() also accepts subclasses of the text type, which an
    # exact type comparison would silently reject.
    if isinstance(value, unicode):
        return value.lower() in ("true", "yes", "on", "1")
    # Identity test: only the real bool True qualifies, never 1 or 1.0.
    return value is True
def is_bool_false(value):
    """Tell whether ``value`` spells out a boolean "false".

    A text value counts as false when, compared case-insensitively, it is
    one of ``false``, ``no``, ``off`` or ``0``. The bool ``False`` counts
    as false as well. Anything else (including the integer ``0`` and
    ``None``) does not.

    Returns True when ``value`` denotes false, False otherwise.
    """
    # isinstance() also accepts subclasses of the text type, which an
    # exact type comparison would silently reject.
    if isinstance(value, unicode):
        return value.lower() in ("false", "no", "off", "0")
    # Identity test: only the real bool False qualifies, never 0 or None.
    return value is False
def is_bool(value):
    """Report whether ``value`` is recognised as either boolean spelling.

    The answer is whatever is_bool_true() says when that is truthy,
    otherwise whatever is_bool_false() says.
    """
    verdict = is_bool_true(value)
    if not verdict:
        verdict = is_bool_false(value)
    return verdict
class Config(object):
    """Process-wide configuration store, implemented as a singleton.

    Every option is a *class* attribute whose initial value is the default.
    update_option() writes back to the class, so every Config() call sees
    the same settings. Attributes whose name does not start with an
    underscore and whose current value is a string, an int or a bool are
    the ones reported by option_list() and read from configuration files.
    """
    _instance = None
    _parsed_files = []
    _doc = {}
    access_key = u""
    secret_key = u""
    access_token = u""
    _access_token_refresh = True
    _access_token_expiration = None
    _access_token_last_update = None
    host_base = u"s3.amazonaws.com"
    host_bucket = u"%(bucket)s.s3.amazonaws.com"
    kms_key = u""    #can't set this and Server Side Encryption at the same time
    # simpledb_host looks useless, legacy? to remove?
    simpledb_host = u"sdb.amazonaws.com"
    cloudfront_host = u"cloudfront.amazonaws.com"
    verbosity = logging.WARNING
    progress_meter = sys.stdout.isatty()
    progress_class = Progress.ProgressCR
    send_chunk = 64 * 1024
    recv_chunk = 64 * 1024
    list_md5 = False
    long_listing = False
    human_readable_sizes = False
    extra_headers = SortedDict(ignore_case = True)
    force = False
    server_side_encryption = False
    enable = None
    get_continue = False
    put_continue = False
    upload_id = u""
    skip_existing = False
    recursive = False
    restore_days = 1
    restore_priority = u"Standard"
    acl_public = None
    acl_grants = []
    acl_revokes = []
    proxy_host = u""
    proxy_port = 3128
    encrypt = False
    dry_run = False
    add_encoding_exts = u""
    preserve_attrs = True
    preserve_attrs_list = [
        u'uname',    # Verbose owner Name (e.g. 'root')
        u'uid',      # Numeric user ID (e.g. 0)
        u'gname',    # Group name (e.g. 'users')
        u'gid',      # Numeric group ID (e.g. 100)
        u'atime',    # Last access timestamp
        u'mtime',    # Modification timestamp
        u'ctime',    # Creation timestamp
        u'mode',     # File mode (e.g. rwxr-xr-x = 755)
        u'md5',      # File MD5 (if known)
        #u'acl',     # Full ACL (not yet supported)
    ]
    delete_removed = False
    delete_after = False
    delete_after_fetch = False
    max_delete = -1
    limit = -1
    _doc['delete_removed'] = u"[sync] Remove remote S3 objects when local file has been deleted"
    delay_updates = False  # OBSOLETE
    gpg_passphrase = u""
    gpg_command = u""
    gpg_encrypt = u"%(gpg_command)s -c --verbose --no-use-agent --batch --yes --passphrase-fd %(passphrase_fd)s -o %(output_file)s %(input_file)s"
    gpg_decrypt = u"%(gpg_command)s -d --verbose --no-use-agent --batch --yes --passphrase-fd %(passphrase_fd)s -o %(output_file)s %(input_file)s"
    use_https = True
    ca_certs_file = u""
    ssl_client_key_file = u""
    ssl_client_cert_file = u""
    check_ssl_certificate = True
    check_ssl_hostname = True
    bucket_location = u"US"
    default_mime_type = u"binary/octet-stream"
    guess_mime_type = True
    use_mime_magic = True
    mime_type = u""
    enable_multipart = True
    # Chunk size is at the same time the chunk size and the threshold
    multipart_chunk_size_mb = 15    # MiB
    # Maximum chunk size for s3-to-s3 copy is 5 GiB.
    # But, use a lot lower value by default (1GiB)
    multipart_copy_chunk_size_mb = 1 * 1024
    # Maximum chunks on AWS S3, could be different on other S3-compatible APIs
    multipart_max_chunks = 10000
    # List of checks to be performed for 'sync'
    sync_checks = ['size', 'md5']   # 'weak-timestamp'
    # List of compiled REGEXPs
    exclude = []
    include = []
    # Dict mapping compiled REGEXPs back to their textual form
    debug_exclude = {}
    debug_include = {}
    encoding = locale.getpreferredencoding() or "UTF-8"
    urlencoding_mode = u"normal"
    log_target_prefix = u""
    reduced_redundancy = False
    storage_class = u""
    follow_symlinks = False
    # If too big, this value can be overridden by the OS socket timeouts max values.
    # For example, on Linux, a connection attempt will automatically timeout after 120s.
    socket_timeout = 300
    invalidate_on_cf = False
    # joseprio: new flags for default index invalidation
    invalidate_default_index_on_cf = False
    invalidate_default_index_root_on_cf = True
    website_index = u"index.html"
    website_error = u""
    website_endpoint = u"http://%(bucket)s.s3-website-%(location)s.amazonaws.com/"
    additional_destinations = []
    files_from = []
    cache_file = u""
    add_headers = u""
    remove_headers = []
    expiry_days = u""
    expiry_date = u""
    expiry_prefix = u""
    signature_v2 = False
    limitrate = 0
    requester_pays = False
    stop_on_error = False
    content_disposition = u""
    content_type = u""
    stats = False
    # Disabled by default because can create a latency with a CONTINUE status reply
    # expected for every send file requests.
    use_http_expect = False
    signurl_use_https = False
    # Maximum sleep duration for throttle / limitrate.
    # s3 will timeout if a request/transfer is stuck for more than a short time
    throttle_max = 100
    public_url_use_https = False
    connection_pooling = True
    # How long in seconds a connection can be kept idle in the pool and still
    # be alive. AWS s3 is supposed to close connections that are idle for 20
    # seconds or more, but in real life, undocumented, it closes https conns
    # after around 6s of inactivity.
    connection_max_age = 5

    ## Creating a singleton
    def __new__(cls, configfile = None, access_key=None, secret_key=None, access_token=None):
        """Return the single shared instance, creating it on first use.

        The arguments are accepted only so the signature matches __init__;
        they are not used here.
        """
        if cls._instance is None:
            cls._instance = object.__new__(cls)
        return cls._instance

    def __init__(self, configfile = None, access_key=None, secret_key=None, access_token=None):
        """Load settings when a configuration file is given.

        Without ``configfile`` this is a no-op, which is what makes plain
        ``Config()`` calls cheap accessors for the singleton.

        With ``configfile``:
          * the file is read; if that raises IOError and AWS_CREDENTIAL_FILE
            or AWS_PROFILE is set, the AWS credentials file is tried instead;
          * explicit ``access_key``/``secret_key`` and ``access_token``
            arguments override what was read;
          * if there is still no access key, the AWS_* environment
            variables are consulted, and failing that role_config() is
            called.

        Raises:
            ValueError: an access key is present in the environment but the
                matching secret key is not.
            Exception: ``kms_key`` is combined with ``signature_v2``.
        """
        if configfile:
            try:
                self.read_config_file(configfile)
            except IOError:
                if 'AWS_CREDENTIAL_FILE' in os.environ or 'AWS_PROFILE' in os.environ:
                    self.aws_credential_file()

            # override these if passed on the command-line
            if access_key and secret_key:
                self.access_key = access_key
                self.secret_key = secret_key
            if access_token:
                self.access_token = access_token
                # Do not refresh the IAM role when an access token is provided.
                self._access_token_refresh = False

            if len(self.access_key) == 0:
                env_access_key = os.getenv('AWS_ACCESS_KEY') or os.getenv('AWS_ACCESS_KEY_ID')
                env_secret_key = os.getenv('AWS_SECRET_KEY') or os.getenv('AWS_SECRET_ACCESS_KEY')
                env_access_token = os.getenv('AWS_SESSION_TOKEN') or os.getenv('AWS_SECURITY_TOKEN')
                if env_access_key:
                    if not env_secret_key:
                        raise ValueError(
                            "AWS_ACCESS_KEY environment variable is used but"
                            " AWS_SECRET_KEY variable is missing"
                        )
                    # py3 getenv returns unicode and py2 returns bytes.
                    self.access_key = base_unicodise(env_access_key)
                    self.secret_key = base_unicodise(env_secret_key)
                    if env_access_token:
                        # Do not refresh the IAM role when an access token is provided.
                        self._access_token_refresh = False
                        self.access_token = base_unicodise(env_access_token)
                else:
                    self.role_config()

            #TODO check KMS key is valid
            if self.kms_key and self.server_side_encryption == True:
                warning('Cannot have server_side_encryption (S3 SSE) and KMS_key set (S3 KMS). KMS encryption will be used. Please set server_side_encryption to False')
            if self.kms_key and self.signature_v2 == True:
                raise Exception('KMS encryption requires signature v4. Please set signature_v2 to False')

    def role_config(self):
        """
        Get credentials from IAM authentication and STS AssumeRole

        When AWS_ROLE_ARN is set the credentials are requested from STS,
        otherwise they are read from the instance metadata service. Any
        failure (network error, unexpected reply) propagates to the caller;
        unexpected HTTP replies are reported as IOError.
        """
        role_arn = os.environ.get('AWS_ROLE_ARN')
        if role_arn:
            self._role_config_from_sts(role_arn)
        else:
            self._role_config_from_instance_metadata()

    def _role_config_from_sts(self, role_arn):
        """Assume ``role_arn`` through STS and store the returned credentials."""
        role_session_name = 'role-session-%s' % (int(time.time()))
        params = {
            'Action': 'AssumeRole',
            'Version': '2011-06-15',
            'RoleArn': role_arn,
            'RoleSessionName': role_session_name,
        }
        web_identity_token_file = os.environ.get('AWS_WEB_IDENTITY_TOKEN_FILE')
        if web_identity_token_file:
            with open(web_identity_token_file) as f:
                web_identity_token = f.read()
            params['Action'] = 'AssumeRoleWithWebIdentity'
            params['WebIdentityToken'] = web_identity_token
        encoded_params = '&'.join([
            '%s=%s' % (k, s3_quote(v, unicode_output=True))
            for k, v in params.items()
        ])
        conn = httplib.HTTPSConnection(host='sts.amazonaws.com',
                                       timeout=2)
        try:
            conn.request('POST', '/?' + encoded_params)
            resp = conn.getresponse()
            resp_content = resp.read()
        finally:
            # Always release the socket, also when the request fails.
            conn.close()
        if resp.status != 200 or len(resp_content) <= 1:
            raise IOError("Unexpected reply from STS server (HTTP status %s)"
                          % resp.status)

        tree = getTreeFromXml(resp_content)
        result_dict = getDictFromTree(tree)
        if tree.tag == "AssumeRoleResponse":
            creds = result_dict['AssumeRoleResult']['Credentials']
        elif tree.tag == "AssumeRoleWithWebIdentityResponse":
            creds = result_dict['AssumeRoleWithWebIdentityResult']['Credentials']
        else:
            raise IOError("Unexpected XML message from STS server: <%s />" % tree.tag)
        Config().update_option('access_key', creds['AccessKeyId'])
        Config().update_option('secret_key', creds['SecretAccessKey'])
        Config().update_option('access_token', creds['SessionToken'])
        expiration = dateRFC822toPython(base_unicodise(creds['Expiration']))
        # Add a timedelta to prevent any expiration if the EC2 machine is not at the right date
        self._access_token_expiration = expiration - datetime.timedelta(minutes=15)
        # last update date is not provided in STS responses
        self._access_token_last_update = datetime.datetime.now(dateutil.tz.tzutc())
        # Others variables : Code / Type

    def _role_config_from_instance_metadata(self):
        """Read the instance role credentials from the metadata service."""
        # 169.254.169.254 is the link-local address of the EC2 instance
        # metadata service that serves /latest/meta-data/...
        conn = httplib.HTTPConnection(host='169.254.169.254',
                                      timeout=2)
        try:
            conn.request('GET', "/latest/meta-data/iam/security-credentials/")
            resp = conn.getresponse()
            files = resp.read()
            if resp.status != 200 or len(files) <= 1:
                raise IOError("Unable to list IAM roles from the instance "
                              "metadata service (HTTP status %s)" % resp.status)
            conn.request('GET', "/latest/meta-data/iam/security-credentials/%s" % files.decode('utf-8'))
            resp = conn.getresponse()
            if resp.status != 200:
                raise IOError("Unable to read IAM role credentials from the "
                              "instance metadata service (HTTP status %s)"
                              % resp.status)
            resp_content = base_unicodise(resp.read())
        finally:
            # Always release the socket, also when a request fails.
            conn.close()

        creds = json.loads(resp_content)
        Config().update_option('access_key', base_unicodise(creds['AccessKeyId']))
        Config().update_option('secret_key', base_unicodise(creds['SecretAccessKey']))
        Config().update_option('access_token', base_unicodise(creds['Token']))
        expiration = dateRFC822toPython(base_unicodise(creds['Expiration']))
        # Add a timedelta to prevent any expiration if the EC2 machine is not at the right date
        self._access_token_expiration = expiration - datetime.timedelta(minutes=15)
        self._access_token_last_update = dateRFC822toPython(base_unicodise(creds['LastUpdated']))
        # Others variables : Code / Type

    def role_refresh(self):
        """Fetch fresh role credentials unless the current ones are still valid.

        Does nothing when token refreshing has been disabled (an access
        token was supplied explicitly). A failed refresh is only logged as
        a warning.
        """
        if self._access_token_refresh:
            now = datetime.datetime.now(dateutil.tz.tzutc())
            if self._access_token_expiration \
               and now < self._access_token_expiration \
               and self._access_token_last_update \
               and self._access_token_last_update <= now:
                # current token is still valid. No need to refresh it
                return
            try:
                self.role_config()
            except Exception:
                warning("Could not refresh role")

    @staticmethod
    def _read_credentials_string(parser, text):
        """Feed ``text`` to ``parser`` using whichever API it provides.

        read_file() is the current name; readfp() is the old one, which
        Python 3.12 removed and Python 2 still requires.
        """
        reader = getattr(parser, 'read_file', None)
        if reader is None:
            reader = parser.readfp
        reader(io.StringIO(text))

    def aws_credential_file(self):
        """Load access key, secret key and session token from an AWS credentials file.

        The file named by AWS_CREDENTIAL_FILE is used when it exists,
        otherwise ``~/.aws/credentials``; if neither exists nothing
        happens. The profile is taken from AWS_PROFILE (default:
        ``default``). Files without any section header are accepted by
        treating their content as the ``[default]`` section.

        I/O errors and a missing profile are logged as warnings.

        Raises:
            ValueError: the file cannot be parsed.
        """
        try:
            credentials_path = os.path.expanduser('~/.aws/credentials')
            credential_file_from_env = os.environ.get('AWS_CREDENTIAL_FILE')
            if credential_file_from_env and \
               os.path.isfile(credential_file_from_env):
                credentials_path = base_unicodise(credential_file_from_env)
            elif not os.path.isfile(credentials_path):
                return

            config = PyConfigParser()

            debug("Reading AWS credentials from %s" % (credentials_path))
            with io.open(credentials_path, "r",
                         encoding=getattr(self, 'encoding', 'UTF-8')) as fp:
                config_string = fp.read()
            try:
                try:
                    self._read_credentials_string(config, config_string)
                except MissingSectionHeaderError:
                    # if header is missing, this could be deprecated
                    # credentials file format as described here:
                    # https://blog.csanchez.org/2011/05/
                    # then do the hacky-hack and add default header
                    # to be able to read the file with PyConfigParser()
                    config_string = u'[default]\n' + config_string
                    self._read_credentials_string(config, config_string)
            except ParsingError as exc:
                raise ValueError(
                    "Error reading aws_credential_file "
                    "(%s): %s" % (credentials_path, str(exc)))

            profile = base_unicodise(os.environ.get('AWS_PROFILE', "default"))
            debug("Using AWS profile '%s'" % (profile))

            # get_key - helper function to read the aws profile credentials
            # including the legacy ones as described here:
            # https://blog.csanchez.org/2011/05/
            def get_key(profile, key, legacy_key, print_warning=True):
                result = None

                try:
                    result = config.get(profile, key)
                except NoOptionError as e:
                    # we may want to skip warning message for optional keys
                    if print_warning:
                        warning("Couldn't find key '%s' for the AWS Profile "
                                "'%s' in the credentials file '%s'",
                                e.option, e.section, credentials_path)
                    # if the legacy_key defined and original one wasn't found,
                    # try read the legacy_key
                    if legacy_key:
                        try:
                            key = legacy_key
                            profile = "default"
                            result = config.get(profile, key)
                            warning(
                                "Legacy configuratin key '%s' used, please use"
                                " the standardized config format as described "
                                "here: https://aws.amazon.com/blogs/security/a-new-and-standardized-way-to-manage-credentials-in-the-aws-sdks/",
                                key)
                        except NoOptionError:
                            # The legacy key is optional as well.
                            pass

                if result:
                    debug("Found the configuration option '%s' for the AWS "
                          "Profile '%s' in the credentials file %s",
                          key, profile, credentials_path)
                return result

            profile_access_key = get_key(profile, "aws_access_key_id",
                                         "AWSAccessKeyId")
            if profile_access_key:
                Config().update_option('access_key',
                                       base_unicodise(profile_access_key))

            profile_secret_key = get_key(profile, "aws_secret_access_key",
                                         "AWSSecretKey")
            if profile_secret_key:
                Config().update_option('secret_key',
                                       base_unicodise(profile_secret_key))

            profile_access_token = get_key(profile, "aws_session_token", None,
                                           False)
            if profile_access_token:
                Config().update_option('access_token',
                                       base_unicodise(profile_access_token))

        except IOError as e:
            # errno may be None, so it is formatted with %s rather than %d.
            warning("Errno %s accessing credentials file %s", e.errno,
                    credentials_path)
        except NoSectionError as e:
            warning("Couldn't find AWS Profile '%s' in the credentials file "
                    "'%s'", profile, credentials_path)

    def option_list(self):
        """Return the names of all user-visible options.

        These are the attributes found by dir() whose name does not start
        with an underscore and whose current class-level value is a
        string, an int or a bool.
        """
        retval = []
        for option in dir(self):
            ## Skip attributes that start with underscore or are not string, int or bool
            option_type = type(getattr(Config, option))
            if option.startswith("_") or \
               not (option_type in (
                    type(u"string"), # str
                        type(42),   # int
                    type(True))):   # bool
                continue
            retval.append(option)
        return retval

    def read_config_file(self, configfile):
        """Apply the settings found in ``configfile``.

        Every option reported by option_list() that is present in the file
        is stripped of surrounding whitespace and passed to
        update_option(). ``acl_public`` and ``add_headers`` get extra
        handling because they are not covered by that loop's result alone:
        ``acl_public`` defaults to None, and ``add_headers`` is split into
        individual ``name: value`` entries stored in ``extra_headers``.
        """
        cp = ConfigParser(configfile)
        for option in self.option_list():
            _option = cp.get(option)
            if _option is not None:
                _option = _option.strip()
            self.update_option(option, _option)

        # allow acl_public to be set from the config file too, even though by
        # default it is set to None, and not present in the config file.
        if cp.get('acl_public'):
            self.update_option('acl_public', cp.get('acl_public'))

        if cp.get('add_headers'):
            for option in cp.get('add_headers').split(","):
                (key, value) = option.split(':', 1)
                self.extra_headers[key.strip()] = value.strip()

        self._parsed_files.append(configfile)

    def dump_config(self, stream):
        """Write all options to ``stream`` as a ``[default]`` section."""
        ConfigDumper(stream).dump(u"default", self)

    def update_option(self, option, value):
        """Validate ``value`` and store it as the class-wide ``option``.

        ``None`` is ignored. A value starting with ``$`` is replaced by
        the content of the environment variable of that name. The value is
        then converted according to the option:

          * ``verbosity``: an integer, or a level name known to ``logging``;
          * ``limitrate``: an integer with an optional ``k``/``K`` (KiB) or
            ``m``/``M`` (MiB) suffix, stored as a number of bytes;
          * options whose current value is a bool (or is None while the new
            value looks boolean): yes/no, true/false, on/off, 1/0;
          * options whose current value is an int: an integer;
          * ``host_base``, ``host_bucket``, ``cloudfront_host``: a leading
            ``http://`` or ``https://`` is removed.

        Raises:
            ValueError: the value is not acceptable for the option.
        """
        if value is None:
            return

        #### Handle environment reference
        if unicode(value).startswith("$"):
            return self.update_option(option, os.getenv(value[1:]))

        #### Special treatment of some options
        ## verbosity must be known to "logging" module
        if option == "verbosity":
            # support integer verbosities
            try:
                value = int(value)
            except ValueError:
                try:
                    # otherwise it must be a key known to the logging module
                    try:
                        # python 2 exposes a single combined mapping
                        value = logging._levelNames[value]
                    except AttributeError:
                        # python 3 support
                        value = logging._nameToLevel[value]
                except KeyError:
                    raise ValueError("Config: verbosity level '%s' is not valid" % value)

        elif option == "limitrate":
            #convert kb,mb to bytes
            # Work on the text form so that a plain int is accepted too.
            text = unicode(value)
            if text.endswith((u"k", u"K")):
                shift = 10
            elif text.endswith((u"m", u"M")):
                shift = 20
            else:
                shift = 0
            try:
                # A conditional expression, not "and/or": a legitimate
                # result of 0 (e.g. "0k") must not fall through to
                # parsing the suffixed text as a plain integer.
                value = (int(text[:-1]) << shift) if shift else int(text)
            except ValueError:
                raise ValueError("Config: value of option %s must have suffix m, k, or nothing, not '%s'" % (option, value))

        ## allow yes/no, true/false, on/off and 1/0 for boolean options
        ## Some options default to None, if that's the case check the value to see if it is bool
        elif (type(getattr(Config, option)) is type(True) or              # Config is bool
              (getattr(Config, option) is None and is_bool(value))):  # Config is None and value is bool
            if is_bool_true(value):
                value = True
            elif is_bool_false(value):
                value = False
            else:
                raise ValueError("Config: value of option '%s' must be Yes or No, not '%s'" % (option, value))

        elif type(getattr(Config, option)) is type(42):     # int
            try:
                value = int(value)
            except ValueError:
                raise ValueError("Config: value of option '%s' must be an integer, not '%s'" % (option, value))

        elif option in ["host_base", "host_bucket", "cloudfront_host"]:
            if value.startswith("http://"):
                value = value[7:]
            elif value.startswith("https://"):
                value = value[8:]


        setattr(Config, option, value)
class ConfigParser(object):
    """Small reader for ``key = value`` configuration files.

    Lines are classified as comments (``#``), blank lines, section headers
    (``[name]``) or data lines. Data lines are stored in ``self.cfg`` when
    they belong to one of the requested sections, or to any section when
    no section was requested. Lines before the first header always count.
    """

    # Line patterns, compiled once for all instances.
    _r_comment = re.compile(r'^\s*#.*')
    _r_empty = re.compile(r'^\s*$')
    _r_section = re.compile(r'^\[([^\]]+)\]')
    _r_data = re.compile(r'^\s*(?P<key>\w+)\s*=\s*(?P<value>.*)')
    _r_quotes = re.compile(r'^"(.*)"\s*$')

    # Keys whose values must never appear in full in the debug log.
    _secret_keys = ("access_key", "secret_key", "gpg_passphrase")

    def __init__(self, file, sections = None):
        """Parse ``file`` right away.

        Args:
            file: path of the configuration file.
            sections: a section name or a list of section names to read;
                None or an empty list reads every section.
        """
        self.cfg = {}
        self.parse_file(file, sections)

    def parse_file(self, file, sections = None):
        """Read ``file`` and merge its settings into ``self.cfg``.

        Args:
            file: path of the configuration file, opened as text using the
                ``encoding`` value already parsed, or UTF-8.
            sections: a section name or a list of section names to read;
                None or an empty list reads every section.

        A value wrapped in double quotes is stored without the quotes.
        Lines that match no known form are reported with a warning and
        skipped. I/O errors from opening the file propagate.
        """
        debug("ConfigParser: Reading file '%s'" % file)
        if sections is None:
            sections = []
        elif not isinstance(sections, list):
            sections = [sections]
        in_our_section = True
        with io.open(file, "r", encoding=self.get('encoding', 'UTF-8')) as fp:
            for line in fp:
                if self._r_comment.match(line) or self._r_empty.match(line):
                    continue
                is_section = self._r_section.match(line)
                if is_section:
                    section = is_section.group(1)
                    in_our_section = (section in sections) or (len(sections) == 0)
                    continue
                is_data = self._r_data.match(line)
                if is_data and in_our_section:
                    key = is_data.group("key")
                    value = is_data.group("value")
                    quoted = self._r_quotes.match(value)
                    if quoted:
                        # Take the text captured between the quotes: the
                        # pattern tolerates whitespace after the closing
                        # quote, so slicing off the last character would
                        # drop a space and keep the quote.
                        value = quoted.group(1)
                    self.__setitem__(key, value)
                    if key in self._secret_keys:
                        print_value = ("%s...%d_chars...%s") % (value[:2], len(value) - 3, value[-1:])
                    else:
                        print_value = value
                    debug("ConfigParser: %s->%s" % (key, print_value))
                    continue
                warning("Ignoring invalid line in '%s': %s" % (file, line))

    def __getitem__(self, name):
        """Return the value stored for ``name``; raises KeyError if absent."""
        return self.cfg[name]

    def __setitem__(self, name, value):
        """Store ``value`` under ``name``."""
        self.cfg[name] = value

    def get(self, name, default = None):
        """Return the value stored for ``name``, or ``default`` if absent."""
        if name in self.cfg:
            return self.cfg[name]
        return default
class ConfigDumper(object):
    """Serialise a configuration object as ``option = value`` lines."""

    def __init__(self, stream):
        """Remember the writable text ``stream`` that dump() writes to."""
        self.stream = stream

    @staticmethod
    def _level_name(level):
        """Return the logging name for ``level``, or ``level`` itself if unknown."""
        # Python 2 offers logging._levelNames; on Python 3 only
        # logging._levelToName exists.
        table = getattr(logging, "_levelNames", None)
        if table is None:
            table = logging._levelToName
        return table.get(level, level)

    def dump(self, section, config):
        """Write a ``[section]`` header followed by every option of ``config``.

        The options are those named by ``config.option_list()``. An integer
        ``verbosity`` is written as its logging level name when it has one.
        """
        emit = self.stream.write
        emit(u"[%s]\n" % section)
        for name in config.option_list():
            current = getattr(config, name)
            # we turn level numbers back into strings if possible
            if name == "verbosity" and isinstance(current, int):
                current = self._level_name(current)
            emit(u"%s = %s\n" % (name, current))
655# vim:et:ts=4:sts=4:ai