1# -*- coding: utf-8 -*-
2
3## Amazon S3 manager
4## Author: Michal Ludvig <michal@logix.cz>
5##         http://www.logix.cz/michal
6## License: GPL Version 2
7## Copyright: TGRMN Software and contributors
8
9from __future__ import absolute_import
10
11import logging
12import datetime
13import locale
14import re
15import os
16import io
17import sys
18import json
19import time
20
21from logging import debug, warning
22
23from .ExitCodes import EX_OSFILE
24
25try:
26    import dateutil.parser
27    import dateutil.tz
28except ImportError:
29    sys.stderr.write(u"""
30!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
31ImportError trying to import dateutil.parser and dateutil.tz.
32Please install the python dateutil module:
33$ sudo apt-get install python-dateutil
34  or
35$ sudo yum install python-dateutil
36  or
37$ pip install python-dateutil
38!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
39""")
40    sys.stderr.flush()
41    sys.exit(EX_OSFILE)
42
43try:
    # Python 2 module name; the fallback below is its Python 3 location
45    import httplib
46except ImportError:
47    import http.client as httplib
48
49try:
50    from configparser import (NoOptionError, NoSectionError,
51                              MissingSectionHeaderError, ParsingError,
52                              ConfigParser as PyConfigParser)
53except ImportError:
54    # Python2 fallback code
55    from ConfigParser import (NoOptionError, NoSectionError,
56                              MissingSectionHeaderError, ParsingError,
57                              ConfigParser as PyConfigParser)
58
59from . import Progress
60from .SortedDict import SortedDict
61from .BaseUtils import (s3_quote, getTreeFromXml, getDictFromTree,
62                        base_unicodise, dateRFC822toPython)
63
64
65try:
66    unicode
67except NameError:
68    # python 3 support
69    # In python 3, unicode -> str, and str -> bytes
70    unicode = str
71
72
def is_bool_true(value):
    """Check to see if a value spells a boolean "true".

    value may be a text string (str or unicode), compared
    case-insensitively with "true", "yes", "on" and "1", or the bool
    True itself.

    Return True if it is, False for anything else (other types such as
    int or None included).
    """
    # type(u"") is ``unicode`` on Python 2 and ``str`` on Python 3, so both
    # native and unicode strings (and their subclasses) are accepted.
    if isinstance(value, (str, type(u""))):
        return value.lower() in ("true", "yes", "on", "1")
    # bool cannot be subclassed, so an identity test is an exact type check
    return value is True
86
87
def is_bool_false(value):
    """Check to see if a value spells a boolean "false".

    value may be a text string (str or unicode), compared
    case-insensitively with "false", "no", "off" and "0", or the bool
    False itself.

    Return True if it is, False for anything else (other types such as
    int or None included).
    """
    # type(u"") is ``unicode`` on Python 2 and ``str`` on Python 3, so both
    # native and unicode strings (and their subclasses) are accepted.
    if isinstance(value, (str, type(u""))):
        return value.lower() in ("false", "no", "off", "0")
    # bool cannot be subclassed, so an identity test is an exact type check
    return value is False
101
102
def is_bool(value):
    """Tell whether value is recognised as either boolean spelling.

    Return True when is_bool_true() or is_bool_false() accepts the value.
    """
    if is_bool_true(value):
        return True
    return is_bool_false(value)
106
107
class Config(object):
    """Shared configuration of the S3 manager.

    The class is a singleton (see __new__) and the options live as class
    attributes: update_option() stores new values on the class itself.

    Attributes whose name does not start with an underscore and whose
    default is a string, an int or a bool are the ones reported by
    option_list(), i.e. the ones read from and dumped to config files.
    """
    # The single shared instance handed out by __new__
    _instance = None
    # Config files successfully read by read_config_file()
    _parsed_files = []
    # Optional per-option help texts, keyed by option name
    _doc = {}
    access_key = u""
    secret_key = u""
    access_token = u""
    # Set to False when a token is supplied explicitly (argument or environment)
    _access_token_refresh = True
    _access_token_expiration = None
    _access_token_last_update = None
    host_base = u"s3.amazonaws.com"
    host_bucket = u"%(bucket)s.s3.amazonaws.com"
    kms_key = u""    # can't set this and Server Side Encryption at the same time
    # simpledb_host looks useless, legacy? to remove?
    simpledb_host = u"sdb.amazonaws.com"
    cloudfront_host = u"cloudfront.amazonaws.com"
    verbosity = logging.WARNING
    # Evaluated once, when this class body is executed
    progress_meter = sys.stdout.isatty()
    progress_class = Progress.ProgressCR
    send_chunk = 64 * 1024
    recv_chunk = 64 * 1024
    list_md5 = False
    long_listing = False
    human_readable_sizes = False
    extra_headers = SortedDict(ignore_case = True)
    force = False
    server_side_encryption = False
    enable = None
    get_continue = False
    put_continue = False
    upload_id = u""
    skip_existing = False
    recursive = False
    restore_days = 1
    restore_priority = u"Standard"
    acl_public = None
    acl_grants = []
    acl_revokes = []
    proxy_host = u""
    proxy_port = 3128
    encrypt = False
    dry_run = False
    add_encoding_exts = u""
    preserve_attrs = True
    preserve_attrs_list = [
        u'uname',    # Verbose owner Name (e.g. 'root')
        u'uid',      # Numeric user ID (e.g. 0)
        u'gname',    # Group name (e.g. 'users')
        u'gid',      # Numeric group ID (e.g. 100)
        u'atime',    # Last access timestamp
        u'mtime',    # Modification timestamp
        u'ctime',    # Creation timestamp
        u'mode',     # File mode (e.g. rwxr-xr-x = 755)
        u'md5',      # File MD5 (if known)
        #u'acl',     # Full ACL (not yet supported)
    ]
    delete_removed = False
    delete_after = False
    delete_after_fetch = False
    max_delete = -1
    limit = -1
    _doc['delete_removed'] = u"[sync] Remove remote S3 objects when local file has been deleted"
    delay_updates = False  # OBSOLETE
    gpg_passphrase = u""
    gpg_command = u""
    gpg_encrypt = u"%(gpg_command)s -c --verbose --no-use-agent --batch --yes --passphrase-fd %(passphrase_fd)s -o %(output_file)s %(input_file)s"
    gpg_decrypt = u"%(gpg_command)s -d --verbose --no-use-agent --batch --yes --passphrase-fd %(passphrase_fd)s -o %(output_file)s %(input_file)s"
    use_https = True
    ca_certs_file = u""
    ssl_client_key_file = u""
    ssl_client_cert_file = u""
    check_ssl_certificate = True
    check_ssl_hostname = True
    bucket_location = u"US"
    default_mime_type = u"binary/octet-stream"
    guess_mime_type = True
    use_mime_magic = True
    mime_type = u""
    enable_multipart = True
    # Chunk size is at the same time the chunk size and the threshold
    multipart_chunk_size_mb = 15    # MiB
    # Maximum chunk size for s3-to-s3 copy is 5 GiB.
    # But, use a lot lower value by default (1GiB)
    multipart_copy_chunk_size_mb = 1 * 1024
    # Maximum chunks on AWS S3, could be different on other S3-compatible APIs
    multipart_max_chunks = 10000
    # List of checks to be performed for 'sync'
    sync_checks = ['size', 'md5']   # 'weak-timestamp'
    # List of compiled REGEXPs
    exclude = []
    include = []
    # Dict mapping compiled REGEXPs back to their textual form
    debug_exclude = {}
    debug_include = {}
    encoding = locale.getpreferredencoding() or "UTF-8"
    urlencoding_mode = u"normal"
    log_target_prefix = u""
    reduced_redundancy = False
    storage_class = u""
    follow_symlinks = False
    # If too big, this value can be overridden by the OS socket timeouts max values.
    # For example, on Linux, a connection attempt will automatically timeout after 120s.
    socket_timeout = 300
    invalidate_on_cf = False
    # joseprio: new flags for default index invalidation
    invalidate_default_index_on_cf = False
    invalidate_default_index_root_on_cf = True
    website_index = u"index.html"
    website_error = u""
    website_endpoint = u"http://%(bucket)s.s3-website-%(location)s.amazonaws.com/"
    additional_destinations = []
    files_from = []
    cache_file = u""
    add_headers = u""
    remove_headers = []
    expiry_days = u""
    expiry_date = u""
    expiry_prefix = u""
    signature_v2 = False
    limitrate = 0
    requester_pays = False
    stop_on_error = False
    content_disposition = u""
    content_type = u""
    stats = False
    # Disabled by default because it can add latency: a CONTINUE status reply
    # is then expected for every file send request.
    use_http_expect = False
    signurl_use_https = False
    # Maximum sleep duration for throttle / limitrate.
    # s3 will timeout if a request/transfer is stuck for more than a short time
    throttle_max = 100
    public_url_use_https = False
    connection_pooling = True
    # How long in seconds a connection can be kept idle in the pool and still
    # be alive. AWS s3 is supposed to close connections that are idle for 20
    # seconds or more, but in real life, undocumented, it closes https conns
    # after around 6s of inactivity.
    connection_max_age = 5
247
248    ## Creating a singleton
249    def __new__(self, configfile = None, access_key=None, secret_key=None, access_token=None):
250        if self._instance is None:
251            self._instance = object.__new__(self)
252        return self._instance
253
254    def __init__(self, configfile = None, access_key=None, secret_key=None, access_token=None):
255        if configfile:
256            try:
257                self.read_config_file(configfile)
258            except IOError:
259                if 'AWS_CREDENTIAL_FILE' in os.environ or 'AWS_PROFILE' in os.environ:
260                    self.aws_credential_file()
261
262            # override these if passed on the command-line
263            if access_key and secret_key:
264                self.access_key = access_key
265                self.secret_key = secret_key
266            if access_token:
267                self.access_token = access_token
268                # Do not refresh the IAM role when an access token is provided.
269                self._access_token_refresh = False
270
271            if len(self.access_key) == 0:
272                env_access_key = os.getenv('AWS_ACCESS_KEY') or os.getenv('AWS_ACCESS_KEY_ID')
273                env_secret_key = os.getenv('AWS_SECRET_KEY') or os.getenv('AWS_SECRET_ACCESS_KEY')
274                env_access_token = os.getenv('AWS_SESSION_TOKEN') or os.getenv('AWS_SECURITY_TOKEN')
275                if env_access_key:
276                    if not env_secret_key:
277                        raise ValueError(
278                            "AWS_ACCESS_KEY environment variable is used but"
279                            " AWS_SECRET_KEY variable is missing"
280                        )
281                    # py3 getenv returns unicode and py2 returns bytes.
282                    self.access_key = base_unicodise(env_access_key)
283                    self.secret_key = base_unicodise(env_secret_key)
284                    if env_access_token:
285                        # Do not refresh the IAM role when an access token is provided.
286                        self._access_token_refresh = False
287                        self.access_token = base_unicodise(env_access_token)
288                else:
289                    self.role_config()
290
291            #TODO check KMS key is valid
292            if self.kms_key and self.server_side_encryption == True:
293                warning('Cannot have server_side_encryption (S3 SSE) and KMS_key set (S3 KMS). KMS encryption will be used. Please set server_side_encryption to False')
294            if self.kms_key and self.signature_v2 == True:
295                raise Exception('KMS encryption requires signature v4. Please set signature_v2 to False')
296
297    def role_config(self):
298        """
299        Get credentials from IAM authentication and STS AssumeRole
300        """
301        try:
302            role_arn = os.environ.get('AWS_ROLE_ARN')
303            if role_arn:
304                role_session_name = 'role-session-%s' % (int(time.time()))
305                params = {
306                    'Action': 'AssumeRole',
307                    'Version': '2011-06-15',
308                    'RoleArn': role_arn,
309                    'RoleSessionName': role_session_name,
310                }
311                web_identity_token_file = os.environ.get('AWS_WEB_IDENTITY_TOKEN_FILE')
312                if web_identity_token_file:
313                    with open(web_identity_token_file) as f:
314                        web_identity_token = f.read()
315                    params['Action'] = 'AssumeRoleWithWebIdentity'
316                    params['WebIdentityToken'] = web_identity_token
317                encoded_params = '&'.join([
318                    '%s=%s' % (k, s3_quote(v, unicode_output=True))
319                    for k, v in params.items()
320                ])
321                conn = httplib.HTTPSConnection(host='sts.amazonaws.com',
322                                               timeout=2)
323                conn.request('POST', '/?' + encoded_params)
324                resp = conn.getresponse()
325                resp_content = resp.read()
326                if resp.status == 200 and len(resp_content) > 1:
327                    tree = getTreeFromXml(resp_content)
328                    result_dict = getDictFromTree(tree)
329                    if tree.tag == "AssumeRoleResponse":
330                        creds = result_dict['AssumeRoleResult']['Credentials']
331                    elif tree.tag == "AssumeRoleWithWebIdentityResponse":
332                        creds = result_dict['AssumeRoleWithWebIdentityResult']['Credentials']
333                    else:
334                        raise IOError("Unexpected XML message from STS server: <%s />" % tree.tag)
335                    Config().update_option('access_key', creds['AccessKeyId'])
336                    Config().update_option('secret_key', creds['SecretAccessKey'])
337                    Config().update_option('access_token', creds['SessionToken'])
338                    expiration = dateRFC822toPython(base_unicodise(creds['Expiration']))
339                    # Add a timedelta to prevent any expiration if the EC2 machine is not at the right date
340                    self._access_token_expiration = expiration - datetime.timedelta(minutes=15)
341                    # last update date is not provided in STS responses
342                    self._access_token_last_update = datetime.datetime.now(dateutil.tz.tzutc())
343                    # Others variables : Code / Type
344                else:
345                    raise IOError
346            else:
347                conn = httplib.HTTPConnection(host='169.254.169.254',
348                                              timeout=2)
349                conn.request('GET', "/latest/meta-data/iam/security-credentials/")
350                resp = conn.getresponse()
351                files = resp.read()
352                if resp.status == 200 and len(files) > 1:
353                    conn.request('GET', "/latest/meta-data/iam/security-credentials/%s" % files.decode('utf-8'))
354                    resp=conn.getresponse()
355                    if resp.status == 200:
356                        resp_content = base_unicodise(resp.read())
357                        creds=json.loads(resp_content)
358                        Config().update_option('access_key', base_unicodise(creds['AccessKeyId']))
359                        Config().update_option('secret_key', base_unicodise(creds['SecretAccessKey']))
360                        Config().update_option('access_token', base_unicodise(creds['Token']))
361                        expiration = dateRFC822toPython(base_unicodise(creds['Expiration']))
362                        # Add a timedelta to prevent any expiration if the EC2 machine is not at the right date
363                        self._access_token_expiration = expiration - datetime.timedelta(minutes=15)
364                        self._access_token_last_update = dateRFC822toPython(base_unicodise(creds['LastUpdated']))
365                        # Others variables : Code / Type
366                    else:
367                        raise IOError
368                else:
369                    raise IOError
370        except:
371            raise
372
373    def role_refresh(self):
374        if self._access_token_refresh:
375            now = datetime.datetime.now(dateutil.tz.tzutc())
376            if self._access_token_expiration \
377               and now < self._access_token_expiration \
378               and self._access_token_last_update \
379               and self._access_token_last_update <= now:
380                # current token is still valid. No need to refresh it
381                return
382            try:
383                self.role_config()
384            except Exception:
385                warning("Could not refresh role")
386
387    def aws_credential_file(self):
388        try:
389            aws_credential_file = os.path.expanduser('~/.aws/credentials')
390            credential_file_from_env = os.environ.get('AWS_CREDENTIAL_FILE')
391            if credential_file_from_env and \
392               os.path.isfile(credential_file_from_env):
393                aws_credential_file = base_unicodise(credential_file_from_env)
394            elif not os.path.isfile(aws_credential_file):
395                return
396
397            config = PyConfigParser()
398
399            debug("Reading AWS credentials from %s" % (aws_credential_file))
400            with io.open(aws_credential_file, "r",
401                         encoding=getattr(self, 'encoding', 'UTF-8')) as fp:
402                config_string = fp.read()
403            try:
404                try:
405                    # readfp is replaced by read_file in python3,
406                    # but so far readfp it is still available.
407                    config.readfp(io.StringIO(config_string))
408                except MissingSectionHeaderError:
409                    # if header is missing, this could be deprecated
410                    # credentials file format as described here:
411                    # https://blog.csanchez.org/2011/05/
412                    # then do the hacky-hack and add default header
413                    # to be able to read the file with PyConfigParser()
414                    config_string = u'[default]\n' + config_string
415                    config.readfp(io.StringIO(config_string))
416            except ParsingError as exc:
417                raise ValueError(
418                    "Error reading aws_credential_file "
419                    "(%s): %s" % (aws_credential_file, str(exc)))
420
421            profile = base_unicodise(os.environ.get('AWS_PROFILE', "default"))
422            debug("Using AWS profile '%s'" % (profile))
423
424            # get_key - helper function to read the aws profile credentials
425            # including the legacy ones as described here:
426            # https://blog.csanchez.org/2011/05/
427            def get_key(profile, key, legacy_key, print_warning=True):
428                result = None
429
430                try:
431                    result = config.get(profile, key)
432                except NoOptionError as e:
433                    # we may want to skip warning message for optional keys
434                    if print_warning:
435                        warning("Couldn't find key '%s' for the AWS Profile "
436                                "'%s' in the credentials file '%s'",
437                                e.option, e.section, aws_credential_file)
438                    # if the legacy_key defined and original one wasn't found,
439                    # try read the legacy_key
440                    if legacy_key:
441                        try:
442                            key = legacy_key
443                            profile = "default"
444                            result = config.get(profile, key)
445                            warning(
446                                "Legacy configuratin key '%s' used, please use"
447                                " the standardized config format as described "
448                                "here: https://aws.amazon.com/blogs/security/a-new-and-standardized-way-to-manage-credentials-in-the-aws-sdks/",
449                                key)
450                        except NoOptionError as e:
451                            pass
452
453                if result:
454                    debug("Found the configuration option '%s' for the AWS "
455                          "Profile '%s' in the credentials file %s",
456                          key, profile, aws_credential_file)
457                return result
458
459            profile_access_key = get_key(profile, "aws_access_key_id",
460                                         "AWSAccessKeyId")
461            if profile_access_key:
462                Config().update_option('access_key',
463                                       base_unicodise(profile_access_key))
464
465            profile_secret_key = get_key(profile, "aws_secret_access_key",
466                                         "AWSSecretKey")
467            if profile_secret_key:
468                Config().update_option('secret_key',
469                                       base_unicodise(profile_secret_key))
470
471            profile_access_token = get_key(profile, "aws_session_token", None,
472                                           False)
473            if profile_access_token:
474                Config().update_option('access_token',
475                                       base_unicodise(profile_access_token))
476
477        except IOError as e:
478            warning("Errno %d accessing credentials file %s", e.errno,
479                    aws_credential_file)
480        except NoSectionError as e:
481            warning("Couldn't find AWS Profile '%s' in the credentials file "
482                    "'%s'", profile, aws_credential_file)
483
484    def option_list(self):
485        retval = []
486        for option in dir(self):
487            ## Skip attributes that start with underscore or are not string, int or bool
488            option_type = type(getattr(Config, option))
489            if option.startswith("_") or \
490               not (option_type in (
491                    type(u"string"), # str
492                        type(42),   # int
493                    type(True))):   # bool
494                continue
495            retval.append(option)
496        return retval
497
498    def read_config_file(self, configfile):
499        cp = ConfigParser(configfile)
500        for option in self.option_list():
501            _option = cp.get(option)
502            if _option is not None:
503                _option = _option.strip()
504            self.update_option(option, _option)
505
506        # allow acl_public to be set from the config file too, even though by
507        # default it is set to None, and not present in the config file.
508        if cp.get('acl_public'):
509            self.update_option('acl_public', cp.get('acl_public'))
510
511        if cp.get('add_headers'):
512            for option in cp.get('add_headers').split(","):
513                (key, value) = option.split(':', 1)
514                self.extra_headers[key.strip()] = value.strip()
515
516        self._parsed_files.append(configfile)
517
518    def dump_config(self, stream):
519        ConfigDumper(stream).dump(u"default", self)
520
521    def update_option(self, option, value):
522        if value is None:
523            return
524
525        #### Handle environment reference
526        if unicode(value).startswith("$"):
527            return self.update_option(option, os.getenv(value[1:]))
528
529        #### Special treatment of some options
530        ## verbosity must be known to "logging" module
531        if option == "verbosity":
532            # support integer verboisities
533            try:
534                value = int(value)
535            except ValueError:
536                try:
537                    # otherwise it must be a key known to the logging module
538                    try:
539                        # python 3 support
540                        value = logging._levelNames[value]
541                    except AttributeError:
542                        value = logging._nameToLevel[value]
543                except KeyError:
544                    raise ValueError("Config: verbosity level '%s' is not valid" % value)
545
546        elif option == "limitrate":
547            #convert kb,mb to bytes
548            if value.endswith("k") or value.endswith("K"):
549                shift = 10
550            elif value.endswith("m") or value.endswith("M"):
551                shift = 20
552            else:
553                shift = 0
554            try:
555                value = shift and int(value[:-1]) << shift or int(value)
556            except Exception:
557                raise ValueError("Config: value of option %s must have suffix m, k, or nothing, not '%s'" % (option, value))
558
559        ## allow yes/no, true/false, on/off and 1/0 for boolean options
560        ## Some options default to None, if that's the case check the value to see if it is bool
561        elif (type(getattr(Config, option)) is type(True) or              # Config is bool
562              (getattr(Config, option) is None and is_bool(value))):  # Config is None and value is bool
563            if is_bool_true(value):
564                value = True
565            elif is_bool_false(value):
566                value = False
567            else:
568                raise ValueError("Config: value of option '%s' must be Yes or No, not '%s'" % (option, value))
569
570        elif type(getattr(Config, option)) is type(42):     # int
571            try:
572                value = int(value)
573            except ValueError:
574                raise ValueError("Config: value of option '%s' must be an integer, not '%s'" % (option, value))
575
576        elif option in ["host_base", "host_bucket", "cloudfront_host"]:
577            if value.startswith("http://"):
578                value = value[7:]
579            elif value.startswith("https://"):
580                value = value[8:]
581
582
583        setattr(Config, option, value)
584
class ConfigParser(object):
    """Minimal reader for "key = value" configuration files.

    The parsed values are kept as text in the cfg dict and are available
    through item access or get(). Lines starting with "#" and empty lines
    are skipped, "[name]" lines start a section.
    """

    # Options holding credentials: their values are masked in debug logs
    _SECRET_KEYS = ("access_key", "secret_key", "access_token",
                    "gpg_passphrase")

    _R_COMMENT = re.compile(r'^\s*#.*')
    _R_EMPTY = re.compile(r'^\s*$')
    _R_SECTION = re.compile(r'^\[([^\]]+)\]')
    _R_DATA = re.compile(r'^\s*(?P<key>\w+)\s*=\s*(?P<value>.*)')
    _R_QUOTES = re.compile(r'^"(.*)"\s*$')

    def __init__(self, file, sections=None):
        """Parse file right away; see parse_file() for the arguments."""
        self.cfg = {}
        self.parse_file(file, sections)

    def parse_file(self, file, sections=None):
        """Read the options of file into this parser.

        file     -- path of the file, read as UTF-8
        sections -- a section name, or a list/tuple of names, to restrict
                    the parsing to; all sections are read when it is empty
                    or not given. Lines found before the first section
                    header are always read.

        Raises IOError when the file cannot be opened.
        """
        debug("ConfigParser: Reading file '%s'" % file)
        if sections is None:
            sections = []
        elif not isinstance(sections, (list, tuple)):
            sections = [sections]
        in_our_section = True
        with io.open(file, "r", encoding=self.get('encoding', 'UTF-8')) as fp:
            for line in fp:
                if self._R_COMMENT.match(line) or self._R_EMPTY.match(line):
                    continue
                is_section = self._R_SECTION.match(line)
                if is_section:
                    section = is_section.group(1)
                    in_our_section = (not sections) or (section in sections)
                    continue
                is_data = self._R_DATA.match(line)
                if is_data:
                    # a well-formed line of a section we were not asked for
                    # is skipped silently, it is not an invalid line
                    if in_our_section:
                        self._store(is_data.group("key"),
                                    is_data.group("value"))
                    continue
                warning("Ignoring invalid line in '%s': %s" % (file, line))

    def _store(self, key, value):
        """Unquote value, record it under key and log it."""
        quoted = self._R_QUOTES.match(value)
        if quoted:
            # take the text between the quotes: blanks may follow the
            # closing quote, so slicing one character off each end is wrong
            value = quoted.group(1)
        self[key] = value
        if key in self._SECRET_KEYS:
            print_value = self._mask(value)
        else:
            print_value = value
        debug("ConfigParser: %s->%s" % (key, print_value))

    @staticmethod
    def _mask(value):
        """Return a loggable stand-in for a secret value."""
        if len(value) <= 3:
            # too short to show any part of it without giving it away
            return "...%d_chars..." % len(value)
        return "%s...%d_chars...%s" % (value[:2], len(value) - 3, value[-1:])

    def __getitem__(self, name):
        return self.cfg[name]

    def __setitem__(self, name, value):
        self.cfg[name] = value

    def get(self, name, default = None):
        """Return the value stored for name, or default when there is none."""
        if name in self.cfg:
            return self.cfg[name]
        return default
633
class ConfigDumper(object):
    """Writes the options of a configuration object to a text stream."""

    def __init__(self, stream):
        self.stream = stream

    @staticmethod
    def _level_label(level):
        """Return the logging name of level, or level itself if unnamed."""
        try:
            names = logging._levelNames
        except AttributeError:
            # python 3 support
            names = logging._levelToName
        return names.get(level, level)

    def dump(self, section, config):
        """Write a "[section]" header followed by one "name = value" line
        for every option listed by config.option_list().

        An integer verbosity is written as its logging level name when
        there is one.
        """
        write = self.stream.write
        write(u"[%s]\n" % section)
        for name in config.option_list():
            setting = getattr(config, name)
            if name == "verbosity" and isinstance(setting, int):
                # we turn level numbers back into strings if possible
                setting = self._level_label(setting)
            write(u"%s = %s\n" % (name, setting))
654
655# vim:et:ts=4:sts=4:ai
656