1#!/usr/bin/python
2# Copyright 2016 Google Inc. All Rights Reserved.
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8# http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15
16"""Retrieve and store user provided metadata scripts."""
17
18import functools
19import re
20import socket
21import tempfile
22import time
23
24from google_compute_engine import metadata_watcher
25from google_compute_engine.compat import httpclient
26from google_compute_engine.compat import urlerror
27from google_compute_engine.compat import urlrequest
28from google_compute_engine.compat import urlretrieve
29
30
def _RetryOnUnavailable(func):
  """Function decorator template to retry on a service unavailable exception.

  The wrapped function is attempted up to three times. An attempt that raises
  httpclient.HTTPException, socket.error, or urlerror.URLError is retried
  after a five second pause. Any other exception propagates immediately.

  Args:
    func: function, the callable to wrap with retry behavior.

  Returns:
    function, a wrapper returning the result of the first successful attempt.
    If every attempt fails, the wrapper raises the final attempt's exception.
  """
  attempts = 3
  delay_seconds = 5

  @functools.wraps(func)
  def Wrapper(*args, **kwargs):
    for remaining in range(attempts - 1, -1, -1):
      try:
        return func(*args, **kwargs)
      except (httpclient.HTTPException, socket.error, urlerror.URLError):
        if not remaining:
          # Out of attempts: re-raise without a pointless final sleep.
          raise
        time.sleep(delay_seconds)
  return Wrapper
48
49
@_RetryOnUnavailable
def _UrlOpenWithRetry(request):
  """Open a request with urlopen, retrying via _RetryOnUnavailable.

  Args:
    request: the request passed through to urlrequest.urlopen.

  Returns:
    The response object returned by urlrequest.urlopen.
  """
  response = urlrequest.urlopen(request)
  return response
54
55
@_RetryOnUnavailable
def _UrlRetrieveWithRetry(url, dest):
  """Fetch a URL into a file with urlretrieve, retrying via _RetryOnUnavailable.

  Args:
    url: the URL passed through to urlretrieve.urlretrieve.
    dest: the destination passed through to urlretrieve.urlretrieve.

  Returns:
    The value returned by urlretrieve.urlretrieve.
  """
  result = urlretrieve.urlretrieve(url, dest)
  return result
60
61
class ScriptRetriever(object):
  """A class for retrieving and storing user provided metadata scripts."""
  # Metadata key used to fetch the default service account's access token.
  token_metadata_key = 'instance/service-accounts/default/token'
  # Cached authentication token to be used when downloading from bucket.
  token = None

  def __init__(self, logger, script_type):
    """Constructor.

    Args:
      logger: logger object, used to write to SysLog and serial port.
      script_type: string, the metadata script type to run.
    """
    self.logger = logger
    self.script_type = script_type
    self.watcher = metadata_watcher.MetadataWatcher(logger=self.logger)

  def _DownloadAuthUrl(self, url, dest_dir):
    """Download a Google Storage URL using an authentication token.

    If the token cannot be fetched, fallback to unauthenticated download.

    Args:
      url: string, the URL to download.
      dest_dir: string, the path to a directory for storing metadata scripts.

    Returns:
      string, the path to the file storing the metadata script, or None if
      the download failed.
    """
    if not self.token:
      response = self.watcher.GetMetadata(
          self.token_metadata_key, recursive=False, retry=False)

      if not response:
        self.logger.info(
            'Authentication token not found. Attempting unauthenticated '
            'download.')
        # No destination file exists yet, so falling back does not leave an
        # unused empty temporary file behind in dest_dir.
        return self._DownloadUrl(url, dest_dir)

      self.token = '%s %s' % (
          response.get('token_type', ''), response.get('access_token', ''))

    # Reserve the destination only once an authenticated download will
    # actually be attempted.
    dest_file = tempfile.NamedTemporaryFile(dir=dest_dir, delete=False)
    dest_file.close()
    dest = dest_file.name

    self.logger.info(
        'Downloading url from %s to %s using authentication token.', url, dest)

    try:
      request = urlrequest.Request(url)
      # Unredirected headers keep the token from being forwarded if the
      # request is redirected.
      request.add_unredirected_header('Metadata-Flavor', 'Google')
      request.add_unredirected_header('Authorization', self.token)
      content = _UrlOpenWithRetry(request).read()
    except Exception as e:
      self.logger.warning('Could not download %s. %s.', url, str(e))
      return None

    # Store the payload unmodified. A decode followed by a text mode write
    # depends on the default encoding and newline handling, and can fail on
    # scripts containing non-ASCII characters.
    with open(dest, 'wb') as f:
      f.write(content)

    return dest

  def _DownloadUrl(self, url, dest_dir):
    """Download a script from a given URL.

    Args:
      url: string, the URL to download.
      dest_dir: string, the path to a directory for storing metadata scripts.

    Returns:
      string, the path to the file storing the metadata script, or None if
      the download failed.
    """
    dest_file = tempfile.NamedTemporaryFile(dir=dest_dir, delete=False)
    dest_file.close()
    dest = dest_file.name

    self.logger.info('Downloading url from %s to %s.', url, dest)
    try:
      _UrlRetrieveWithRetry(url, dest)
      return dest
    except (httpclient.HTTPException, socket.error, urlerror.URLError) as e:
      self.logger.warning('Could not download %s. %s.', url, str(e))
    except Exception as e:
      self.logger.warning('Exception downloading %s. %s.', url, str(e))
    return None

  def _DownloadScript(self, url, dest_dir):
    """Download the contents of the URL to the destination.

    URLs recognized as Google Storage objects are downloaded using an
    authentication token. Every other URL is downloaded unauthenticated.

    Args:
      url: string, the URL to download.
      dest_dir: string, the path to a directory for storing metadata scripts.

    Returns:
      string, the path to the file storing the metadata script, or None if
      the download failed.
    """
    # Check for the preferred Google Storage URL format:
    # gs://<bucket>/<object>
    if url.startswith(r'gs://'):
      # Convert the string into a standard URL.
      url = re.sub('^gs://', 'https://storage.googleapis.com/', url)
      return self._DownloadAuthUrl(url, dest_dir)

    header = r'http[s]?://'
    domain = r'storage\.googleapis\.com'

    # Many of the Google Storage URLs are supported below.
    # It is preferred that customers specify their object using
    # its gs://<bucket>/<object> url.
    bucket = r'(?P<bucket>[a-z0-9][-_.a-z0-9]*[a-z0-9])'

    # Accept any non-empty string that doesn't contain a wildcard character
    obj = r'(?P<obj>[^\*\?]+)'

    gs_patterns = (
        # Check for the Google Storage URLs:
        # http://<bucket>.storage.googleapis.com/<object>
        # https://<bucket>.storage.googleapis.com/<object>
        r'\A%s%s\.%s/%s\Z' % (header, bucket, domain, obj),
        # Check for the other possible Google Storage URLs:
        # http://storage.googleapis.com/<bucket>/<object>
        # https://storage.googleapis.com/<bucket>/<object>
        #
        # The following are deprecated but checked:
        # http://commondatastorage.googleapis.com/<bucket>/<object>
        # https://commondatastorage.googleapis.com/<bucket>/<object>
        r'\A%s(commondata)?%s/%s/%s\Z' % (header, domain, bucket, obj),
    )
    if any(re.match(pattern, url) for pattern in gs_patterns):
      return self._DownloadAuthUrl(url, dest_dir)

    # Unauthenticated download of the object.
    return self._DownloadUrl(url, dest_dir)

  def _GetAttributeScripts(self, attribute_data, dest_dir):
    """Retrieve the scripts from attribute metadata.

    Args:
      attribute_data: dict, the contents of the attributes metadata.
      dest_dir: string, the path to a directory for storing metadata scripts.

    Returns:
      dict, a dictionary mapping metadata keys to files storing scripts. The
      '<script_type>-script-url' entry is None when its download failed.
    """
    script_dict = {}
    attribute_data = attribute_data or {}

    # Script content provided inline in the metadata value.
    metadata_key = '%s-script' % self.script_type
    metadata_value = attribute_data.get(metadata_key)
    if metadata_value:
      self.logger.info('Found %s in metadata.', metadata_key)
      with tempfile.NamedTemporaryFile(
          mode='w', dir=dest_dir, delete=False) as dest:
        dest.write(metadata_value.lstrip())
        script_dict[metadata_key] = dest.name

    # Script content referenced by URL.
    metadata_key = '%s-script-url' % self.script_type
    metadata_value = attribute_data.get(metadata_key)
    if metadata_value:
      self.logger.info('Found %s in metadata.', metadata_key)
      downloaded_dest = self._DownloadScript(metadata_value, dest_dir)
      if downloaded_dest is None:
        self.logger.warning('Failed to download metadata script.')
      # The key is recorded even when the download failed.
      script_dict[metadata_key] = downloaded_dest

    return script_dict

  def GetScripts(self, dest_dir):
    """Retrieve the scripts to execute.

    Instance attributes take precedence. Project attributes are only consulted
    when the instance attributes yield no script entries.

    Args:
      dest_dir: string, the path to a directory for storing metadata scripts.

    Returns:
      dict, a dictionary mapping set metadata keys with associated scripts.
    """
    metadata_dict = self.watcher.GetMetadata() or {}

    try:
      instance_data = metadata_dict['instance']['attributes']
    except KeyError:
      instance_data = None
      self.logger.warning('Instance attributes were not found.')

    try:
      project_data = metadata_dict['project']['attributes']
    except KeyError:
      project_data = None
      self.logger.warning('Project attributes were not found.')

    return (self._GetAttributeScripts(instance_data, dest_dir)
            or self._GetAttributeScripts(project_data, dest_dir))
258