1#!/usr/bin/python
2# Copyright (c) 2012 The Native Client Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""Provide Google Storage access.
7
Provide a high-level interface to Google Storage.
Operations are provided to read/write whole files and to
read/write strings. This allows Google Storage to be treated
more or less like a key+value data-store.
12"""
13
14
15import logging
16import os
17import posixpath
18import random
19import re
20import shutil
21import subprocess
22import string
23import sys
24import tempfile
25
26import pynacl.file_tools
27import pynacl.http_download
28import pynacl.platform
29
30
31GS_PATTERN = 'gs://%s'
32GS_HTTPS_PATTERN = 'https://storage.googleapis.com/%s'
33
34
def LegalizeName(name):
  """Sanitize a name so it can be used as a Google Storage file name.

  Every character other than an ASCII letter, a digit, '_', '/' or '.' is
  replaced with an underscore (so dashes, spaces, etc. all become '_').

  Args:
    name: The candidate file name.
  Returns:
    The name with each disallowed character replaced by '_'.
  """
  illegal_chars = re.compile(r'[^A-Za-z0-9_/.]')
  return illegal_chars.sub('_', name)
42
43
def HttpDownload(url, target):
  """Default download route.

  Delegates to pynacl.http_download.HttpDownload with verbose output turned
  off and logging.debug as the logger.

  Args:
    url: URL to fetch.
    target: Destination path; converted to an absolute path before use.
  """
  destination = os.path.abspath(target)
  pynacl.http_download.HttpDownload(
      url, destination, verbose=False, logger=logging.debug)
48
49
class GSDStorageError(Exception):
  """Raised when a write to Google Storage cannot be completed."""
53
54
class GSDStorage(object):
  """A wrapper for reading and writing to GSD buckets.

  Multiple read buckets may be specified, and the wrapper will sequentially try
  each and fall back to the next if the previous fails.
  Writing is to a single bucket.
  """
  def __init__(self,
               write_bucket,
               read_buckets,
               gsutil=None,
               call=subprocess.call,
               download=HttpDownload):
    """Init for this class.

    Args:
      write_bucket: Google storage location to write to. May be None, in
          which case PutFile raises GSDStorageError.
      read_buckets: List of Google storage locations to read from, in
          preferred order.
      gsutil: List of cmd components needed to call gsutil. When None, the
          script named by the GSUTIL environment variable (default 'gsutil')
          is used.
      call: Testing hook to intercept command invocation.
      download: Testing hook to intercept download.
    """
    if gsutil is None:
      gsutil_script = pynacl.platform.CygPath(
          os.environ.get('GSUTIL', 'gsutil'))
      try:
        # Require that gsutil be Python if it is specified in the environment:
        # the located script is run through the current interpreter.
        gsutil = [
            sys.executable,
            pynacl.file_tools.Which(gsutil_script, require_executable=False)
        ]
      except pynacl.file_tools.ExecutableNotFound:
        # Could not locate the script; fall back to invoking it by name.
        gsutil = [gsutil_script]
    assert isinstance(gsutil, list)
    assert isinstance(read_buckets, list)
    self._gsutil = gsutil
    self._write_bucket = write_bucket
    self._read_buckets = read_buckets
    self._call = call
    self._download = download

  @staticmethod
  def _LocalFileUrl(path):
    """Return the file:// URL form of a local path for use with gsutil."""
    # Using file://c:/foo/bar form of path as gsutil does not like drive
    # letters without it.
    return 'file://' + os.path.abspath(path).replace(os.sep, '/')

  def Exists(self, key):
    """Queries whether or not a key exists.

    Read buckets are checked in their preferred order, followed by the write
    bucket (if one is set and it is not already among the read buckets).

    Args:
      key: Key file is stored under.
    Returns:
      URL of existing key, or False if file does not exist.
    """
    # De-duplicate while keeping order. A set would make the bucket that is
    # reported depend on hash ordering, and a None write bucket cannot be
    # joined into an object name.
    buckets = []
    for bucket in self._read_buckets + [self._write_bucket]:
      if bucket is not None and bucket not in buckets:
        buckets.append(bucket)

    for bucket in buckets:
      obj = posixpath.join(bucket, key)
      cmd = self._gsutil + ['ls', GS_PATTERN % obj]
      logging.info('Running: %s', str(cmd))
      if self._call(cmd) == 0:
        return GS_HTTPS_PATTERN % obj

    return False

  def PutFile(self, path, key, clobber=True):
    """Write a file to global storage.

    The file is first uploaded under a temporary name, then copied to its
    final name, and finally the temporary object is removed.

    Args:
      path: Path of the file to write.
      key: Key to store file under.
      clobber: When False, '-n' is passed to 'gsutil cp' so that an existing
          object is not overwritten.
    Raises:
      GSDStorageError if the underlying storage fails.
    Returns:
      URL written to.
    """
    if self._write_bucket is None:
      raise GSDStorageError('no bucket when storing %s to %s' % (path, key))
    obj = self._write_bucket + '/' + key
    cp_cmd = ['cp', '-a', 'public-read']
    if not clobber:
      cp_cmd.append('-n')

    gs_path = GS_PATTERN % obj
    # ascii_lowercase rather than string.lowercase: the latter is
    # locale-dependent on Python 2 and does not exist on Python 3.
    gs_tmp_path = gs_path + '.tmp.' + ''.join(
        random.choice(string.ascii_lowercase) for _ in range(10))
    file_path = self._LocalFileUrl(path)

    # Store to temporary location.
    cmd = self._gsutil + cp_cmd + [file_path, gs_tmp_path]
    logging.info('Running: %s', str(cmd))
    if self._call(cmd) != 0:
      raise GSDStorageError('failed when storing %s to %s (%s)' % (
        path, key, cmd))

    # Copy to final location (so the window of time is short).
    # NOTE: if this copy fails, the temporary object is left in the bucket.
    cmd = self._gsutil + cp_cmd + [gs_tmp_path, gs_path]
    logging.info('Running: %s', str(cmd))
    if self._call(cmd) != 0:
      raise GSDStorageError('failed when storing %s to %s (%s)' % (
        path, key, cmd))

    # Cleanup.
    cmd = self._gsutil + ['rm', gs_tmp_path]
    logging.info('Running: %s', str(cmd))
    if self._call(cmd) != 0:
      raise GSDStorageError('failed when storing %s to %s (%s)' % (
        path, key, cmd))

    return GS_HTTPS_PATTERN % obj

  def PutData(self, data, key, clobber=True):
    """Write data to global storage.

    The data is written to a local temporary file, which is uploaded with
    PutFile and then deleted whether or not the upload succeeded.

    Args:
      data: Data to store.
      key: Key to store file under.
      clobber: Forwarded to PutFile.
    Raises:
      GSDStorageError if the underlying storage fails.
    Returns:
      URL written to.
    """
    handle, path = tempfile.mkstemp(prefix='gdstore', suffix='.tmp')
    try:
      # Only the path is needed; the file is re-opened by WriteFile.
      os.close(handle)
      pynacl.file_tools.WriteFile(data, path)
      return self.PutFile(path, key, clobber=clobber)
    finally:
      os.remove(path)

  def GetFile(self, key, path):
    """Read a file from global storage.

    Each read bucket is tried in order over HTTPS until a download succeeds.

    Args:
      key: Key file is stored under.
      path: Destination filename.
    Returns:
      URL used on success or None for failure.
    """
    for bucket in self._read_buckets:
      obj = bucket + '/' + key
      uri = GS_HTTPS_PATTERN % obj
      logging.debug('Downloading: %s to %s', uri, path)
      try:
        self._download(uri, path)
      except Exception:
        # Best effort: fall through to the next bucket. KeyboardInterrupt and
        # SystemExit are deliberately not swallowed.
        logging.debug('Failed downloading: %s to %s', uri, path)
        continue
      return uri
    return None

  def GetSecureFile(self, key, path):
    """Read a non-publicly-accessible file from global storage.

    Each read bucket is tried in order using 'gsutil cp' until one succeeds.

    Args:
      key: Key file is stored under.
      path: Destination filename.
    Returns:
      Command used on success or None on failure.
    """
    file_url = self._LocalFileUrl(path)
    for bucket in self._read_buckets:
      obj = bucket + '/' + key
      cmd = self._gsutil + ['cp', GS_PATTERN % obj, file_url]
      logging.info('Running: %s', str(cmd))
      try:
        if self._call(cmd) == 0:
          return cmd
      except Exception:
        # Best effort: fall through to the next bucket. KeyboardInterrupt and
        # SystemExit are deliberately not swallowed.
        logging.debug('Failed to fetch %s from %s (%s)', key, path, cmd)
    return None

  def GetData(self, key):
    """Read data from global storage.

    The key is downloaded with GetFile into a temporary directory, which is
    removed before returning.

    Args:
      key: Key data is stored under.
    Returns:
      Data from storage, or None for failure.
    """
    work_dir = tempfile.mkdtemp(prefix='gdstore', suffix='.tmp')
    try:
      path = os.path.join(work_dir, 'data')
      if self.GetFile(key, path) is not None:
        return pynacl.file_tools.ReadFile(path)
      return None
    finally:
      shutil.rmtree(work_dir)
238