#!/usr/bin/python
# Copyright (c) 2012 The Native Client Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Provide Google Storage access.

Provide a high-level interface to Google Storage.
Operations are provided to read/write whole files and to
read/write strings. This allows Google Storage to be treated
more or less like a key+value data-store.
"""


import logging
import os
import posixpath
import random
import re
import shutil
import subprocess
import string
import sys
import tempfile

import pynacl.file_tools
import pynacl.http_download
import pynacl.platform


GS_PATTERN = 'gs://%s'
GS_HTTPS_PATTERN = 'https://storage.googleapis.com/%s'


def LegalizeName(name):
  """Return a file name suitable for uploading to Google Storage.

  The names of such files cannot contain dashes or other non-identifier
  characters, so every character other than an ASCII letter, a digit,
  '_', '/' or '.' is replaced with an underscore.
  """
  return re.sub(r'[^A-Za-z0-9_/.]', '_', name)


def HttpDownload(url, target):
  """Default download route.

  Args:
    url: URL to fetch.
    target: Local destination path; it is made absolute before downloading.
  """
  pynacl.http_download.HttpDownload(
      url, os.path.abspath(target), verbose=False, logger=logging.debug)


def _LocalFileUrl(path):
  """Return the file:// form of a local path, using forward slashes.

  Using the file://c:/foo/bar form of a path as gsutil does not like drive
  letters without it.
  """
  return 'file://' + os.path.abspath(path).replace(os.sep, '/')


class GSDStorageError(Exception):
  """Error indicating writing to storage failed."""


class GSDStorage(object):
  """A wrapper for reading and writing to GSD buckets.

  Multiple read buckets may be specified, and the wrapper will sequentially
  try each and fall back to the next if the previous fails.
  Writing is to a single bucket.
  """

  def __init__(self,
               write_bucket,
               read_buckets,
               gsutil=None,
               call=subprocess.call,
               download=HttpDownload):
    """Init for this class.

    Args:
      write_bucket: Google storage location to write to (may be None, in
          which case PutFile raises GSDStorageError).
      read_buckets: Google storage locations to read from in preferred order.
      gsutil: List of cmd components needed to call gsutil.
      call: Testing hook to intercept command invocation.
      download: Testing hook to intercept download.
    """
    if gsutil is None:
      gsutil_script = pynacl.platform.CygPath(
          os.environ.get('GSUTIL', 'gsutil'))
      try:
        # Require that gsutil be Python if it is specified in the environment.
        gsutil = [
            sys.executable,
            pynacl.file_tools.Which(gsutil_script, require_executable=False)
        ]
      except pynacl.file_tools.ExecutableNotFound:
        # Fall back to invoking the script name directly.
        gsutil = [gsutil_script]
    assert isinstance(gsutil, list)
    assert isinstance(read_buckets, list)
    self._gsutil = gsutil
    self._write_bucket = write_bucket
    self._read_buckets = read_buckets
    self._call = call
    self._download = download

  def Exists(self, key):
    """Queries whether or not a key exists.

    The read buckets are checked in their preferred order, followed by the
    write bucket. Duplicate buckets are queried only once and an unset
    (None) write bucket is skipped.

    Args:
      key: Key file is stored under.
    Returns:
      URL of existing key, or False if file does not exist.
    """
    checked = set()
    for bucket in self._read_buckets + [self._write_bucket]:
      if bucket is None or bucket in checked:
        continue
      checked.add(bucket)
      obj = posixpath.join(bucket, key)
      cmd = self._gsutil + ['ls', GS_PATTERN % obj]
      logging.info('Running: %s', str(cmd))
      if self._call(cmd) == 0:
        return GS_HTTPS_PATTERN % obj

    return False

  def PutFile(self, path, key, clobber=True):
    """Write a file to global storage.

    The file is first uploaded to a randomly named temporary object, then
    copied to its final name and the temporary object is removed.

    Args:
      path: Path of the file to write.
      key: Key to store file under.
      clobber: When False, '-n' is added to the gsutil cp commands
          (gsutil's no-clobber flag).
    Raises:
      GSDStorageError if the underlying storage fails.
    Returns:
      URL written to.
    """
    if self._write_bucket is None:
      raise GSDStorageError('no bucket when storing %s to %s' % (path, key))
    obj = self._write_bucket + '/' + key
    cp_cmd = ['cp', '-a', 'public-read']
    if not clobber:
      cp_cmd.append('-n')

    gs_path = GS_PATTERN % obj
    # string.ascii_lowercase is locale independent and exists on both
    # Python 2 and Python 3 (string.lowercase is neither).
    gs_tmp_path = gs_path + '.tmp.' + ''.join(
        random.choice(string.ascii_lowercase) for _ in range(10))
    file_path = _LocalFileUrl(path)

    steps = [
        # Store to temporary location.
        cp_cmd + [file_path, gs_tmp_path],
        # Copy to final location (so the window of time is short).
        cp_cmd + [gs_tmp_path, gs_path],
        # Cleanup.
        ['rm', gs_tmp_path],
    ]
    for args in steps:
      cmd = self._gsutil + args
      logging.info('Running: %s', str(cmd))
      if self._call(cmd) != 0:
        raise GSDStorageError('failed when storing %s to %s (%s)' % (
            path, key, cmd))

    return GS_HTTPS_PATTERN % obj

  def PutData(self, data, key, clobber=True):
    """Write data to global storage.

    The data is written to a local temporary file which is uploaded with
    PutFile and always removed afterwards.

    Args:
      data: Data to store.
      key: Key to store file under.
      clobber: Forwarded to PutFile.
    Raises:
      GSDStorageError if the underlying storage fails.
    Returns:
      URL written to.
    """
    handle, path = tempfile.mkstemp(prefix='gdstore', suffix='.tmp')
    try:
      os.close(handle)
      pynacl.file_tools.WriteFile(data, path)
      return self.PutFile(path, key, clobber=clobber)
    finally:
      os.remove(path)

  def GetFile(self, key, path):
    """Read a file from global storage.

    Each read bucket is tried in order until a download succeeds.

    Args:
      key: Key file is stored under.
      path: Destination filename.
    Returns:
      URL used on success or None for failure.
    """
    for bucket in self._read_buckets:
      uri = GS_HTTPS_PATTERN % (bucket + '/' + key)
      logging.debug('Downloading: %s to %s', uri, path)
      try:
        self._download(uri, path)
      except Exception:
        # Best effort: fall back to the next bucket. KeyboardInterrupt and
        # SystemExit are deliberately not swallowed.
        logging.debug('Failed downloading: %s to %s', uri, path)
        continue
      return uri
    return None

  def GetSecureFile(self, key, path):
    """Read a non-publicly-accessible file from global storage.

    Each read bucket is tried in order with 'gsutil cp' until one of the
    commands exits with status 0.

    Args:
      key: Key file is stored under.
      path: Destination filename.
    Returns:
      Command used on success or None on failure.
    """
    destination = _LocalFileUrl(path)
    for bucket in self._read_buckets:
      cmd = self._gsutil + [
          'cp', GS_PATTERN % (bucket + '/' + key), destination]
      logging.info('Running: %s', str(cmd))
      try:
        if self._call(cmd) == 0:
          return cmd
      except Exception:
        # Best effort: fall back to the next bucket. KeyboardInterrupt and
        # SystemExit are deliberately not swallowed.
        logging.debug('Failed to fetch %s from %s (%s)', key, path, cmd)
    return None

  def GetData(self, key):
    """Read data from global storage.

    The object is downloaded with GetFile into a temporary directory that
    is always removed afterwards.

    Args:
      key: Key file is stored under.
    Returns:
      Data from storage, or None for failure.
    """
    work_dir = tempfile.mkdtemp(prefix='gdstore', suffix='.tmp')
    try:
      path = os.path.join(work_dir, 'data')
      if self.GetFile(key, path) is not None:
        return pynacl.file_tools.ReadFile(path)
      return None
    finally:
      shutil.rmtree(work_dir)