#!/usr/bin/env python

# This is a helper script for the Google Life Sciences instance to be able to:
# 1. Download a blob from storage, which is required at the onset of the
#    Snakemake workflow step to obtain the working directory:
#    gls.py download <bucket> <source> <destination>
# 2. Upload logs back to storage (or some specified directory of files):
#    gls.py save <bucket> <source-dir> <destination-dir>
#    gls.py save <bucket> /google/logs/output source/logs

import argparse
import os
import sys
from glob import glob

from google.cloud import storage


def download_blob(bucket_name, source_blob_name, destination_file_name):
    """Downloads a blob from the bucket."""
    storage_client = storage.Client()
    bucket = storage_client.get_bucket(bucket_name)
    blob = bucket.blob(source_blob_name)

    blob.download_to_filename(destination_file_name)

    print("Blob {} downloaded to {}.".format(source_blob_name, destination_file_name))


def save_files(bucket_name, source_path, destination_path):
    """Given a directory path, save all files recursively to storage."""
    storage_client = storage.Client()
    bucket = storage_client.get_bucket(bucket_name)

    # Strip path indicators so the joins below produce clean storage paths
    bucket_name = bucket_name.strip("/")
    destination_path = destination_path.strip("/")

    # These are full paths
    filenames = get_source_files(source_path)

    if not filenames:
        print("Did not find any filenames under %s" % source_path)
    else:
        print("\nThe following files will be uploaded: %s" % "\n".join(filenames))

    # Ensure an ending slash so the replace below strips the full directory
    # prefix and nothing more (see add_ending_slash)
    source_path = add_ending_slash(source_path)

    # Do the upload!
    for filename in filenames:

        # The relative path of the filename from the source path
        relative_path = filename.replace(source_path, "", 1).strip("/")

        # The path in storage includes relative path from destination_path
        storage_path = os.path.join(destination_path, relative_path)
        full_path = os.path.join(bucket_name, storage_path)
        print(
            "{filename} -> {full_path}".format(filename=filename, full_path=full_path)
        )

        # Get the blob, and only upload if it does not already exist
        blob = bucket.blob(storage_path)
        if not blob.exists():
            print("Uploading %s to %s" % (filename, full_path))
            blob.upload_from_filename(filename)
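
# For illustration, a hypothetical run of save_files (the bucket name and the
# task/stdout file below are placeholder values, not anything this script
# assumes): with bucket_name="my-bucket", source_path="/google/logs/output",
# destination_path="source/logs", and a local file
# /google/logs/output/task/stdout, the loop above computes:
#
#   relative_path = "task/stdout"
#   storage_path  = "source/logs/task/stdout"
#   full_path     = "my-bucket/source/logs/task/stdout"
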

def get_source_files(source_path):
    """Given a directory, return a listing of files to upload."""
    filenames = []
    if not os.path.exists(source_path):
        # Exit (with status 0) rather than error if the directory is missing
        print("%s does not exist!" % source_path)
        sys.exit(0)

    # Walk the tree and collect regular files; glob skips hidden dotfiles
    for root, _dirs, _files in os.walk(source_path):
        for name in glob(os.path.join(root, "*")):
            if not os.path.isdir(name):
                filenames.append(name)
    return filenames


def add_ending_slash(filename):
    """Since we want to replace based on having an ending slash, ensure it's there."""
    if not filename.endswith("/"):
        filename = "%s/" % filename
    return filename


def blob_commands(args):
    if args.command == "download":
        download_blob(
            args.bucket_name, args.source_blob_name, args.destination_file_name
        )
    elif args.command == "save":
        save_files(args.bucket_name, args.source_path, args.destination_path)


def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter
    )

    # Require a subcommand so running with no arguments errors instead of
    # silently doing nothing
    subparsers = parser.add_subparsers(dest="command", required=True)

    # Download file from storage
    download_parser = subparsers.add_parser("download", help=download_blob.__doc__)
    download_parser.add_argument("bucket_name", help="Your cloud storage bucket.")
    download_parser.add_argument("source_blob_name")
    download_parser.add_argument("destination_file_name")

    # Save logs to storage
    save_parser = subparsers.add_parser("save", help=save_files.__doc__)
    save_parser.add_argument("bucket_name", help="Your cloud storage bucket.")
    save_parser.add_argument("source_path")
    save_parser.add_argument("destination_path")

    args = parser.parse_args()
    blob_commands(args)


if __name__ == "__main__":
    main()
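
# Example invocations (a sketch; "my-bucket" and the workdir tarball names are
# placeholders, while the save example mirrors the header comment above):
#
#   python gls.py download my-bucket workdir.tar.gz /tmp/workdir.tar.gz
#   python gls.py save my-bucket /google/logs/output source/logs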