1 /*  hfile_gcs.c -- Google Cloud Storage backend for low-level file streams.
2 
3     Copyright (C) 2016 Genome Research Ltd.
4 
5     Author: John Marshall <jm18@sanger.ac.uk>
6 
7 Permission is hereby granted, free of charge, to any person obtaining a copy
8 of this software and associated documentation files (the "Software"), to deal
9 in the Software without restriction, including without limitation the rights
10 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 copies of the Software, and to permit persons to whom the Software is
12 furnished to do so, subject to the following conditions:
13 
14 The above copyright notice and this permission notice shall be included in
15 all copies or substantial portions of the Software.
16 
17 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 DEALINGS IN THE SOFTWARE.  */
24 
25 #include <config.h>
26 
27 #include <stdarg.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <errno.h>
32 
33 #include "htslib/hts.h"
34 #include "htslib/kstring.h"
35 #include "hfile_internal.h"
36 #ifdef ENABLE_PLUGINS
37 #include "version.h"
38 #endif
39 
40 static hFILE *
gcs_rewrite(const char * gsurl,const char * mode,int mode_has_colon,va_list * argsp)41 gcs_rewrite(const char *gsurl, const char *mode, int mode_has_colon,
42             va_list *argsp)
43 {
44     const char *bucket, *path, *access_token;
45     kstring_t mode_colon = { 0, 0, NULL };
46     kstring_t url = { 0, 0, NULL };
47     kstring_t auth_hdr = { 0, 0, NULL };
48     hFILE *fp = NULL;
49 
50     // GCS URL format is gs[+SCHEME]://BUCKET/PATH
51 
52     if (gsurl[2] == '+') {
53         bucket = strchr(gsurl, ':') + 1;
54         kputsn(&gsurl[3], bucket - &gsurl[3], &url);
55     }
56     else {
57         kputs("https:", &url);
58         bucket = &gsurl[3];
59     }
60     while (*bucket == '/') kputc(*bucket++, &url);
61 
62     path = bucket + strcspn(bucket, "/?#");
63 
64     kputsn(bucket, path - bucket, &url);
65     if (strchr(mode, 'r')) kputs(".storage-download", &url);
66     else if (strchr(mode, 'w')) kputs(".storage-upload", &url);
67     else kputs(".storage", &url);
68     kputs(".googleapis.com", &url);
69 
70     kputs(path, &url);
71 
72     if (hts_verbose >= 8)
73         fprintf(stderr, "[M::gcs_open] rewrote URL as %s\n", url.s);
74 
75     // TODO Find the access token in a more standard way
76     access_token = getenv("GCS_OAUTH_TOKEN");
77 
78     if (access_token) {
79         kputs("Authorization: Bearer ", &auth_hdr);
80         kputs(access_token, &auth_hdr);
81     }
82 
83     if (argsp || auth_hdr.l > 0 || mode_has_colon) {
84         if (! mode_has_colon) {
85             kputs(mode, &mode_colon);
86             kputc(':', &mode_colon);
87             mode = mode_colon.s;
88         }
89 
90         fp = hopen(url.s, mode, "va_list", argsp,
91                    "httphdr", (auth_hdr.l > 0)? auth_hdr.s : NULL, NULL);
92     }
93     else
94         fp = hopen(url.s, mode);
95 
96     free(mode_colon.s);
97     free(url.s);
98     free(auth_hdr.s);
99     return fp;
100 }
101 
gcs_open(const char * url,const char * mode)102 static hFILE *gcs_open(const char *url, const char *mode)
103 {
104     return gcs_rewrite(url, mode, 0, NULL);
105 }
106 
gcs_vopen(const char * url,const char * mode_colon,va_list args0)107 static hFILE *gcs_vopen(const char *url, const char *mode_colon, va_list args0)
108 {
109     // Need to use va_copy() as we can only take the address of an actual
110     // va_list object, not that of a parameter as its type may have decayed.
111     va_list args;
112     va_copy(args, args0);
113     hFILE *fp = gcs_rewrite(url, mode_colon, 1, &args);
114     va_end(args);
115     return fp;
116 }
117 
PLUGIN_GLOBAL(hfile_plugin_init,_gcs)118 int PLUGIN_GLOBAL(hfile_plugin_init,_gcs)(struct hFILE_plugin *self)
119 {
120     static const struct hFILE_scheme_handler handler =
121         { gcs_open, hfile_always_remote, "Google Cloud Storage",
122           2000 + 50, gcs_vopen
123         };
124 
125 #ifdef ENABLE_PLUGINS
126     // Embed version string for examination via strings(1) or what(1)
127     static const char id[] = "@(#)hfile_gcs plugin (htslib)\t" HTS_VERSION;
128     if (hts_verbose >= 9)
129         fprintf(stderr, "[M::hfile_gcs.init] version %s\n", strchr(id, '\t')+1);
130 #endif
131 
132     self->name = "Google Cloud Storage";
133     hfile_add_scheme_handler("gs", &handler);
134     hfile_add_scheme_handler("gs+http", &handler);
135     hfile_add_scheme_handler("gs+https", &handler);
136     return 0;
137 }
138