1 /* hfile_s3.c -- Amazon S3 backend for low-level file streams.
2
3 Copyright (C) 2015-2017 Genome Research Ltd.
4
5 Author: John Marshall <jm18@sanger.ac.uk>
6
7 Permission is hereby granted, free of charge, to any person obtaining a copy
8 of this software and associated documentation files (the "Software"), to deal
9 in the Software without restriction, including without limitation the rights
10 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 copies of the Software, and to permit persons to whom the Software is
12 furnished to do so, subject to the following conditions:
13
14 The above copyright notice and this permission notice shall be included in
15 all copies or substantial portions of the Software.
16
17 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 DEALINGS IN THE SOFTWARE. */
24
25 #include <config.h>
26
27 #include <stdarg.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <time.h>
32
33 #include "hts_internal.h"
34 #include "hfile_internal.h"
35 #ifdef ENABLE_PLUGINS
36 #include "version.h"
37 #endif
38 #include "htslib/hts.h" // for hts_version() and hts_verbose
39 #include "htslib/kstring.h"
40
41 #if defined HAVE_COMMONCRYPTO
42
43 #include <CommonCrypto/CommonHMAC.h>
44
45 #define DIGEST_BUFSIZ CC_SHA1_DIGEST_LENGTH
46
47 static size_t
s3_sign(unsigned char * digest,kstring_t * key,kstring_t * message)48 s3_sign(unsigned char *digest, kstring_t *key, kstring_t *message)
49 {
50 CCHmac(kCCHmacAlgSHA1, key->s, key->l, message->s, message->l, digest);
51 return CC_SHA1_DIGEST_LENGTH;
52 }
53
54 #elif defined HAVE_HMAC
55
56 #include <openssl/hmac.h>
57
58 #define DIGEST_BUFSIZ EVP_MAX_MD_SIZE
59
60 static size_t
s3_sign(unsigned char * digest,kstring_t * key,kstring_t * message)61 s3_sign(unsigned char *digest, kstring_t *key, kstring_t *message)
62 {
63 unsigned int len;
64 HMAC(EVP_sha1(), key->s, key->l,
65 (unsigned char *) message->s, message->l, digest, &len);
66 return len;
67 }
68
69 #else
70 #error No HMAC() routine found by configure
71 #endif
72
73 static void
urldecode_kput(const char * s,int len,kstring_t * str)74 urldecode_kput(const char *s, int len, kstring_t *str)
75 {
76 char buf[3];
77 int i = 0;
78
79 while (i < len)
80 if (s[i] == '%' && i+2 < len) {
81 buf[0] = s[i+1], buf[1] = s[i+2], buf[2] = '\0';
82 kputc(strtol(buf, NULL, 16), str);
83 i += 3;
84 }
85 else kputc(s[i++], str);
86 }
87
/* Appends the base64 encoding of the LEN bytes at DATA to STR, using the
   "A-Za-z0-9+/" alphabet and '=' padding to a multiple of four characters.  */
static void base64_kput(const unsigned char *data, size_t len, kstring_t *str)
{
    static const char alphabet[] =
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

    unsigned acc = 0;   // bit accumulator; only its low bits are ever read
    int nbits = 0;      // number of not-yet-emitted bits held in acc
    int npad = 0;       // number of '=' characters to append
    size_t pos;

    for (pos = 0; pos < len; pos++) {
        acc = (acc << 8) | data[pos];
        nbits += 8;

        // Emit every complete sextet currently available
        while (nbits >= 6) {
            nbits -= 6;
            kputc(alphabet[(acc >> nbits) & 63], str);
        }
    }

    if (nbits > 0) {
        // 2 or 4 bits remain: zero-extend them to a final sextet.
        // 2 leftover bits means one input byte in the last group ("=="),
        // 4 leftover bits means two input bytes ("=").
        kputc(alphabet[(acc << (6 - nbits)) & 63], str);
        npad = (nbits == 2)? 2 : 1;
    }

    kputsn("==", npad, str);
}
111
/* Checks whether the bucket name occupying [S0, SLIM) can be used as a DNS
   label sequence.  Returns non-zero only if the name
     - consists solely of lower-case letters, digits, '-' and '.';
     - neither starts nor ends with '-' or '.';
     - has an alphanumeric character on both sides of every '.';
     - contains at least one letter or '-' (i.e. is not all digits/dots);
     - is between 3 and 63 characters long.  */
static int is_dns_compliant(const char *s0, const char *slim)
{
    int saw_nondigit = 0;
    const char *p;

    for (p = s0; p < slim; p++) {
        const int at_edge = (p == s0 || p+1 == slim);

        if (islower_c(*p)) {
            saw_nondigit = 1;
            continue;
        }
        if (isdigit_c(*p))
            continue;

        if (*p == '-') {
            if (at_edge) return 0;
            saw_nondigit = 1;
            continue;
        }

        // Anything else must be an interior '.' flanked by alphanumerics
        if (*p != '.' || at_edge) return 0;
        if (! isalnum_c(p[-1]) || ! isalnum_c(p[1])) return 0;
    }

    // p - s0 is the number of characters examined (0 for an empty range)
    return saw_nondigit && p - s0 >= 3 && p - s0 <= 63;
}
134
expand_tilde_open(const char * fname,const char * mode)135 static FILE *expand_tilde_open(const char *fname, const char *mode)
136 {
137 FILE *fp;
138
139 if (strncmp(fname, "~/", 2) == 0) {
140 kstring_t full_fname = { 0, 0, NULL };
141 const char *home = getenv("HOME");
142 if (! home) return NULL;
143
144 kputs(home, &full_fname);
145 kputs(&fname[1], &full_fname);
146
147 fp = fopen(full_fname.s, mode);
148 free(full_fname.s);
149 }
150 else
151 fp = fopen(fname, mode);
152
153 return fp;
154 }
155
parse_ini(const char * fname,const char * section,...)156 static void parse_ini(const char *fname, const char *section, ...)
157 {
158 kstring_t line = { 0, 0, NULL };
159 int active = 1; // Start active, so global properties are accepted
160 char *s;
161
162 FILE *fp = expand_tilde_open(fname, "r");
163 if (fp == NULL) return;
164
165 while (line.l = 0, kgetline(&line, (kgets_func *) fgets, fp) >= 0)
166 if (line.s[0] == '[' && (s = strchr(line.s, ']')) != NULL) {
167 *s = '\0';
168 active = (strcmp(&line.s[1], section) == 0);
169 }
170 else if (active && (s = strpbrk(line.s, ":=")) != NULL) {
171 const char *key = line.s, *value = &s[1], *akey;
172 va_list args;
173
174 while (isspace_c(*key)) key++;
175 while (s > key && isspace_c(s[-1])) s--;
176 *s = '\0';
177
178 while (isspace_c(*value)) value++;
179 while (line.l > 0 && isspace_c(line.s[line.l-1]))
180 line.s[--line.l] = '\0';
181
182 va_start(args, section);
183 while ((akey = va_arg(args, const char *)) != NULL) {
184 kstring_t *avar = va_arg(args, kstring_t *);
185 if (strcmp(key, akey) == 0) { kputs(value, avar); break; }
186 }
187 va_end(args);
188 }
189
190 fclose(fp);
191 free(line.s);
192 }
193
/* Reads an access key ID and a secret from a plain text file.

   FNAME is opened via expand_tilde_open(); if it cannot be opened nothing
   happens.  All lines of the file are joined with spaces; the first
   whitespace-separated word is appended to ID and the second to SECRET.
   If nothing could be read from the file, ID and SECRET are left alone.  */
static void parse_simple(const char *fname, kstring_t *id, kstring_t *secret)
{
    kstring_t text = { 0, 0, NULL };
    char *s;
    size_t len;

    FILE *fp = expand_tilde_open(fname, "r");
    if (fp == NULL) return;

    // text.l is deliberately not reset, so the lines accumulate in text
    while (kgetline(&text, (kgets_func *) fgets, fp) >= 0)
        kputc(' ', &text);
    fclose(fp);

    // If no line was read (empty file, read or allocation failure), text.s
    // may be NULL or may not hold a terminated string, so do not scan it.
    if (text.l == 0 || text.s == NULL) {
        free(text.s);
        return;
    }

    s = text.s;
    while (isspace_c(*s)) s++;
    kputsn(s, len = strcspn(s, " \t"), id);

    s += len;
    while (isspace_c(*s)) s++;
    kputsn(s, strcspn(s, " \t"), secret);

    free(text.s);
}
217
s3_rewrite(const char * s3url,const char * mode,va_list * argsp)218 static hFILE * s3_rewrite(const char *s3url, const char *mode, va_list *argsp)
219 {
220 const char *bucket, *path;
221 char date_hdr[40];
222 char *header_list[4], **header = header_list;
223
224 kstring_t message = { 0, 0, NULL };
225 kstring_t url = { 0, 0, NULL };
226 kstring_t profile = { 0, 0, NULL };
227 kstring_t id = { 0, 0, NULL };
228 kstring_t secret = { 0, 0, NULL };
229 kstring_t host_base = { 0, 0, NULL };
230 kstring_t token = { 0, 0, NULL };
231 kstring_t token_hdr = { 0, 0, NULL };
232 kstring_t auth_hdr = { 0, 0, NULL };
233
234 time_t now = time(NULL);
235 #ifdef HAVE_GMTIME_R
236 struct tm tm_buffer;
237 struct tm *tm = gmtime_r(&now, &tm_buffer);
238 #else
239 struct tm *tm = gmtime(&now);
240 #endif
241
242 kputs(strchr(mode, 'r')? "GET\n" : "PUT\n", &message);
243 kputc('\n', &message);
244 kputc('\n', &message);
245 strftime(date_hdr, sizeof date_hdr, "Date: %a, %d %b %Y %H:%M:%S GMT", tm);
246 *header++ = date_hdr;
247 kputs(&date_hdr[6], &message);
248 kputc('\n', &message);
249
250 // Our S3 URL format is s3[+SCHEME]://[ID[:SECRET[:TOKEN]]@]BUCKET/PATH
251
252 if (s3url[2] == '+') {
253 bucket = strchr(s3url, ':') + 1;
254 kputsn(&s3url[3], bucket - &s3url[3], &url);
255 }
256 else {
257 kputs("https:", &url);
258 bucket = &s3url[3];
259 }
260 while (*bucket == '/') kputc(*bucket++, &url);
261
262 path = bucket + strcspn(bucket, "/?#@");
263 if (*path == '@') {
264 const char *colon = strpbrk(bucket, ":@");
265 if (*colon != ':') {
266 urldecode_kput(bucket, colon - bucket, &profile);
267 }
268 else {
269 const char *colon2 = strpbrk(&colon[1], ":@");
270 urldecode_kput(bucket, colon - bucket, &id);
271 urldecode_kput(&colon[1], colon2 - &colon[1], &secret);
272 if (*colon2 == ':')
273 urldecode_kput(&colon2[1], path - &colon2[1], &token);
274 }
275
276 bucket = &path[1];
277 path = bucket + strcspn(bucket, "/?#");
278 }
279 else {
280 // If the URL has no ID[:SECRET]@, consider environment variables.
281 const char *v;
282 if ((v = getenv("AWS_ACCESS_KEY_ID")) != NULL) kputs(v, &id);
283 if ((v = getenv("AWS_SECRET_ACCESS_KEY")) != NULL) kputs(v, &secret);
284 if ((v = getenv("AWS_SESSION_TOKEN")) != NULL) kputs(v, &token);
285
286 if ((v = getenv("AWS_DEFAULT_PROFILE")) != NULL) kputs(v, &profile);
287 else if ((v = getenv("AWS_PROFILE")) != NULL) kputs(v, &profile);
288 else kputs("default", &profile);
289 }
290
291 if (id.l == 0) {
292 const char *v = getenv("AWS_SHARED_CREDENTIALS_FILE");
293 parse_ini(v? v : "~/.aws/credentials", profile.s,
294 "aws_access_key_id", &id, "aws_secret_access_key", &secret,
295 "aws_session_token", &token, NULL);
296 }
297 if (id.l == 0)
298 parse_ini("~/.s3cfg", profile.s, "access_key", &id,
299 "secret_key", &secret, "access_token", &token,
300 "host_base", &host_base, NULL);
301 if (id.l == 0)
302 parse_simple("~/.awssecret", &id, &secret);
303
304 if (host_base.l == 0)
305 kputs("s3.amazonaws.com", &host_base);
306 // Use virtual hosted-style access if possible, otherwise path-style.
307 if (is_dns_compliant(bucket, path)) {
308 kputsn(bucket, path - bucket, &url);
309 kputc('.', &url);
310 kputs(host_base.s, &url);
311 }
312 else {
313 kputs(host_base.s, &url);
314 kputc('/', &url);
315 kputsn(bucket, path - bucket, &url);
316 }
317 kputs(path, &url);
318
319 if (token.l > 0) {
320 kputs("x-amz-security-token:", &message);
321 kputs(token.s, &message);
322 kputc('\n', &message);
323
324 kputs("X-Amz-Security-Token: ", &token_hdr);
325 kputs(token.s, &token_hdr);
326 *header++ = token_hdr.s;
327 }
328
329 kputc('/', &message);
330 kputs(bucket, &message); // CanonicalizedResource is '/' + bucket + path
331
332 // If we have no id/secret, we can't sign the request but will
333 // still be able to access public data sets.
334 if (id.l > 0 && secret.l > 0) {
335 unsigned char digest[DIGEST_BUFSIZ];
336 size_t digest_len = s3_sign(digest, &secret, &message);
337
338 kputs("Authorization: AWS ", &auth_hdr);
339 kputs(id.s, &auth_hdr);
340 kputc(':', &auth_hdr);
341 base64_kput(digest, digest_len, &auth_hdr);
342
343 *header++ = auth_hdr.s;
344 }
345
346 *header = NULL;
347 hFILE *fp = hopen(url.s, mode, "va_list", argsp, "httphdr:v", header_list,
348 NULL);
349 free(message.s);
350 free(url.s);
351 free(profile.s);
352 free(id.s);
353 free(secret.s);
354 free(host_base.s);
355 free(token.s);
356 free(token_hdr.s);
357 free(auth_hdr.s);
358 return fp;
359 }
360
s3_open(const char * url,const char * mode)361 static hFILE *s3_open(const char *url, const char *mode)
362 {
363 kstring_t mode_colon = { 0, 0, NULL };
364 kputs(mode, &mode_colon);
365 kputc(':', &mode_colon);
366 hFILE *fp = s3_rewrite(url, mode_colon.s, NULL);
367 free(mode_colon.s);
368 return fp;
369 }
370
s3_vopen(const char * url,const char * mode_colon,va_list args0)371 static hFILE *s3_vopen(const char *url, const char *mode_colon, va_list args0)
372 {
373 // Need to use va_copy() as we can only take the address of an actual
374 // va_list object, not that of a parameter whose type may have decayed.
375 va_list args;
376 va_copy(args, args0);
377 hFILE *fp = s3_rewrite(url, mode_colon, &args);
378 va_end(args);
379 return fp;
380 }
381
PLUGIN_GLOBAL(hfile_plugin_init,_s3)382 int PLUGIN_GLOBAL(hfile_plugin_init,_s3)(struct hFILE_plugin *self)
383 {
384 static const struct hFILE_scheme_handler handler =
385 { s3_open, hfile_always_remote, "Amazon S3", 2000 + 50, s3_vopen
386 };
387
388 #ifdef ENABLE_PLUGINS
389 // Embed version string for examination via strings(1) or what(1)
390 static const char id[] = "@(#)hfile_s3 plugin (htslib)\t" HTS_VERSION;
391 if (hts_verbose >= 9)
392 fprintf(stderr, "[M::hfile_s3.init] version %s\n", strchr(id, '\t')+1);
393 #endif
394
395 self->name = "Amazon S3";
396 hfile_add_scheme_handler("s3", &handler);
397 hfile_add_scheme_handler("s3+http", &handler);
398 hfile_add_scheme_handler("s3+https", &handler);
399 return 0;
400 }
401