1 /*  hfile_s3.c -- Amazon S3 backend for low-level file streams.
2 
3     Copyright (C) 2015-2017 Genome Research Ltd.
4 
5     Author: John Marshall <jm18@sanger.ac.uk>
6 
7 Permission is hereby granted, free of charge, to any person obtaining a copy
8 of this software and associated documentation files (the "Software"), to deal
9 in the Software without restriction, including without limitation the rights
10 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 copies of the Software, and to permit persons to whom the Software is
12 furnished to do so, subject to the following conditions:
13 
14 The above copyright notice and this permission notice shall be included in
15 all copies or substantial portions of the Software.
16 
17 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 DEALINGS IN THE SOFTWARE.  */
24 
25 #include <config.h>
26 
27 #include <stdarg.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <time.h>
32 
33 #include "hts_internal.h"
34 #include "hfile_internal.h"
35 #ifdef ENABLE_PLUGINS
36 #include "version.h"
37 #endif
38 #include "htslib/hts.h"  // for hts_version() and hts_verbose
39 #include "htslib/kstring.h"
40 
41 #if defined HAVE_COMMONCRYPTO
42 
43 #include <CommonCrypto/CommonHMAC.h>
44 
45 #define DIGEST_BUFSIZ CC_SHA1_DIGEST_LENGTH
46 
47 static size_t
s3_sign(unsigned char * digest,kstring_t * key,kstring_t * message)48 s3_sign(unsigned char *digest, kstring_t *key, kstring_t *message)
49 {
50     CCHmac(kCCHmacAlgSHA1, key->s, key->l, message->s, message->l, digest);
51     return CC_SHA1_DIGEST_LENGTH;
52 }
53 
54 #elif defined HAVE_HMAC
55 
56 #include <openssl/hmac.h>
57 
58 #define DIGEST_BUFSIZ EVP_MAX_MD_SIZE
59 
60 static size_t
s3_sign(unsigned char * digest,kstring_t * key,kstring_t * message)61 s3_sign(unsigned char *digest, kstring_t *key, kstring_t *message)
62 {
63     unsigned int len;
64     HMAC(EVP_sha1(), key->s, key->l,
65          (unsigned char *) message->s, message->l, digest, &len);
66     return len;
67 }
68 
69 #else
70 #error No HMAC() routine found by configure
71 #endif
72 
73 static void
urldecode_kput(const char * s,int len,kstring_t * str)74 urldecode_kput(const char *s, int len, kstring_t *str)
75 {
76     char buf[3];
77     int i = 0;
78 
79     while (i < len)
80         if (s[i] == '%' && i+2 < len) {
81             buf[0] = s[i+1], buf[1] = s[i+2], buf[2] = '\0';
82             kputc(strtol(buf, NULL, 16), str);
83             i += 3;
84         }
85         else kputc(s[i++], str);
86 }
87 
/* Append the base64 encoding of the LEN bytes at DATA to STR.
 *
 * Uses the standard alphabet (A-Z, a-z, 0-9, '+', '/') and pads the final
 * group with '=' so the encoded length is always a multiple of four.
 */
static void base64_kput(const unsigned char *data, size_t len, kstring_t *str)
{
    static const char alphabet[] =
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

    const size_t tail = len % 3;      // bytes left over after whole groups
    const size_t whole = len - tail;  // bytes covered by complete 3-byte groups
    size_t pos;
    int pad = 0;

    // Each complete 3-byte group becomes four 6-bit symbols.
    for (pos = 0; pos < whole; pos += 3) {
        unsigned group = ((unsigned) data[pos] << 16)
                       | ((unsigned) data[pos+1] << 8)
                       | data[pos+2];

        kputc(alphabet[(group >> 18) & 63], str);
        kputc(alphabet[(group >> 12) & 63], str);
        kputc(alphabet[(group >> 6) & 63], str);
        kputc(alphabet[group & 63], str);
    }

    if (tail == 1) {
        // 8 data bits, zero-extended to 12 bits: two symbols, then "==".
        unsigned group = (unsigned) data[whole] << 4;

        kputc(alphabet[(group >> 6) & 63], str);
        kputc(alphabet[group & 63], str);
        pad = 2;
    }
    else if (tail == 2) {
        // 16 data bits, zero-extended to 18 bits: three symbols, then "=".
        unsigned group =
            (((unsigned) data[whole] << 8) | data[whole+1]) << 2;

        kputc(alphabet[(group >> 12) & 63], str);
        kputc(alphabet[(group >> 6) & 63], str);
        kputc(alphabet[group & 63], str);
        pad = 1;
    }

    kputsn("==", pad, str);
}
111 
/* Decide whether the name in [S0, SLIM) can be used as a DNS host label
 * prefix, i.e. whether virtual hosted-style access is possible.
 *
 * Accepted names are 3 to 63 characters long, consist only of lowercase
 * letters, digits, '-' and '.', contain at least one letter or '-', do not
 * start or end with '-' or '.', and have an alphanumeric character on both
 * sides of every '.'.  Returns 1 if the name qualifies, 0 otherwise.
 */
static int is_dns_compliant(const char *s0, const char *slim)
{
    const char *p;
    int seen_nondigit = 0;
    int length = 0;

    for (p = s0; p < slim; p++, length++) {
        const int at_start = (p == s0);
        const int at_end = (p + 1 == slim);

        if (islower_c(*p)) {
            seen_nondigit = 1;
            continue;
        }

        if (isdigit_c(*p))
            continue;

        if (*p == '-') {
            if (at_start || at_end) return 0;
            seen_nondigit = 1;
            continue;
        }

        if (*p == '.') {
            // Neighbours are only inspected once we know they exist.
            if (at_start || at_end) return 0;
            if (! isalnum_c(p[-1]) || ! isalnum_c(p[1])) return 0;
            continue;
        }

        // Any other character (uppercase, '_', etc.) disqualifies the name.
        return 0;
    }

    return seen_nondigit && length >= 3 && length <= 63;
}
134 
expand_tilde_open(const char * fname,const char * mode)135 static FILE *expand_tilde_open(const char *fname, const char *mode)
136 {
137     FILE *fp;
138 
139     if (strncmp(fname, "~/", 2) == 0) {
140         kstring_t full_fname = { 0, 0, NULL };
141         const char *home = getenv("HOME");
142         if (! home) return NULL;
143 
144         kputs(home, &full_fname);
145         kputs(&fname[1], &full_fname);
146 
147         fp = fopen(full_fname.s, mode);
148         free(full_fname.s);
149     }
150     else
151         fp = fopen(fname, mode);
152 
153     return fp;
154 }
155 
parse_ini(const char * fname,const char * section,...)156 static void parse_ini(const char *fname, const char *section, ...)
157 {
158     kstring_t line = { 0, 0, NULL };
159     int active = 1;  // Start active, so global properties are accepted
160     char *s;
161 
162     FILE *fp = expand_tilde_open(fname, "r");
163     if (fp == NULL) return;
164 
165     while (line.l = 0, kgetline(&line, (kgets_func *) fgets, fp) >= 0)
166         if (line.s[0] == '[' && (s = strchr(line.s, ']')) != NULL) {
167             *s = '\0';
168             active = (strcmp(&line.s[1], section) == 0);
169         }
170         else if (active && (s = strpbrk(line.s, ":=")) != NULL) {
171             const char *key = line.s, *value = &s[1], *akey;
172             va_list args;
173 
174             while (isspace_c(*key)) key++;
175             while (s > key && isspace_c(s[-1])) s--;
176             *s = '\0';
177 
178             while (isspace_c(*value)) value++;
179             while (line.l > 0 && isspace_c(line.s[line.l-1]))
180                 line.s[--line.l] = '\0';
181 
182             va_start(args, section);
183             while ((akey = va_arg(args, const char *)) != NULL) {
184                 kstring_t *avar = va_arg(args, kstring_t *);
185                 if (strcmp(key, akey) == 0) { kputs(value, avar); break; }
186             }
187             va_end(args);
188         }
189 
190     fclose(fp);
191     free(line.s);
192 }
193 
/* Read an access key id and secret from a simple whitespace-separated file.
 *
 * FNAME is opened via expand_tilde_open(); if it cannot be opened, or no
 * text can be read from it, the function returns without touching ID or
 * SECRET.  Otherwise the file's lines are joined with spaces, and the
 * first two whitespace-delimited words are appended to ID and SECRET
 * respectively.
 */
static void parse_simple(const char *fname, kstring_t *id, kstring_t *secret)
{
    kstring_t text = { 0, 0, NULL };
    char *s;
    size_t len;

    FILE *fp = expand_tilde_open(fname, "r");
    if (fp == NULL) return;

    while (kgetline(&text, (kgets_func *) fgets, fp) >= 0)
        kputc(' ', &text);
    fclose(fp);

    // If nothing was read (e.g. an empty file), text.s is not known to
    // point at a valid NUL-terminated string, so don't scan it.
    if (text.s == NULL || text.l == 0) {
        free(text.s);
        return;
    }

    s = text.s;
    while (isspace_c(*s)) s++;
    kputsn(s, len = strcspn(s, " \t"), id);

    s += len;
    while (isspace_c(*s)) s++;
    kputsn(s, strcspn(s, " \t"), secret);

    free(text.s);
}
217 
s3_rewrite(const char * s3url,const char * mode,va_list * argsp)218 static hFILE * s3_rewrite(const char *s3url, const char *mode, va_list *argsp)
219 {
220     const char *bucket, *path;
221     char date_hdr[40];
222     char *header_list[4], **header = header_list;
223 
224     kstring_t message = { 0, 0, NULL };
225     kstring_t url = { 0, 0, NULL };
226     kstring_t profile = { 0, 0, NULL };
227     kstring_t id = { 0, 0, NULL };
228     kstring_t secret = { 0, 0, NULL };
229     kstring_t host_base = { 0, 0, NULL };
230     kstring_t token = { 0, 0, NULL };
231     kstring_t token_hdr = { 0, 0, NULL };
232     kstring_t auth_hdr = { 0, 0, NULL };
233 
234     time_t now = time(NULL);
235 #ifdef HAVE_GMTIME_R
236     struct tm tm_buffer;
237     struct tm *tm = gmtime_r(&now, &tm_buffer);
238 #else
239     struct tm *tm = gmtime(&now);
240 #endif
241 
242     kputs(strchr(mode, 'r')? "GET\n" : "PUT\n", &message);
243     kputc('\n', &message);
244     kputc('\n', &message);
245     strftime(date_hdr, sizeof date_hdr, "Date: %a, %d %b %Y %H:%M:%S GMT", tm);
246     *header++ = date_hdr;
247     kputs(&date_hdr[6], &message);
248     kputc('\n', &message);
249 
250     // Our S3 URL format is s3[+SCHEME]://[ID[:SECRET[:TOKEN]]@]BUCKET/PATH
251 
252     if (s3url[2] == '+') {
253         bucket = strchr(s3url, ':') + 1;
254         kputsn(&s3url[3], bucket - &s3url[3], &url);
255     }
256     else {
257         kputs("https:", &url);
258         bucket = &s3url[3];
259     }
260     while (*bucket == '/') kputc(*bucket++, &url);
261 
262     path = bucket + strcspn(bucket, "/?#@");
263     if (*path == '@') {
264         const char *colon = strpbrk(bucket, ":@");
265         if (*colon != ':') {
266             urldecode_kput(bucket, colon - bucket, &profile);
267         }
268         else {
269             const char *colon2 = strpbrk(&colon[1], ":@");
270             urldecode_kput(bucket, colon - bucket, &id);
271             urldecode_kput(&colon[1], colon2 - &colon[1], &secret);
272             if (*colon2 == ':')
273                 urldecode_kput(&colon2[1], path - &colon2[1], &token);
274         }
275 
276         bucket = &path[1];
277         path = bucket + strcspn(bucket, "/?#");
278     }
279     else {
280         // If the URL has no ID[:SECRET]@, consider environment variables.
281         const char *v;
282         if ((v = getenv("AWS_ACCESS_KEY_ID")) != NULL) kputs(v, &id);
283         if ((v = getenv("AWS_SECRET_ACCESS_KEY")) != NULL) kputs(v, &secret);
284         if ((v = getenv("AWS_SESSION_TOKEN")) != NULL) kputs(v, &token);
285 
286         if ((v = getenv("AWS_DEFAULT_PROFILE")) != NULL) kputs(v, &profile);
287         else if ((v = getenv("AWS_PROFILE")) != NULL) kputs(v, &profile);
288         else kputs("default", &profile);
289     }
290 
291     if (id.l == 0) {
292         const char *v = getenv("AWS_SHARED_CREDENTIALS_FILE");
293         parse_ini(v? v : "~/.aws/credentials", profile.s,
294                   "aws_access_key_id", &id, "aws_secret_access_key", &secret,
295                   "aws_session_token", &token, NULL);
296     }
297     if (id.l == 0)
298         parse_ini("~/.s3cfg", profile.s, "access_key", &id,
299                   "secret_key", &secret, "access_token", &token,
300                   "host_base", &host_base, NULL);
301     if (id.l == 0)
302         parse_simple("~/.awssecret", &id, &secret);
303 
304     if (host_base.l == 0)
305         kputs("s3.amazonaws.com", &host_base);
306     // Use virtual hosted-style access if possible, otherwise path-style.
307     if (is_dns_compliant(bucket, path)) {
308         kputsn(bucket, path - bucket, &url);
309         kputc('.', &url);
310         kputs(host_base.s, &url);
311     }
312     else {
313         kputs(host_base.s, &url);
314         kputc('/', &url);
315         kputsn(bucket, path - bucket, &url);
316     }
317     kputs(path, &url);
318 
319     if (token.l > 0) {
320         kputs("x-amz-security-token:", &message);
321         kputs(token.s, &message);
322         kputc('\n', &message);
323 
324         kputs("X-Amz-Security-Token: ", &token_hdr);
325         kputs(token.s, &token_hdr);
326         *header++ = token_hdr.s;
327     }
328 
329     kputc('/', &message);
330     kputs(bucket, &message); // CanonicalizedResource is '/' + bucket + path
331 
332     // If we have no id/secret, we can't sign the request but will
333     // still be able to access public data sets.
334     if (id.l > 0 && secret.l > 0) {
335         unsigned char digest[DIGEST_BUFSIZ];
336         size_t digest_len = s3_sign(digest, &secret, &message);
337 
338         kputs("Authorization: AWS ", &auth_hdr);
339         kputs(id.s, &auth_hdr);
340         kputc(':', &auth_hdr);
341         base64_kput(digest, digest_len, &auth_hdr);
342 
343         *header++ = auth_hdr.s;
344     }
345 
346     *header = NULL;
347     hFILE *fp = hopen(url.s, mode, "va_list", argsp, "httphdr:v", header_list,
348                       NULL);
349     free(message.s);
350     free(url.s);
351     free(profile.s);
352     free(id.s);
353     free(secret.s);
354     free(host_base.s);
355     free(token.s);
356     free(token_hdr.s);
357     free(auth_hdr.s);
358     return fp;
359 }
360 
s3_open(const char * url,const char * mode)361 static hFILE *s3_open(const char *url, const char *mode)
362 {
363     kstring_t mode_colon = { 0, 0, NULL };
364     kputs(mode, &mode_colon);
365     kputc(':', &mode_colon);
366     hFILE *fp = s3_rewrite(url, mode_colon.s, NULL);
367     free(mode_colon.s);
368     return fp;
369 }
370 
s3_vopen(const char * url,const char * mode_colon,va_list args0)371 static hFILE *s3_vopen(const char *url, const char *mode_colon, va_list args0)
372 {
373     // Need to use va_copy() as we can only take the address of an actual
374     // va_list object, not that of a parameter whose type may have decayed.
375     va_list args;
376     va_copy(args, args0);
377     hFILE *fp = s3_rewrite(url, mode_colon, &args);
378     va_end(args);
379     return fp;
380 }
381 
PLUGIN_GLOBAL(hfile_plugin_init,_s3)382 int PLUGIN_GLOBAL(hfile_plugin_init,_s3)(struct hFILE_plugin *self)
383 {
384     static const struct hFILE_scheme_handler handler =
385         { s3_open, hfile_always_remote, "Amazon S3", 2000 + 50, s3_vopen
386         };
387 
388 #ifdef ENABLE_PLUGINS
389     // Embed version string for examination via strings(1) or what(1)
390     static const char id[] = "@(#)hfile_s3 plugin (htslib)\t" HTS_VERSION;
391     if (hts_verbose >= 9)
392         fprintf(stderr, "[M::hfile_s3.init] version %s\n", strchr(id, '\t')+1);
393 #endif
394 
395     self->name = "Amazon S3";
396     hfile_add_scheme_handler("s3", &handler);
397     hfile_add_scheme_handler("s3+http", &handler);
398     hfile_add_scheme_handler("s3+https", &handler);
399     return 0;
400 }
401