1 /*  hfile_s3.c -- Amazon S3 backend for low-level file streams.
2 
3     Copyright (C) 2015-2017, 2019-2020 Genome Research Ltd.
4 
5     Author: John Marshall <jm18@sanger.ac.uk>
6 
7 Permission is hereby granted, free of charge, to any person obtaining a copy
8 of this software and associated documentation files (the "Software"), to deal
9 in the Software without restriction, including without limitation the rights
10 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 copies of the Software, and to permit persons to whom the Software is
12 furnished to do so, subject to the following conditions:
13 
14 The above copyright notice and this permission notice shall be included in
15 all copies or substantial portions of the Software.
16 
17 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 DEALINGS IN THE SOFTWARE.  */
24 
25 #define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h
26 #include <config.h>
27 
28 #include <stdarg.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <time.h>
33 
34 #include <errno.h>
35 
36 #include "hfile_internal.h"
37 #ifdef ENABLE_PLUGINS
38 #include "version.h"
39 #endif
40 #include "htslib/hts.h"  // for hts_version() and hts_verbose
41 #include "htslib/kstring.h"
42 
/*
 * Authentication state for one S3 connection.  An instance is created by
 * setup_auth_data() and handed to the header/redirect callbacks as their
 * callback data; it is released through free_auth_data().
 */
typedef struct s3_auth_data {
    kstring_t id;                      // access key id
    kstring_t token;                   // session token; empty when none is in use
    kstring_t secret;                  // secret access key
    kstring_t region;                  // region name ("us-east-1" if nothing else is configured)
    kstring_t canonical_query_string;  // query string placed in the v4 canonical request
    kstring_t user_query_string;       // query part split off the URL by setup_auth_data()
    kstring_t host;                    // endpoint host name
    char *bucket;                      // malloc'd resource path that gets signed
    kstring_t auth_hdr;                // "Authorization: AWS ..." header line (v2 signing)
    time_t auth_time;                  // time() value recorded when the auth data was last refreshed
    char date[40];                     // "Date: ..." header line (v2 signing)
    char date_long[17];                // "%Y%m%dT%H%M%SZ" timestamp plus NUL (v4 signing)
    char date_short[9];                // "%Y%m%d" date plus NUL (v4 signing)
    kstring_t date_html;               // "x-amz-date: ..." header line (v4 signing)
    char mode;                         // 'r' for reading, 'w' otherwise
    char *headers[4];                  // NULL-terminated list filled in by copy_auth_headers()
    int refcount;                      // extra references; free_auth_data() only frees at zero
} s3_auth_data;

// Age limit, as a difference of time() values, for cached authorisation
// data: the v2 callback reuses its headers while they are younger than this
// and the v4 code regenerates its timestamps once they are older.
#define AUTH_LIFETIME 60
64 
65 #if defined HAVE_COMMONCRYPTO
66 
67 #include <CommonCrypto/CommonHMAC.h>
68 
69 #define DIGEST_BUFSIZ CC_SHA1_DIGEST_LENGTH
70 #define SHA256_DIGEST_BUFSIZE CC_SHA256_DIGEST_LENGTH
71 #define HASH_LENGTH_SHA256 (SHA256_DIGEST_BUFSIZE * 2) + 1
72 
73 static size_t
s3_sign(unsigned char * digest,kstring_t * key,kstring_t * message)74 s3_sign(unsigned char *digest, kstring_t *key, kstring_t *message)
75 {
76     CCHmac(kCCHmacAlgSHA1, key->s, key->l, message->s, message->l, digest);
77     return CC_SHA1_DIGEST_LENGTH;
78 }
79 
80 
s3_sha256(const unsigned char * in,size_t length,unsigned char * out)81 static void s3_sha256(const unsigned char *in, size_t length, unsigned char *out) {
82     CC_SHA256(in, length, out);
83 }
84 
85 
s3_sign_sha256(const void * key,int key_len,const unsigned char * d,int n,unsigned char * md,unsigned int * md_len)86 static void s3_sign_sha256(const void *key, int key_len, const unsigned char *d, int n, unsigned char *md, unsigned int *md_len) {
87     CCHmac(kCCHmacAlgSHA256, key, key_len, d, n, md);
88     *md_len = CC_SHA256_DIGEST_LENGTH;
89 }
90 
91 
92 #elif defined HAVE_HMAC
93 
94 #include <openssl/hmac.h>
95 #include <openssl/sha.h>
96 
97 #define DIGEST_BUFSIZ EVP_MAX_MD_SIZE
98 #define SHA256_DIGEST_BUFSIZE SHA256_DIGEST_LENGTH
99 #define HASH_LENGTH_SHA256 (SHA256_DIGEST_BUFSIZE * 2) + 1
100 
101 static size_t
s3_sign(unsigned char * digest,kstring_t * key,kstring_t * message)102 s3_sign(unsigned char *digest, kstring_t *key, kstring_t *message)
103 {
104     unsigned int len;
105     HMAC(EVP_sha1(), key->s, key->l,
106          (unsigned char *) message->s, message->l, digest, &len);
107     return len;
108 }
109 
110 
s3_sha256(const unsigned char * in,size_t length,unsigned char * out)111 static void s3_sha256(const unsigned char *in, size_t length, unsigned char *out) {
112     SHA256(in, length, out);
113 }
114 
115 
s3_sign_sha256(const void * key,int key_len,const unsigned char * d,int n,unsigned char * md,unsigned int * md_len)116 static void s3_sign_sha256(const void *key, int key_len, const unsigned char *d, int n, unsigned char *md, unsigned int *md_len) {
117     HMAC(EVP_sha256(), key, key_len, d, n, md, md_len);
118 }
119 
120 #else
121 #error No HMAC() routine found by configure
122 #endif
123 
124 static void
urldecode_kput(const char * s,int len,kstring_t * str)125 urldecode_kput(const char *s, int len, kstring_t *str)
126 {
127     char buf[3];
128     int i = 0;
129 
130     while (i < len)
131         if (s[i] == '%' && i+2 < len) {
132             buf[0] = s[i+1], buf[1] = s[i+2], buf[2] = '\0';
133             kputc(strtol(buf, NULL, 16), str);
134             i += 3;
135         }
136         else kputc(s[i++], str);
137 }
138 
/*
 * Append the base64 encoding of data[0..len) to str, padded with '='.
 */
static void base64_kput(const unsigned char *data, size_t len, kstring_t *str)
{
    static const char alphabet[] =
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

    unsigned acc = 0;       // bit accumulator; the low nbits bits are pending
    int nbits = 0;
    int npad = 0;           // number of zero bytes shifted in past the input
    size_t pos = 0;

    for (;;) {
        if (nbits == 0 && pos >= len) break;

        if (nbits < 6) {
            acc <<= 8;
            nbits += 8;
            if (pos < len) acc |= data[pos++];
            else npad++;
        }

        nbits -= 6;
        kputc(alphabet[(acc >> nbits) & 63], str);
    }

    // The characters produced from padding bytes are replaced by '='
    str->l -= npad;
    kputsn("==", npad, str);
}
162 
/*
 * Decide whether the bucket name in [s0, slim) can be used as a host name
 * label.  Accepts lower-case letters, digits, '-' (not first or last) and,
 * unless is_https is set, '.' with an alphanumeric on both sides.  The name
 * must contain at least one letter or '-' and be 3 to 63 characters long.
 * Returns non-zero if compliant, 0 otherwise.
 */
static int is_dns_compliant(const char *s0, const char *slim, int is_https)
{
    const char *p;
    int saw_nondigit = 0;
    int count = 0;

    for (p = s0; p < slim; p++, count++) {
        if (islower_c(*p)) {
            saw_nondigit = 1;
            continue;
        }
        if (*p == '-') {
            saw_nondigit = 1;
            if (p == s0 || p + 1 == slim) return 0;
            continue;
        }
        if (isdigit_c(*p))
            continue;

        // Anything else has to be a well-placed dot on a non-https URL
        if (*p != '.') return 0;
        if (is_https) return 0;
        if (p == s0 || ! isalnum_c(p[-1])) return 0;
        if (p + 1 == slim || ! isalnum_c(p[1])) return 0;
    }

    return saw_nondigit && count >= 3 && count <= 63;
}
186 
expand_tilde_open(const char * fname,const char * mode)187 static FILE *expand_tilde_open(const char *fname, const char *mode)
188 {
189     FILE *fp;
190 
191     if (strncmp(fname, "~/", 2) == 0) {
192         kstring_t full_fname = { 0, 0, NULL };
193         const char *home = getenv("HOME");
194         if (! home) return NULL;
195 
196         kputs(home, &full_fname);
197         kputs(&fname[1], &full_fname);
198 
199         fp = fopen(full_fname.s, mode);
200         free(full_fname.s);
201     }
202     else
203         fp = fopen(fname, mode);
204 
205     return fp;
206 }
207 
parse_ini(const char * fname,const char * section,...)208 static void parse_ini(const char *fname, const char *section, ...)
209 {
210     kstring_t line = { 0, 0, NULL };
211     int active = 1;  // Start active, so global properties are accepted
212     char *s;
213 
214     FILE *fp = expand_tilde_open(fname, "r");
215     if (fp == NULL) return;
216 
217     while (line.l = 0, kgetline(&line, (kgets_func *) fgets, fp) >= 0)
218         if (line.s[0] == '[' && (s = strchr(line.s, ']')) != NULL) {
219             *s = '\0';
220             active = (strcmp(&line.s[1], section) == 0);
221         }
222         else if (active && (s = strpbrk(line.s, ":=")) != NULL) {
223             const char *key = line.s, *value = &s[1], *akey;
224             va_list args;
225 
226             while (isspace_c(*key)) key++;
227             while (s > key && isspace_c(s[-1])) s--;
228             *s = '\0';
229 
230             while (isspace_c(*value)) value++;
231             while (line.l > 0 && isspace_c(line.s[line.l-1]))
232                 line.s[--line.l] = '\0';
233 
234             va_start(args, section);
235             while ((akey = va_arg(args, const char *)) != NULL) {
236                 kstring_t *avar = va_arg(args, kstring_t *);
237                 if (strcmp(key, akey) == 0) { kputs(value, avar); break; }
238             }
239             va_end(args);
240         }
241 
242     fclose(fp);
243     free(line.s);
244 }
245 
/*
 * Read an access key id and secret from a file holding the two values
 * separated by white space (spaces, tabs or line breaks).
 *
 * fname  - file to read ("~/" is expanded); ignored if it cannot be opened
 * id     - the first word is appended here
 * secret - the second word is appended here
 */
static void parse_simple(const char *fname, kstring_t *id, kstring_t *secret)
{
    kstring_t text = { 0, 0, NULL };
    char *s;
    size_t len;

    FILE *fp = expand_tilde_open(fname, "r");
    if (fp == NULL) return;

    // Join all lines into one string, with a space after each line
    while (kgetline(&text, (kgets_func *) fgets, fp) >= 0)
        kputc(' ', &text);
    fclose(fp);

    // Nothing was read (e.g. an empty file): text.s does not hold a usable
    // string in that case, so there is nothing to parse.
    if (text.l == 0) {
        free(text.s);
        return;
    }

    s = text.s;
    while (isspace_c(*s)) s++;
    kputsn(s, len = strcspn(s, " \t"), id);

    s += len;
    while (isspace_c(*s)) s++;
    kputsn(s, strcspn(s, " \t"), secret);

    free(text.s);
}
269 
/*
 * Fill ad->headers with freshly strdup'd copies of the date header and,
 * when one has been generated, the authorisation header, followed by a
 * NULL terminator.  *hdrs is pointed at ad->headers.
 *
 * Returns 0 on success, -1 if a copy could not be allocated.
 */
static int copy_auth_headers(s3_auth_data *ad, char ***hdrs) {
    char **hdr = &ad->headers[0];
    *hdrs = hdr;
    *hdr = strdup(ad->date);
    if (!*hdr) return -1;
    hdr++;
    if (ad->auth_hdr.l) {
        *hdr = strdup(ad->auth_hdr.s);
        if (!*hdr) {
            // Do not leave a dangling pointer in the list that *hdrs
            // already refers to.
            free(ad->headers[0]);
            ad->headers[0] = NULL;
            return -1;
        }
        hdr++;
    }
    *hdr = NULL;
    return 0;
}
284 
/*
 * Drop one reference to ad.  While refcount is above zero this only
 * decrements it; otherwise every owned string and ad itself are freed.
 */
static void free_auth_data(s3_auth_data *ad) {
    kstring_t *const owned[] = {
        &ad->id, &ad->token, &ad->secret, &ad->region,
        &ad->canonical_query_string, &ad->user_query_string,
        &ad->host, &ad->auth_hdr, &ad->date_html
    };
    size_t k;

    if (ad->refcount > 0) {
        ad->refcount--;
        return;
    }

    for (k = 0; k < sizeof(owned) / sizeof(owned[0]); k++)
        free(owned[k]->s);

    free(ad->bucket);
    free(ad);
}
302 
auth_header_callback(void * ctx,char *** hdrs)303 static int auth_header_callback(void *ctx, char ***hdrs) {
304     s3_auth_data *ad = (s3_auth_data *) ctx;
305 
306     time_t now = time(NULL);
307 #ifdef HAVE_GMTIME_R
308     struct tm tm_buffer;
309     struct tm *tm = gmtime_r(&now, &tm_buffer);
310 #else
311     struct tm *tm = gmtime(&now);
312 #endif
313     kstring_t message = { 0, 0, NULL };
314     unsigned char digest[DIGEST_BUFSIZ];
315     size_t digest_len;
316 
317     if (!hdrs) { // Closing connection
318         free_auth_data(ad);
319         return 0;
320     }
321 
322     if (now - ad->auth_time < AUTH_LIFETIME) {
323         // Last auth string should still be valid
324         *hdrs = NULL;
325         return 0;
326     }
327 
328     strftime(ad->date, sizeof(ad->date), "Date: %a, %d %b %Y %H:%M:%S GMT", tm);
329     if (!ad->id.l || !ad->secret.l) {
330         ad->auth_time = now;
331         return copy_auth_headers(ad, hdrs);
332     }
333 
334     if (ksprintf(&message, "%s\n\n\n%s\n%s%s%s%s",
335                  ad->mode == 'r' ? "GET" : "PUT", ad->date + 6,
336                  ad->token.l ? "x-amz-security-token:" : "",
337                  ad->token.l ? ad->token.s : "",
338                  ad->token.l ? "\n" : "",
339                  ad->bucket) < 0) {
340         return -1;
341     }
342 
343     digest_len = s3_sign(digest, &ad->secret, &message);
344     ad->auth_hdr.l = 0;
345     if (ksprintf(&ad->auth_hdr, "Authorization: AWS %s:", ad->id.s) < 0)
346         goto fail;
347     base64_kput(digest, digest_len, &ad->auth_hdr);
348 
349     free(message.s);
350     ad->auth_time = now;
351     return copy_auth_headers(ad, hdrs);
352 
353  fail:
354     free(message.s);
355     return -1;
356 }
357 
358 
/*
 * Percent-encode a query string.  Works like escape_path(), except that
 * '=' and '&' are also left untouched and the whole input is processed.
 *
 * Returns a malloc'd NUL-terminated string that the caller must free, or
 * NULL if memory could not be allocated.
 */
static char *escape_query(const char *qs) {
    size_t i, j = 0, length;
    char *escaped;

    length = strlen(qs);

    // Worst case: every input byte becomes a three character "%XX"
    if ((escaped = malloc(length * 3 + 1)) == NULL) {
        return NULL;
    }

    for (i = 0; i < length; i++) {
        // Read the byte as unsigned: a negative (sign-extended) value
        // would be printed as "%FFFFFFxx" and overflow the three bytes
        // reserved for it.
        unsigned char c = (unsigned char) qs[i];

        if ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') ||
             c == '_' || c == '-' || c == '~' || c == '.' || c == '/' || c == '=' || c == '&') {
            escaped[j++] = (char) c;
        } else {
            snprintf(escaped + j, 4, "%%%02X", (unsigned) c);
            j += 3;
        }
    }

    escaped[j] = '\0';

    return escaped;
}
391 
392 
/*
 * Percent-encode a URL path.  Letters, digits and "_-~./" are kept as they
 * are; every other byte becomes "%XX".  Encoding stops at the first '?',
 * and that character and everything after it are copied unchanged.
 *
 * Returns a malloc'd NUL-terminated string that the caller must free, or
 * NULL if memory could not be allocated.
 */
static char *escape_path(const char *path) {
    size_t i, j = 0, length;
    char *escaped;

    length = strlen(path);

    // Worst case: every input byte becomes a three character "%XX"
    if ((escaped = malloc(length * 3 + 1)) == NULL) {
        return NULL;
    }

    for (i = 0; i < length; i++) {
        // Read the byte as unsigned: a negative (sign-extended) value
        // would be printed as "%FFFFFFxx" and overflow the three bytes
        // reserved for it.
        unsigned char c = (unsigned char) path[i];

        if (c == '?') break; // don't escape ? or beyond

        if ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') ||
             c == '_' || c == '-' || c == '~' || c == '.' || c == '/') {
            escaped[j++] = (char) c;
        } else {
            snprintf(escaped + j, 4, "%%%02X", (unsigned) c);
            j += 3;
        }
    }

    if (i != length) {
        // in the case of a '?' copy the rest of the path across unchanged
        strcpy(escaped + j, path + i);
    } else {
        escaped[j] = '\0';
    }

    return escaped;
}
426 
427 
/*
 * Guess whether str has already been percent-encoded.
 *
 * Returns non-zero if the string either contains no character that would
 * need escaping, or the most recently examined '%' (with two characters
 * after it) introduced a valid two-hex-digit escape.
 * NOTE(review): an invalid "%xy" is forgotten once a later valid escape is
 * seen, and a '%' within the last two characters does not clear the flag;
 * confirm this is the intended reading of "all % signs are escaped".
 */
static int is_escaped(const char *str) {
    const char *p;
    int has_escape = 0;
    int has_unsafe = 0;

    for (p = str; *p != '\0'; ) {
        const char ch = *p;

        if (ch == '%' && p[1] != '\0' && p[2] != '\0') {
            if (isxdigit_c(p[1]) && isxdigit_c(p[2])) {
                has_escape = 1;
                p += 3;
                continue;
            }
            // only escaped if all % signs are escaped
            has_escape = 0;
        }

        int is_safe = (ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'Z')
                   || (ch >= 'a' && ch <= 'z')
                   || ch == '_' || ch == '-' || ch == '~' || ch == '.' || ch == '/';
        if (!is_safe)
            has_unsafe = 1;

        p++;
    }

    return has_escape || !has_unsafe;
}
454 
/*
 * Redirect callback: pick the bucket's region out of the reply headers and
 * rebuild the endpoint for it.
 *
 * auth     - the s3_auth_data for this connection
 * response - response code (not used here)
 * header   - reply headers; the region value is NUL-terminated in place
 * url      - overwritten with host, bucket path and user query string
 *
 * Returns 0 if url was rewritten, -1 if the headers carry no
 * "x-amz-bucket-region" field or the current host is not an amazonaws.com
 * one.
 */
static int redirect_endpoint_callback(void *auth, long response,
                                      kstring_t *header, kstring_t *url) {
    static const char region_tag[] = "x-amz-bucket-region: ";
    s3_auth_data *ad = (s3_auth_data *)auth;
    char *region, *stop;

    // get the new region from the reply header
    region = strstr(header->s, region_tag);
    if (region == NULL)
        return -1;

    region += sizeof(region_tag) - 1;
    for (stop = region; isalnum_c(*stop) || ispunct_c(*stop); stop++)
        ;
    *stop = 0;

    if (strstr(ad->host.s, "amazonaws.com") == NULL)
        return -1;

    ad->region.l = 0;
    kputs(region, &ad->region);

    ad->host.l = 0;
    ksprintf(&ad->host, "s3.%s.amazonaws.com", region);

    if (ad->region.l == 0 || ad->host.l == 0)
        return -1;

    url->l = 0;
    kputs(ad->host.s, url);
    kputsn(ad->bucket, strlen(ad->bucket), url);
    if (ad->user_query_string.l) {
        kputc('?', url);
        kputsn(ad->user_query_string.s, ad->user_query_string.l, url);
    }

    return 0;
}
494 
setup_auth_data(const char * s3url,const char * mode,int sigver,kstring_t * url)495 static s3_auth_data * setup_auth_data(const char *s3url, const char *mode,
496                                       int sigver, kstring_t *url)
497 {
498     s3_auth_data *ad = calloc(1, sizeof(*ad));
499     const char *bucket, *path;
500     char *escaped = NULL;
501     kstring_t profile = { 0, 0, NULL };
502     size_t url_path_pos;
503     ptrdiff_t bucket_len;
504     int is_https = 1, dns_compliant;
505     char *query_start;
506 
507     if (!ad)
508         return NULL;
509     ad->mode = strchr(mode, 'r') ? 'r' : 'w';
510 
511     // Our S3 URL format is s3[+SCHEME]://[ID[:SECRET[:TOKEN]]@]BUCKET/PATH
512 
513     if (s3url[2] == '+') {
514         bucket = strchr(s3url, ':') + 1;
515         if (bucket == NULL) {
516             free(ad);
517             return NULL;
518         }
519         kputsn(&s3url[3], bucket - &s3url[3], url);
520         is_https = strncmp(url->s, "https:", 6) == 0;
521     }
522     else {
523         kputs("https:", url);
524         bucket = &s3url[3];
525     }
526     while (*bucket == '/') kputc(*bucket++, url);
527 
528     path = bucket + strcspn(bucket, "/?#@");
529 
530     if (*path == '@') {
531         const char *colon = strpbrk(bucket, ":@");
532         if (*colon != ':') {
533             urldecode_kput(bucket, colon - bucket, &profile);
534         }
535         else {
536             const char *colon2 = strpbrk(&colon[1], ":@");
537             urldecode_kput(bucket, colon - bucket, &ad->id);
538             urldecode_kput(&colon[1], colon2 - &colon[1], &ad->secret);
539             if (*colon2 == ':')
540                 urldecode_kput(&colon2[1], path - &colon2[1], &ad->token);
541         }
542 
543         bucket = &path[1];
544         path = bucket + strcspn(bucket, "/?#");
545     }
546     else {
547         // If the URL has no ID[:SECRET]@, consider environment variables.
548         const char *v;
549         if ((v = getenv("AWS_ACCESS_KEY_ID")) != NULL) kputs(v, &ad->id);
550         if ((v = getenv("AWS_SECRET_ACCESS_KEY")) != NULL) kputs(v, &ad->secret);
551         if ((v = getenv("AWS_SESSION_TOKEN")) != NULL) kputs(v, &ad->token);
552         if ((v = getenv("AWS_DEFAULT_REGION")) != NULL) kputs(v, &ad->region);
553         if ((v = getenv("HTS_S3_HOST")) != NULL) kputs(v, &ad->host);
554 
555         if ((v = getenv("AWS_DEFAULT_PROFILE")) != NULL) kputs(v, &profile);
556         else if ((v = getenv("AWS_PROFILE")) != NULL) kputs(v, &profile);
557         else kputs("default", &profile);
558     }
559 
560     if (ad->id.l == 0) {
561         const char *v = getenv("AWS_SHARED_CREDENTIALS_FILE");
562         parse_ini(v? v : "~/.aws/credentials", profile.s,
563                   "aws_access_key_id", &ad->id,
564                   "aws_secret_access_key", &ad->secret,
565                   "aws_session_token", &ad->token,
566                   "region", &ad->region, NULL);
567     }
568 
569     if (ad->id.l == 0) {
570         const char *v = getenv("HTS_S3_S3CFG");
571         parse_ini(v? v : "~/.s3cfg", profile.s, "access_key", &ad->id,
572                   "secret_key", &ad->secret, "access_token", &ad->token,
573                   "host_base", &ad->host,
574                   "bucket_location", &ad->region, NULL);
575     }
576 
577     if (ad->id.l == 0)
578         parse_simple("~/.awssecret", &ad->id, &ad->secret);
579 
580     dns_compliant = is_dns_compliant(bucket, path, is_https);
581 
582     if (ad->host.l == 0)
583         kputs("s3.amazonaws.com", &ad->host);
584 
585     if (!dns_compliant && ad->region.l > 0
586         && strcmp(ad->host.s, "s3.amazonaws.com") == 0) {
587         // Can avoid a redirection by including the region in the host name
588         // (assuming the right one has been specified)
589         ad->host.l = 0;
590         ksprintf(&ad->host, "s3.%s.amazonaws.com", ad->region.s);
591     }
592 
593     if (ad->region.l == 0)
594         kputs("us-east-1", &ad->region);
595 
596     if (!is_escaped(path)) {
597         escaped = escape_path(path);
598         if (escaped == NULL) {
599             goto error;
600         }
601     }
602 
603     bucket_len = path - bucket;
604 
605     // Use virtual hosted-style access if possible, otherwise path-style.
606     if (dns_compliant) {
607         size_t url_host_pos = url->l;
608         // Append "bucket.host" to url
609         kputsn_(bucket, bucket_len, url);
610         kputc('.', url);
611         kputsn(ad->host.s, ad->host.l, url);
612         url_path_pos = url->l;
613 
614         if (sigver == 4) {
615             // Copy back to ad->host to use when making the signature
616             ad->host.l = 0;
617             kputsn(url->s + url_host_pos, url->l - url_host_pos, &ad->host);
618         }
619     }
620     else {
621         // Append "host/bucket" to url
622         kputsn(ad->host.s, ad->host.l, url);
623         url_path_pos = url->l;
624         kputc('/', url);
625         kputsn(bucket, bucket_len, url);
626     }
627 
628     kputs(escaped == NULL ? path : escaped, url);
629 
630     if (sigver == 4 || !dns_compliant) {
631         ad->bucket = malloc(url->l - url_path_pos + 1);
632         if (ad->bucket == NULL) {
633             goto error;
634         }
635         memcpy(ad->bucket, url->s + url_path_pos, url->l - url_path_pos + 1);
636     }
637     else {
638         ad->bucket = malloc(url->l - url_path_pos + bucket_len + 2);
639         if (ad->bucket == NULL) {
640             goto error;
641         }
642         ad->bucket[0] = '/';
643         memcpy(ad->bucket + 1, bucket, bucket_len);
644         memcpy(ad->bucket + bucket_len + 1,
645                url->s + url_path_pos, url->l - url_path_pos + 1);
646     }
647 
648     // write any query strings to its own place to use later
649     if ((query_start = strchr(ad->bucket, '?'))) {
650         kputs(query_start + 1, &ad->user_query_string);
651         *query_start = 0;
652     }
653 
654     free(profile.s);
655     free(escaped);
656 
657     return ad;
658 
659  error:
660     free(profile.s);
661     free(escaped);
662     free_auth_data(ad);
663     return NULL;
664 }
665 
s3_rewrite(const char * s3url,const char * mode,va_list * argsp)666 static hFILE * s3_rewrite(const char *s3url, const char *mode, va_list *argsp)
667 {
668     char *header_list[4], **header = header_list;
669 
670     kstring_t url = { 0, 0, NULL };
671     kstring_t token_hdr = { 0, 0, NULL };
672     s3_auth_data *ad = setup_auth_data(s3url, mode, 2, &url);
673 
674     if (!ad)
675         return NULL;
676 
677     if (ad->token.l > 0) {
678         kputs("X-Amz-Security-Token: ", &token_hdr);
679         kputs(ad->token.s, &token_hdr);
680         *header++ = token_hdr.s;
681     }
682 
683     *header = NULL;
684     hFILE *fp = hopen(url.s, mode, "va_list", argsp, "httphdr:v", header_list,
685                       "httphdr_callback", auth_header_callback,
686                       "httphdr_callback_data", ad,
687                       "redirect_callback", redirect_endpoint_callback,
688                       "redirect_callback_data", ad,
689                       NULL);
690     if (!fp) goto fail;
691 
692     free(url.s);
693     free(token_hdr.s);
694     return fp;
695 
696  fail:
697     free(url.s);
698     free(token_hdr.s);
699     free_auth_data(ad);
700     return NULL;
701 }
702 
703 /***************************************************************
704 
705 AWS S3 sig version 4 writing code
706 
707 ****************************************************************/
708 
hash_string(char * in,size_t length,char * out)709 static void hash_string(char *in, size_t length, char *out) {
710     unsigned char hashed[SHA256_DIGEST_BUFSIZE];
711     int i, j;
712 
713     s3_sha256((const unsigned char *)in, length, hashed);
714 
715     for (i = 0, j = 0; i < SHA256_DIGEST_BUFSIZE; i++, j+= 2) {
716         sprintf(out + j, "%02x", hashed[i]);
717     }
718 }
719 
/* Reset s to the empty state without freeing anything. */
static void ksinit(kstring_t *s) {
    s->s = NULL;
    s->l = s->m = 0;
}
725 
726 
/* Release the buffer held by s and leave s empty and reusable. */
static void ksfree(kstring_t *s) {
    char *buf = s->s;

    ksinit(s);
    free(buf);
}
731 
732 
/*
 * Derive the v4 signing key from the secret, date, region and service by a
 * chain of HMAC-SHA256 operations, sign string_to_sign with it and write
 * the result to signature_string as lower-case hex.
 *
 * Returns 0 on success, -1 if the initial key string could not be built.
 */
static int make_signature(s3_auth_data *ad, kstring_t *string_to_sign, char *signature_string) {
    static const unsigned char service_name[] = "s3";
    static const unsigned char request_name[] = "aws4_request";

    unsigned char k_date[SHA256_DIGEST_BUFSIZE];
    unsigned char k_region[SHA256_DIGEST_BUFSIZE];
    unsigned char k_service[SHA256_DIGEST_BUFSIZE];
    unsigned char k_signing[SHA256_DIGEST_BUFSIZE];
    unsigned char mac[SHA256_DIGEST_BUFSIZE];

    kstring_t initial_key = {0, 0, NULL};
    unsigned int mac_len;
    unsigned int idx;
    char *hex = signature_string;

    ksprintf(&initial_key, "AWS4%s", ad->secret.s);
    if (initial_key.l == 0)
        return -1;

    // Each step uses the previous digest as the key for the next one
    s3_sign_sha256(initial_key.s, initial_key.l,
                   (const unsigned char *)ad->date_short, strlen(ad->date_short),
                   k_date, &mac_len);
    s3_sign_sha256(k_date, mac_len,
                   (const unsigned char *)ad->region.s, ad->region.l,
                   k_region, &mac_len);
    s3_sign_sha256(k_region, mac_len,
                   service_name, sizeof(service_name) - 1,
                   k_service, &mac_len);
    s3_sign_sha256(k_service, mac_len,
                   request_name, sizeof(request_name) - 1,
                   k_signing, &mac_len);
    s3_sign_sha256(k_signing, mac_len,
                   (const unsigned char *)string_to_sign->s, string_to_sign->l,
                   mac, &mac_len);

    for (idx = 0; idx < mac_len; idx++) {
        sprintf(hex, "%02x", mac[idx]);
        hex += 2;
    }

    ksfree(&initial_key);

    return 0;
}
767 
768 
/*
 * Build the v4 "Authorization:" header line.
 *
 * ad           - auth data supplying host, dates, region, token, bucket
 *                and canonical query string
 * http_request - request method placed first in the canonical request
 * content      - hex SHA-256 of the request payload
 * auth         - the header line is appended here
 *
 * Returns 0 on success, -1 if any of the intermediate strings is empty or
 * the signature could not be made.
 */
static int make_authorisation(s3_auth_data *ad, char *http_request, char *content, kstring_t *auth) {
    const int has_token = ad->token.l != 0;
    kstring_t signed_headers = {0, 0, NULL};
    kstring_t canonical_headers = {0, 0, NULL};
    kstring_t canonical_request = {0, 0, NULL};
    kstring_t scope = {0, 0, NULL};
    kstring_t string_to_sign = {0, 0, NULL};
    char cr_hash[HASH_LENGTH_SHA256];
    char signature_string[HASH_LENGTH_SHA256];
    int ret = -1;

    // The security token header is only signed when a token is in use
    kputs(has_token
          ? "host;x-amz-content-sha256;x-amz-date;x-amz-security-token"
          : "host;x-amz-content-sha256;x-amz-date",
          &signed_headers);
    if (signed_headers.l == 0)
        goto cleanup;

    if (has_token) {
        ksprintf(&canonical_headers, "host:%s\nx-amz-content-sha256:%s\nx-amz-date:%s\nx-amz-security-token:%s\n",
                 ad->host.s, content, ad->date_long, ad->token.s);
    } else {
        ksprintf(&canonical_headers, "host:%s\nx-amz-content-sha256:%s\nx-amz-date:%s\n",
                 ad->host.s, content, ad->date_long);
    }
    if (canonical_headers.l == 0)
        goto cleanup;

    // bucket == canonical_uri
    ksprintf(&canonical_request, "%s\n%s\n%s\n%s\n%s\n%s",
             http_request, ad->bucket, ad->canonical_query_string.s,
             canonical_headers.s, signed_headers.s, content);
    if (canonical_request.l == 0)
        goto cleanup;

    hash_string(canonical_request.s, canonical_request.l, cr_hash);

    ksprintf(&scope, "%s/%s/s3/aws4_request", ad->date_short, ad->region.s);
    if (scope.l == 0)
        goto cleanup;

    ksprintf(&string_to_sign, "AWS4-HMAC-SHA256\n%s\n%s\n%s", ad->date_long, scope.s, cr_hash);
    if (string_to_sign.l == 0)
        goto cleanup;

    if (make_signature(ad, &string_to_sign, signature_string) != 0)
        goto cleanup;

    ksprintf(auth, "Authorization: AWS4-HMAC-SHA256 Credential=%s/%s/%s/s3/aws4_request,SignedHeaders=%s,Signature=%s",
             ad->id.s, ad->date_short, ad->region.s, signed_headers.s, signature_string);
    if (auth->l != 0)
        ret = 0;

 cleanup:
    ksfree(&signed_headers);
    ksfree(&canonical_headers);
    ksfree(&canonical_request);
    ksfree(&scope);
    ksfree(&string_to_sign);

    return ret;
}
848 
849 
/*
 * Refresh the v4 timestamps in ad once they are more than AUTH_LIFETIME
 * old: date_long, date_short and the "x-amz-date" header line.
 *
 * Returns 0 if a date header is available, -1 on a formatting failure or
 * if no date header has been produced.
 */
static int update_time(s3_auth_data *ad) {
    time_t now = time(NULL);
#ifdef HAVE_GMTIME_R
    struct tm tm_buffer;
    struct tm *tm = gmtime_r(&now, &tm_buffer);
#else
    struct tm *tm = gmtime(&now);
#endif
    const int stale = (now - ad->auth_time > AUTH_LIFETIME);

    if (stale) {
        // update timestamp
        ad->auth_time = now;

        if (strftime(ad->date_long, sizeof(ad->date_long), "%Y%m%dT%H%M%SZ", tm) != 16)
            return -1;

        if (strftime(ad->date_short, sizeof(ad->date_short), "%Y%m%d", tm) != 8)
            return -1;

        ad->date_html.l = 0;
        ksprintf(&ad->date_html, "x-amz-date: %s", ad->date_long);
    }

    return ad->date_html.l ? 0 : -1;
}
880 
881 
/* qsort() comparison function for an array of C strings. */
static int query_cmp(const void *p1, const void *p2) {
    const char *lhs = *(char *const *)p1;
    const char *rhs = *(char *const *)p2;

    return strcmp(lhs, rhs);
}
888 
889 
890 /* Query strings must be in alphabetical order for authorisation */
891 
order_query_string(kstring_t * qs)892 static int order_query_string(kstring_t *qs) {
893     int *query_offset = NULL;
894     int num_queries, i;
895     char **queries = NULL;
896     kstring_t ordered = {0, 0, NULL};
897     char *escaped = NULL;
898     int ret = -1;
899 
900     if ((query_offset = ksplit(qs, '&', &num_queries)) == NULL) {
901         return -1;
902     }
903 
904     if ((queries = malloc(num_queries * sizeof(char*))) == NULL)
905         goto err;
906 
907     for (i = 0; i < num_queries; i++) {
908         queries[i] = qs->s + query_offset[i];
909     }
910 
911     qsort(queries, num_queries, sizeof(char *), query_cmp);
912 
913     for (i = 0; i < num_queries; i++) {
914         if (i) {
915             kputs("&", &ordered);
916         }
917 
918         kputs(queries[i], &ordered);
919     }
920 
921     if ((escaped = escape_query(ordered.s)) == NULL)
922         goto err;
923 
924     qs->l = 0;
925     kputs(escaped, qs);
926 
927     ret = 0;
928  err:
929     free(ordered.s);
930     free(queries);
931     free(query_offset);
932     free(escaped);
933 
934     return ret;
935 }
936 
937 
/*
 * Callback that produces the v4 authorisation data for a write request.
 *
 * auth     - the s3_auth_data for this connection
 * request  - request method; NULL is the signal to release auth
 * content  - request payload to hash, or NULL for an empty payload
 * cqs      - canonical query string for the request
 * hash     - receives the hex payload hash
 * auth_str - receives the "Authorization:" header line
 * date     - receives the "x-amz-date:" header line
 * token    - receives the security token header line when a token is set
 * uqs      - non-zero to merge in the user supplied query string
 *
 * Returns 0 on success and -1 on failure.
 */
static int write_authorisation_callback(void *auth, char *request, kstring_t *content, char *cqs,
                                        kstring_t *hash, kstring_t *auth_str, kstring_t *date,
                                        kstring_t *token, int uqs) {
    s3_auth_data *ad = (s3_auth_data *)auth;
    char content_hash[HASH_LENGTH_SHA256];

    if (request == NULL) {
        // signal to free auth data
        free_auth_data(ad);
        return 0;
    }

    if (update_time(ad) != 0)
        return -1;

    // A missing payload is hashed as the empty string
    hash_string(content ? content->s : "", content ? content->l : 0,
                content_hash);

    ad->canonical_query_string.l = 0;
    kputs(cqs, &ad->canonical_query_string);
    if (ad->canonical_query_string.l == 0)
        return -1;

    /* add a user provided query string, normally only useful on upload initiation */
    if (uqs) {
        kputs("&", &ad->canonical_query_string);
        kputs(ad->user_query_string.s, &ad->canonical_query_string);

        if (order_query_string(&ad->canonical_query_string) != 0)
            return -1;
    }

    if (make_authorisation(ad, request, content_hash, auth_str) != 0)
        return -1;

    kputs(ad->date_html.s, date);
    kputsn(content_hash, HASH_LENGTH_SHA256, hash);
    if (date->l == 0 || hash->l == 0)
        return -1;

    if (ad->token.l)
        ksprintf(token, "x-amz-security-token: %s", ad->token.s);

    return 0;
}
995 
996 
v4_auth_header_callback(void * ctx,char *** hdrs)997 static int v4_auth_header_callback(void *ctx, char ***hdrs) {
998     s3_auth_data *ad = (s3_auth_data *) ctx;
999     char content_hash[HASH_LENGTH_SHA256];
1000     kstring_t content = {0, 0, NULL};
1001     kstring_t authorisation = {0, 0, NULL};
1002     char *date_html = NULL;
1003 
1004     if (!hdrs) { // Closing connection
1005         free_auth_data(ad);
1006         return 0;
1007     }
1008 
1009     if (update_time(ad)) {
1010         return -1;
1011     }
1012 
1013     hash_string("", 0, content_hash); // empty hash
1014 
1015     ad->canonical_query_string.l = 0;
1016 
1017     if (ad->user_query_string.l > 0) {
1018         kputs(ad->user_query_string.s, &ad->canonical_query_string);
1019 
1020         if (order_query_string(&ad->canonical_query_string)) {
1021             return -1;
1022         }
1023     } else {
1024         kputs("", &ad->canonical_query_string);
1025     }
1026 
1027     if (make_authorisation(ad, "GET", content_hash, &authorisation)) {
1028         return -1;
1029     }
1030 
1031     ksprintf(&content, "x-amz-content-sha256: %s", content_hash);
1032     date_html = strdup(ad->date_html.s);
1033 
1034     if (content.l == 0 || date_html == NULL) {
1035         ksfree(&authorisation);
1036         ksfree(&content);
1037         free(date_html);
1038         return -1;
1039     }
1040 
1041     *hdrs = &ad->headers[0];
1042     ad->headers[0] = ks_release(&authorisation);
1043     ad->headers[1] = date_html;
1044     ad->headers[2] = ks_release(&content);
1045     ad->headers[3] = NULL;
1046 
1047     return 0;
1048 }
1049 
/*
 * Pull the region name out of a "400 Bad Request" response body.
 *
 * v4 signatures in virtual hosted mode get a 400 response when the
 * signature was made with the wrong region.  The xml body names the
 * right one, so it can be picked out and used to sign again.  The xml is
 * simple enough that a "good enough" text search is used here rather
 * than a real parser.
 *
 * fp - stream to read the response body from (at most 1023 bytes are read)
 * ad - auth data whose region is replaced with the one found
 *
 * Returns 0 if a non-empty region was stored, -1 otherwise.
 */
static int handle_400_response(hFILE *fp, s3_auth_data *ad) {
    char body[1024];
    char *start, *stop;
    ssize_t got = hread(fp, body, sizeof(body) - 1);

    if (got < 0)
        return -1;

    body[got] = '\0';

    start = strstr(body, "<Region>");
    if (!start)
        return -1;

    // Step over the opening tag (8 characters) and any leading white space
    for (start += 8; isspace((unsigned char) *start); ++start) {
    }

    // The next tag has to be the matching closing tag
    stop = strchr(start, '<');
    if (!stop)
        return -1;
    if (strncmp(stop + 1, "/Region>", 8) != 0)
        return -1;

    // Trim trailing white space
    while (stop > start && isspace((unsigned char) stop[-1]))
        --stop;

    ad->region.l = 0;
    kputsn(start, stop - start, &ad->region);

    return ad->region.l == 0 ? -1 : 0;
}
1085 
/*
 * Replace the region stored in the auth data.
 *
 * adv    - the s3_auth_data to update
 * region - the new region name
 *
 * Returns 0 on success, 1 if the region could not be copied.
 */
static int set_region(void *adv, kstring_t *region) {
    s3_auth_data *state = (s3_auth_data *) adv;
    kstring_t *dest = &state->region;

    dest->l = 0;

    if (kputsn(region->s, region->l, dest) < 0)
        return 1;

    return 0;
}
1092 
/*
 * Map an HTTP status code to an errno value.
 *
 * Statuses below 400 give 0.  Known 4xx and 5xx codes map to a specific
 * errno; any other 4xx code gives EINVAL and any other code of 500 or
 * above gives EIO.
 */
static int http_status_errno(int status)
{
    if (status < 400)
        return 0;

    switch (status) {
    case 401:
    case 407:
        return EPERM;
    case 403:
        return EACCES;
    case 404:
    case 410:
        return ENOENT;
    case 405:
        return EROFS;
    case 408:
    case 504:
        return ETIMEDOUT;
    case 501:
        return ENOSYS;
    case 503:
        return EBUSY;
    default:
        // Anything unlisted: server side errors vs client side errors
        return (status >= 500) ? EIO : EINVAL;
    }
}
1115 
s3_open_v4(const char * s3url,const char * mode,va_list * argsp)1116 static hFILE *s3_open_v4(const char *s3url, const char *mode, va_list *argsp) {
1117     kstring_t url = { 0, 0, NULL };
1118     kstring_t token_hdr = { 0, 0, NULL };
1119 
1120     char *header_list[4], **header = header_list;
1121     s3_auth_data *ad = setup_auth_data(s3url, mode, 4, &url);
1122     hFILE *fp = NULL;
1123 
1124     if (ad == NULL) {
1125         return NULL;
1126     }
1127 
1128     if (ad->mode == 'r') {
1129         long http_response = 0;
1130 
1131         if (ad->token.l > 0) {
1132             kputs("x-amz-security-token: ", &token_hdr);
1133             kputs(ad->token.s, &token_hdr);
1134             *header++ = token_hdr.s;
1135         }
1136 
1137         *header = NULL;
1138         fp = hopen(url.s, mode, "va_list", argsp, "httphdr:v", header_list,
1139                    "httphdr_callback", v4_auth_header_callback,
1140                    "httphdr_callback_data", ad,
1141                    "redirect_callback", redirect_endpoint_callback,
1142                    "redirect_callback_data", ad,
1143                    "http_response_ptr", &http_response,
1144                    "fail_on_error", 0,
1145                    NULL);
1146 
1147         if (fp == NULL) goto error;
1148 
1149         if (http_response == 400) {
1150             ad->refcount = 1;
1151             if (handle_400_response(fp, ad) != 0) {
1152                 goto error;
1153             }
1154             hclose_abruptly(fp);
1155             fp = hopen(url.s, mode, "va_list", argsp, "httphdr:v", header_list,
1156                        "httphdr_callback", v4_auth_header_callback,
1157                        "httphdr_callback_data", ad,
1158                        "redirect_callback", redirect_endpoint_callback,
1159                        "redirect_callback_data", ad,
1160                        NULL);
1161         } else if (http_response > 400) {
1162             ad->refcount = 1;
1163             errno = http_status_errno(http_response);
1164             goto error;
1165         }
1166 
1167         if (fp == NULL) goto error;
1168     } else {
1169         kstring_t final_url = {0, 0, NULL};
1170 
1171          // add the scheme marker
1172         ksprintf(&final_url, "s3w+%s", url.s);
1173 
1174         if(final_url.l == 0) goto error;
1175 
1176         fp = hopen(final_url.s, mode, "va_list", argsp,
1177                    "s3_auth_callback",  write_authorisation_callback,
1178                    "s3_auth_callback_data", ad,
1179                    "redirect_callback", redirect_endpoint_callback,
1180                    "set_region_callback", set_region,
1181                    NULL);
1182         free(final_url.s);
1183 
1184         if (fp == NULL) goto error;
1185     }
1186 
1187     free(url.s);
1188     free(token_hdr.s);
1189 
1190     return fp;
1191 
1192   error:
1193 
1194     if (fp) hclose_abruptly(fp);
1195     free(url.s);
1196     free(token_hdr.s);
1197     free_auth_data(ad);
1198 
1199     return NULL;
1200 }
1201 
1202 
s3_open(const char * url,const char * mode)1203 static hFILE *s3_open(const char *url, const char *mode)
1204 {
1205     hFILE *fp;
1206 
1207     kstring_t mode_colon = { 0, 0, NULL };
1208     kputs(mode, &mode_colon);
1209     kputc(':', &mode_colon);
1210 
1211     if (getenv("HTS_S3_V2") == NULL) { // Force the v2 signature code
1212         fp = s3_open_v4(url, mode_colon.s, NULL);
1213     } else {
1214         fp = s3_rewrite(url, mode_colon.s, NULL);
1215     }
1216 
1217     free(mode_colon.s);
1218 
1219     return fp;
1220 }
1221 
s3_vopen(const char * url,const char * mode_colon,va_list args0)1222 static hFILE *s3_vopen(const char *url, const char *mode_colon, va_list args0)
1223 {
1224     hFILE *fp;
1225     // Need to use va_copy() as we can only take the address of an actual
1226     // va_list object, not that of a parameter whose type may have decayed.
1227     va_list args;
1228     va_copy(args, args0);
1229 
1230     if (getenv("HTS_S3_V2") == NULL) { // Force the v2 signature code
1231         fp = s3_open_v4(url, mode_colon, &args);
1232     } else {
1233         fp = s3_rewrite(url, mode_colon, &args);
1234     }
1235 
1236     va_end(args);
1237     return fp;
1238 }
1239 
PLUGIN_GLOBAL(hfile_plugin_init,_s3)1240 int PLUGIN_GLOBAL(hfile_plugin_init,_s3)(struct hFILE_plugin *self)
1241 {
1242     static const struct hFILE_scheme_handler handler =
1243         { s3_open, hfile_always_remote, "Amazon S3", 2000 + 50, s3_vopen
1244         };
1245 
1246 #ifdef ENABLE_PLUGINS
1247     // Embed version string for examination via strings(1) or what(1)
1248     static const char id[] = "@(#)hfile_s3 plugin (htslib)\t" HTS_VERSION_TEXT;
1249     if (hts_verbose >= 9)
1250         fprintf(stderr, "[M::hfile_s3.init] version %s\n", strchr(id, '\t')+1);
1251 #endif
1252 
1253     self->name = "Amazon S3";
1254     hfile_add_scheme_handler("s3", &handler);
1255     hfile_add_scheme_handler("s3+http", &handler);
1256     hfile_add_scheme_handler("s3+https", &handler);
1257     return 0;
1258 }
1259