1 /* hfile_s3.c -- Amazon S3 backend for low-level file streams.
2
3 Copyright (C) 2015-2017, 2019-2020 Genome Research Ltd.
4
5 Author: John Marshall <jm18@sanger.ac.uk>
6
7 Permission is hereby granted, free of charge, to any person obtaining a copy
8 of this software and associated documentation files (the "Software"), to deal
9 in the Software without restriction, including without limitation the rights
10 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 copies of the Software, and to permit persons to whom the Software is
12 furnished to do so, subject to the following conditions:
13
14 The above copyright notice and this permission notice shall be included in
15 all copies or substantial portions of the Software.
16
17 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 DEALINGS IN THE SOFTWARE. */
24
25 #define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h
26 #include <config.h>
27
28 #include <stdarg.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <time.h>
33
34 #include <errno.h>
35
36 #include "hfile_internal.h"
37 #ifdef ENABLE_PLUGINS
38 #include "version.h"
39 #endif
40 #include "htslib/hts.h" // for hts_version() and hts_verbose
41 #include "htslib/kstring.h"
42
43 typedef struct s3_auth_data {
44 kstring_t id;
45 kstring_t token;
46 kstring_t secret;
47 kstring_t region;
48 kstring_t canonical_query_string;
49 kstring_t user_query_string;
50 kstring_t host;
51 char *bucket;
52 kstring_t auth_hdr;
53 time_t auth_time;
54 char date[40];
55 char date_long[17];
56 char date_short[9];
57 kstring_t date_html;
58 char mode;
59 char *headers[4];
60 int refcount;
61 } s3_auth_data;
62
63 #define AUTH_LIFETIME 60
64
65 #if defined HAVE_COMMONCRYPTO
66
67 #include <CommonCrypto/CommonHMAC.h>
68
69 #define DIGEST_BUFSIZ CC_SHA1_DIGEST_LENGTH
70 #define SHA256_DIGEST_BUFSIZE CC_SHA256_DIGEST_LENGTH
71 #define HASH_LENGTH_SHA256 (SHA256_DIGEST_BUFSIZE * 2) + 1
72
73 static size_t
s3_sign(unsigned char * digest,kstring_t * key,kstring_t * message)74 s3_sign(unsigned char *digest, kstring_t *key, kstring_t *message)
75 {
76 CCHmac(kCCHmacAlgSHA1, key->s, key->l, message->s, message->l, digest);
77 return CC_SHA1_DIGEST_LENGTH;
78 }
79
80
s3_sha256(const unsigned char * in,size_t length,unsigned char * out)81 static void s3_sha256(const unsigned char *in, size_t length, unsigned char *out) {
82 CC_SHA256(in, length, out);
83 }
84
85
s3_sign_sha256(const void * key,int key_len,const unsigned char * d,int n,unsigned char * md,unsigned int * md_len)86 static void s3_sign_sha256(const void *key, int key_len, const unsigned char *d, int n, unsigned char *md, unsigned int *md_len) {
87 CCHmac(kCCHmacAlgSHA256, key, key_len, d, n, md);
88 *md_len = CC_SHA256_DIGEST_LENGTH;
89 }
90
91
92 #elif defined HAVE_HMAC
93
94 #include <openssl/hmac.h>
95 #include <openssl/sha.h>
96
97 #define DIGEST_BUFSIZ EVP_MAX_MD_SIZE
98 #define SHA256_DIGEST_BUFSIZE SHA256_DIGEST_LENGTH
99 #define HASH_LENGTH_SHA256 (SHA256_DIGEST_BUFSIZE * 2) + 1
100
101 static size_t
s3_sign(unsigned char * digest,kstring_t * key,kstring_t * message)102 s3_sign(unsigned char *digest, kstring_t *key, kstring_t *message)
103 {
104 unsigned int len;
105 HMAC(EVP_sha1(), key->s, key->l,
106 (unsigned char *) message->s, message->l, digest, &len);
107 return len;
108 }
109
110
s3_sha256(const unsigned char * in,size_t length,unsigned char * out)111 static void s3_sha256(const unsigned char *in, size_t length, unsigned char *out) {
112 SHA256(in, length, out);
113 }
114
115
s3_sign_sha256(const void * key,int key_len,const unsigned char * d,int n,unsigned char * md,unsigned int * md_len)116 static void s3_sign_sha256(const void *key, int key_len, const unsigned char *d, int n, unsigned char *md, unsigned int *md_len) {
117 HMAC(EVP_sha256(), key, key_len, d, n, md, md_len);
118 }
119
120 #else
121 #error No HMAC() routine found by configure
122 #endif
123
124 static void
urldecode_kput(const char * s,int len,kstring_t * str)125 urldecode_kput(const char *s, int len, kstring_t *str)
126 {
127 char buf[3];
128 int i = 0;
129
130 while (i < len)
131 if (s[i] == '%' && i+2 < len) {
132 buf[0] = s[i+1], buf[1] = s[i+2], buf[2] = '\0';
133 kputc(strtol(buf, NULL, 16), str);
134 i += 3;
135 }
136 else kputc(s[i++], str);
137 }
138
/* Append the base64 encoding of data[0..len) to str, padded with '=' to a
   multiple of four characters. */
static void base64_kput(const unsigned char *data, size_t len, kstring_t *str)
{
    static const char alphabet[] =
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

    size_t pos;
    int pad = 0;

    // Encode three input bytes (24 bits) as four 6-bit symbols at a time;
    // a short final group yields fewer symbols and is padded below.
    for (pos = 0; pos < len; pos += 3) {
        size_t remaining = len - pos;
        unsigned group = (unsigned) data[pos] << 16;

        if (remaining > 1) group |= (unsigned) data[pos + 1] << 8;
        if (remaining > 2) group |= (unsigned) data[pos + 2];

        kputc(alphabet[(group >> 18) & 63], str);
        kputc(alphabet[(group >> 12) & 63], str);

        if (remaining > 1) kputc(alphabet[(group >> 6) & 63], str);
        else pad++;

        if (remaining > 2) kputc(alphabet[group & 63], str);
        else pad++;
    }

    kputsn("==", pad, str);
}
162
/* Decide whether the bucket name in [s0, slim) can be used as a host name
   label.  Accepted names are 3 to 63 characters of lower-case letters,
   digits, '-' (not first or last) and, for non-https access only, '.'
   surrounded by alphanumerics; at least one character must be a letter
   or '-'.  Returns non-zero if the name qualifies. */
static int is_dns_compliant(const char *s0, const char *slim, int is_https)
{
    const char *p;
    int seen_nondigit = 0;
    int count = 0;

    for (p = s0; p < slim; p++, count++) {
        char c = *p;

        if (islower_c(c)) {
            seen_nondigit = 1;
        } else if (isdigit_c(c)) {
            continue;
        } else if (c == '-') {
            if (p == s0 || p + 1 == slim) return 0;
            seen_nondigit = 1;
        } else if (c == '.') {
            // Dots are rejected outright for https access
            if (is_https) return 0;
            if (p == s0 || p + 1 == slim) return 0;
            if (!isalnum_c(p[-1]) || !isalnum_c(p[1])) return 0;
        } else {
            return 0;
        }
    }

    return seen_nondigit && count >= 3 && count <= 63;
}
186
expand_tilde_open(const char * fname,const char * mode)187 static FILE *expand_tilde_open(const char *fname, const char *mode)
188 {
189 FILE *fp;
190
191 if (strncmp(fname, "~/", 2) == 0) {
192 kstring_t full_fname = { 0, 0, NULL };
193 const char *home = getenv("HOME");
194 if (! home) return NULL;
195
196 kputs(home, &full_fname);
197 kputs(&fname[1], &full_fname);
198
199 fp = fopen(full_fname.s, mode);
200 free(full_fname.s);
201 }
202 else
203 fp = fopen(fname, mode);
204
205 return fp;
206 }
207
parse_ini(const char * fname,const char * section,...)208 static void parse_ini(const char *fname, const char *section, ...)
209 {
210 kstring_t line = { 0, 0, NULL };
211 int active = 1; // Start active, so global properties are accepted
212 char *s;
213
214 FILE *fp = expand_tilde_open(fname, "r");
215 if (fp == NULL) return;
216
217 while (line.l = 0, kgetline(&line, (kgets_func *) fgets, fp) >= 0)
218 if (line.s[0] == '[' && (s = strchr(line.s, ']')) != NULL) {
219 *s = '\0';
220 active = (strcmp(&line.s[1], section) == 0);
221 }
222 else if (active && (s = strpbrk(line.s, ":=")) != NULL) {
223 const char *key = line.s, *value = &s[1], *akey;
224 va_list args;
225
226 while (isspace_c(*key)) key++;
227 while (s > key && isspace_c(s[-1])) s--;
228 *s = '\0';
229
230 while (isspace_c(*value)) value++;
231 while (line.l > 0 && isspace_c(line.s[line.l-1]))
232 line.s[--line.l] = '\0';
233
234 va_start(args, section);
235 while ((akey = va_arg(args, const char *)) != NULL) {
236 kstring_t *avar = va_arg(args, kstring_t *);
237 if (strcmp(key, akey) == 0) { kputs(value, avar); break; }
238 }
239 va_end(args);
240 }
241
242 fclose(fp);
243 free(line.s);
244 }
245
/* Read a whitespace-separated "ID SECRET" pair from fname, appending the
   first word to id and the second to secret.  Lines are joined with single
   spaces before splitting.  A file that cannot be opened, or from which
   nothing could be read into memory, leaves id and secret untouched. */
static void parse_simple(const char *fname, kstring_t *id, kstring_t *secret)
{
    kstring_t text = { 0, 0, NULL };
    char *s;
    size_t len;

    FILE *fp = expand_tilde_open(fname, "r");
    if (fp == NULL) return;

    while (kgetline(&text, (kgets_func *) fgets, fp) >= 0)
        kputc(' ', &text);
    fclose(fp);

    // The buffer is still NULL if nothing was ever stored in it
    if (text.s == NULL) return;

    s = text.s;
    while (isspace_c(*s)) s++;
    kputsn(s, len = strcspn(s, " \t"), id);

    s += len;
    while (isspace_c(*s)) s++;
    kputsn(s, strcspn(s, " \t"), secret);

    free(text.s);
}
269
/* Fill ad->headers with freshly strdup()ed copies of the date line and, if
   one has been built, the authorisation line, followed by a NULL terminator,
   and point *hdrs at that list.
   Returns 0 on success, -1 if memory runs out; on failure no allocated
   string is left behind in the list. */
static int copy_auth_headers(s3_auth_data *ad, char ***hdrs) {
    char **hdr = &ad->headers[0];
    *hdrs = hdr;
    *hdr = strdup(ad->date);
    if (!*hdr) return -1;
    hdr++;
    if (ad->auth_hdr.l) {
        *hdr = strdup(ad->auth_hdr.s);
        if (!*hdr) {
            free(ad->headers[0]);
            // Clear the slot so *hdrs does not expose a freed pointer
            ad->headers[0] = NULL;
            return -1;
        }
        hdr++;
    }
    *hdr = NULL;
    return 0;
}
284
/* Drop one reference to ad.  While refcount is positive it is only
   decremented; once it is zero the struct and every string it owns are
   freed. */
static void free_auth_data(s3_auth_data *ad) {
    size_t i;

    if (ad->refcount > 0) {
        ad->refcount--;
        return;
    }

    {
        char *owned[] = {
            ad->id.s, ad->token.s, ad->secret.s, ad->region.s,
            ad->canonical_query_string.s, ad->user_query_string.s,
            ad->host.s, ad->bucket, ad->auth_hdr.s, ad->date_html.s
        };

        for (i = 0; i < sizeof(owned) / sizeof(owned[0]); i++)
            free(owned[i]);
    }

    free(ad);
}
302
auth_header_callback(void * ctx,char *** hdrs)303 static int auth_header_callback(void *ctx, char ***hdrs) {
304 s3_auth_data *ad = (s3_auth_data *) ctx;
305
306 time_t now = time(NULL);
307 #ifdef HAVE_GMTIME_R
308 struct tm tm_buffer;
309 struct tm *tm = gmtime_r(&now, &tm_buffer);
310 #else
311 struct tm *tm = gmtime(&now);
312 #endif
313 kstring_t message = { 0, 0, NULL };
314 unsigned char digest[DIGEST_BUFSIZ];
315 size_t digest_len;
316
317 if (!hdrs) { // Closing connection
318 free_auth_data(ad);
319 return 0;
320 }
321
322 if (now - ad->auth_time < AUTH_LIFETIME) {
323 // Last auth string should still be valid
324 *hdrs = NULL;
325 return 0;
326 }
327
328 strftime(ad->date, sizeof(ad->date), "Date: %a, %d %b %Y %H:%M:%S GMT", tm);
329 if (!ad->id.l || !ad->secret.l) {
330 ad->auth_time = now;
331 return copy_auth_headers(ad, hdrs);
332 }
333
334 if (ksprintf(&message, "%s\n\n\n%s\n%s%s%s%s",
335 ad->mode == 'r' ? "GET" : "PUT", ad->date + 6,
336 ad->token.l ? "x-amz-security-token:" : "",
337 ad->token.l ? ad->token.s : "",
338 ad->token.l ? "\n" : "",
339 ad->bucket) < 0) {
340 return -1;
341 }
342
343 digest_len = s3_sign(digest, &ad->secret, &message);
344 ad->auth_hdr.l = 0;
345 if (ksprintf(&ad->auth_hdr, "Authorization: AWS %s:", ad->id.s) < 0)
346 goto fail;
347 base64_kput(digest, digest_len, &ad->auth_hdr);
348
349 free(message.s);
350 ad->auth_time = now;
351 return copy_auth_headers(ad, hdrs);
352
353 fail:
354 free(message.s);
355 return -1;
356 }
357
358
/* Like escape_path(), but for query strings: '=' and '&' are left untouched.
   Letters, digits and "_-~./=&" are copied as they are; every other byte is
   written as "%XX" using upper-case hex.
   Returns a newly allocated NUL-terminated string that the caller must
   free(), or NULL if memory could not be allocated. */
static char *escape_query(const char *qs) {
    static const char hex[] = "0123456789ABCDEF";
    size_t i, j = 0, length;
    char *escaped;

    length = strlen(qs);

    // Worst case: every byte becomes a three character escape
    if ((escaped = malloc(length * 3 + 1)) == NULL) {
        return NULL;
    }

    for (i = 0; i < length; i++) {
        // Work on the unsigned byte value: a negative char would otherwise
        // be sign-extended and produce more than two hex digits.
        unsigned char c = (unsigned char) qs[i];

        if ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') ||
            c == '_' || c == '-' || c == '~' || c == '.' || c == '/' || c == '=' || c == '&') {
            escaped[j++] = (char) c;
        } else {
            escaped[j++] = '%';
            escaped[j++] = hex[c >> 4];
            escaped[j++] = hex[c & 0x0f];
        }
    }

    escaped[j] = '\0';

    return escaped;
}
391
392
/* Percent-encode a URL path.
   Letters, digits and "_-~./" are copied as they are; every other byte is
   written as "%XX" using upper-case hex.  Encoding stops at the first '?':
   that character and everything after it are copied across unchanged.
   Returns a newly allocated NUL-terminated string that the caller must
   free(), or NULL if memory could not be allocated. */
static char *escape_path(const char *path) {
    static const char hex[] = "0123456789ABCDEF";
    size_t i, j = 0, length;
    char *escaped;

    length = strlen(path);

    // Worst case: every byte becomes a three character escape
    if ((escaped = malloc(length * 3 + 1)) == NULL) {
        return NULL;
    }

    for (i = 0; i < length; i++) {
        // Work on the unsigned byte value: a negative char would otherwise
        // be sign-extended and produce more than two hex digits.
        unsigned char c = (unsigned char) path[i];

        if (c == '?') break; // don't escape ? or beyond

        if ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') ||
            c == '_' || c == '-' || c == '~' || c == '.' || c == '/') {
            escaped[j++] = (char) c;
        } else {
            escaped[j++] = '%';
            escaped[j++] = hex[c >> 4];
            escaped[j++] = hex[c & 0x0f];
        }
    }

    if (i != length) {
        // in the case of a '?' copy the rest of the path across unchanged
        strcpy(escaped + j, path + i);
    } else {
        escaped[j] = '\0';
    }

    return escaped;
}
426
427
/* Guess whether str is already percent-encoded.
   Returns non-zero when the string contains no character that would need
   escaping, or when the escape flag is set at the end of the scan.  The
   flag is set by each valid "%XX" triplet and cleared by a '%' that is
   followed by two non-hex characters, so it reflects the most recent such
   '%' seen. */
static int is_escaped(const char *str) {
    const char *p = str;
    int saw_escape = 0;
    int saw_unsafe = 0;

    while (*p) {
        int safe;

        if (p[0] == '%' && p[1] && p[2]) {
            if (isxdigit_c(p[1]) && isxdigit_c(p[2])) {
                saw_escape = 1;
                p += 3;
                continue;
            }
            // only escaped if all % signs are escaped
            saw_escape = 0;
        }

        safe = (*p >= '0' && *p <= '9')
            || (*p >= 'A' && *p <= 'Z')
            || (*p >= 'a' && *p <= 'z')
            || *p == '_' || *p == '-' || *p == '~' || *p == '.' || *p == '/';
        if (!safe)
            saw_unsafe = 1;

        p++;
    }

    return saw_escape || !saw_unsafe;
}
454
/* Redirect callback: move to the region named in the reply headers.
   Looks for "x-amz-bucket-region: " in header (the header text is modified
   in place to terminate the region name).  When the current host is an
   amazonaws.com one, ad->region and ad->host are updated and url is rebuilt
   as host + resource path (+ '?' + user query string).
   Returns 0 when url has been rewritten, -1 otherwise, including when any
   of the strings could not be built. */
// NOTE(review): the rebuilt url carries no scheme prefix - confirm that the
// caller expects that.
static int redirect_endpoint_callback(void *auth, long response,
                                      kstring_t *header, kstring_t *url) {
    static const char region_tag[] = "x-amz-bucket-region: ";
    s3_auth_data *ad = (s3_auth_data *)auth;
    char *new_region;
    char *end;
    int err = 0;

    if (header->s == NULL)
        return -1;

    // get the new region from the reply header
    if ((new_region = strstr(header->s, region_tag)) == NULL)
        return -1;

    new_region += sizeof(region_tag) - 1;
    end = new_region;

    while (isalnum_c(*end) || ispunct_c(*end)) end++;

    *end = 0;

    if (!strstr(ad->host.s, "amazonaws.com"))
        return -1;

    ad->region.l = 0;
    err |= kputs(new_region, &ad->region) < 0;

    ad->host.l = 0;
    err |= ksprintf(&ad->host, "s3.%s.amazonaws.com", new_region) < 0;

    if (err || !ad->region.l || !ad->host.l)
        return -1;

    url->l = 0;
    err |= kputs(ad->host.s, url) < 0;
    err |= kputsn(ad->bucket, strlen(ad->bucket), url) < 0;
    if (ad->user_query_string.l) {
        err |= kputc('?', url) < 0;
        err |= kputsn(ad->user_query_string.s, ad->user_query_string.l, url) < 0;
    }

    // Never report success with a partially built URL
    return err ? -1 : 0;
}
494
setup_auth_data(const char * s3url,const char * mode,int sigver,kstring_t * url)495 static s3_auth_data * setup_auth_data(const char *s3url, const char *mode,
496 int sigver, kstring_t *url)
497 {
498 s3_auth_data *ad = calloc(1, sizeof(*ad));
499 const char *bucket, *path;
500 char *escaped = NULL;
501 kstring_t profile = { 0, 0, NULL };
502 size_t url_path_pos;
503 ptrdiff_t bucket_len;
504 int is_https = 1, dns_compliant;
505 char *query_start;
506
507 if (!ad)
508 return NULL;
509 ad->mode = strchr(mode, 'r') ? 'r' : 'w';
510
511 // Our S3 URL format is s3[+SCHEME]://[ID[:SECRET[:TOKEN]]@]BUCKET/PATH
512
513 if (s3url[2] == '+') {
514 bucket = strchr(s3url, ':') + 1;
515 if (bucket == NULL) {
516 free(ad);
517 return NULL;
518 }
519 kputsn(&s3url[3], bucket - &s3url[3], url);
520 is_https = strncmp(url->s, "https:", 6) == 0;
521 }
522 else {
523 kputs("https:", url);
524 bucket = &s3url[3];
525 }
526 while (*bucket == '/') kputc(*bucket++, url);
527
528 path = bucket + strcspn(bucket, "/?#@");
529
530 if (*path == '@') {
531 const char *colon = strpbrk(bucket, ":@");
532 if (*colon != ':') {
533 urldecode_kput(bucket, colon - bucket, &profile);
534 }
535 else {
536 const char *colon2 = strpbrk(&colon[1], ":@");
537 urldecode_kput(bucket, colon - bucket, &ad->id);
538 urldecode_kput(&colon[1], colon2 - &colon[1], &ad->secret);
539 if (*colon2 == ':')
540 urldecode_kput(&colon2[1], path - &colon2[1], &ad->token);
541 }
542
543 bucket = &path[1];
544 path = bucket + strcspn(bucket, "/?#");
545 }
546 else {
547 // If the URL has no ID[:SECRET]@, consider environment variables.
548 const char *v;
549 if ((v = getenv("AWS_ACCESS_KEY_ID")) != NULL) kputs(v, &ad->id);
550 if ((v = getenv("AWS_SECRET_ACCESS_KEY")) != NULL) kputs(v, &ad->secret);
551 if ((v = getenv("AWS_SESSION_TOKEN")) != NULL) kputs(v, &ad->token);
552 if ((v = getenv("AWS_DEFAULT_REGION")) != NULL) kputs(v, &ad->region);
553 if ((v = getenv("HTS_S3_HOST")) != NULL) kputs(v, &ad->host);
554
555 if ((v = getenv("AWS_DEFAULT_PROFILE")) != NULL) kputs(v, &profile);
556 else if ((v = getenv("AWS_PROFILE")) != NULL) kputs(v, &profile);
557 else kputs("default", &profile);
558 }
559
560 if (ad->id.l == 0) {
561 const char *v = getenv("AWS_SHARED_CREDENTIALS_FILE");
562 parse_ini(v? v : "~/.aws/credentials", profile.s,
563 "aws_access_key_id", &ad->id,
564 "aws_secret_access_key", &ad->secret,
565 "aws_session_token", &ad->token,
566 "region", &ad->region, NULL);
567 }
568
569 if (ad->id.l == 0) {
570 const char *v = getenv("HTS_S3_S3CFG");
571 parse_ini(v? v : "~/.s3cfg", profile.s, "access_key", &ad->id,
572 "secret_key", &ad->secret, "access_token", &ad->token,
573 "host_base", &ad->host,
574 "bucket_location", &ad->region, NULL);
575 }
576
577 if (ad->id.l == 0)
578 parse_simple("~/.awssecret", &ad->id, &ad->secret);
579
580 dns_compliant = is_dns_compliant(bucket, path, is_https);
581
582 if (ad->host.l == 0)
583 kputs("s3.amazonaws.com", &ad->host);
584
585 if (!dns_compliant && ad->region.l > 0
586 && strcmp(ad->host.s, "s3.amazonaws.com") == 0) {
587 // Can avoid a redirection by including the region in the host name
588 // (assuming the right one has been specified)
589 ad->host.l = 0;
590 ksprintf(&ad->host, "s3.%s.amazonaws.com", ad->region.s);
591 }
592
593 if (ad->region.l == 0)
594 kputs("us-east-1", &ad->region);
595
596 if (!is_escaped(path)) {
597 escaped = escape_path(path);
598 if (escaped == NULL) {
599 goto error;
600 }
601 }
602
603 bucket_len = path - bucket;
604
605 // Use virtual hosted-style access if possible, otherwise path-style.
606 if (dns_compliant) {
607 size_t url_host_pos = url->l;
608 // Append "bucket.host" to url
609 kputsn_(bucket, bucket_len, url);
610 kputc('.', url);
611 kputsn(ad->host.s, ad->host.l, url);
612 url_path_pos = url->l;
613
614 if (sigver == 4) {
615 // Copy back to ad->host to use when making the signature
616 ad->host.l = 0;
617 kputsn(url->s + url_host_pos, url->l - url_host_pos, &ad->host);
618 }
619 }
620 else {
621 // Append "host/bucket" to url
622 kputsn(ad->host.s, ad->host.l, url);
623 url_path_pos = url->l;
624 kputc('/', url);
625 kputsn(bucket, bucket_len, url);
626 }
627
628 kputs(escaped == NULL ? path : escaped, url);
629
630 if (sigver == 4 || !dns_compliant) {
631 ad->bucket = malloc(url->l - url_path_pos + 1);
632 if (ad->bucket == NULL) {
633 goto error;
634 }
635 memcpy(ad->bucket, url->s + url_path_pos, url->l - url_path_pos + 1);
636 }
637 else {
638 ad->bucket = malloc(url->l - url_path_pos + bucket_len + 2);
639 if (ad->bucket == NULL) {
640 goto error;
641 }
642 ad->bucket[0] = '/';
643 memcpy(ad->bucket + 1, bucket, bucket_len);
644 memcpy(ad->bucket + bucket_len + 1,
645 url->s + url_path_pos, url->l - url_path_pos + 1);
646 }
647
648 // write any query strings to its own place to use later
649 if ((query_start = strchr(ad->bucket, '?'))) {
650 kputs(query_start + 1, &ad->user_query_string);
651 *query_start = 0;
652 }
653
654 free(profile.s);
655 free(escaped);
656
657 return ad;
658
659 error:
660 free(profile.s);
661 free(escaped);
662 free_auth_data(ad);
663 return NULL;
664 }
665
s3_rewrite(const char * s3url,const char * mode,va_list * argsp)666 static hFILE * s3_rewrite(const char *s3url, const char *mode, va_list *argsp)
667 {
668 char *header_list[4], **header = header_list;
669
670 kstring_t url = { 0, 0, NULL };
671 kstring_t token_hdr = { 0, 0, NULL };
672 s3_auth_data *ad = setup_auth_data(s3url, mode, 2, &url);
673
674 if (!ad)
675 return NULL;
676
677 if (ad->token.l > 0) {
678 kputs("X-Amz-Security-Token: ", &token_hdr);
679 kputs(ad->token.s, &token_hdr);
680 *header++ = token_hdr.s;
681 }
682
683 *header = NULL;
684 hFILE *fp = hopen(url.s, mode, "va_list", argsp, "httphdr:v", header_list,
685 "httphdr_callback", auth_header_callback,
686 "httphdr_callback_data", ad,
687 "redirect_callback", redirect_endpoint_callback,
688 "redirect_callback_data", ad,
689 NULL);
690 if (!fp) goto fail;
691
692 free(url.s);
693 free(token_hdr.s);
694 return fp;
695
696 fail:
697 free(url.s);
698 free(token_hdr.s);
699 free_auth_data(ad);
700 return NULL;
701 }
702
703 /***************************************************************
704
705 AWS S3 sig version 4 writing code
706
707 ****************************************************************/
708
hash_string(char * in,size_t length,char * out)709 static void hash_string(char *in, size_t length, char *out) {
710 unsigned char hashed[SHA256_DIGEST_BUFSIZE];
711 int i, j;
712
713 s3_sha256((const unsigned char *)in, length, hashed);
714
715 for (i = 0, j = 0; i < SHA256_DIGEST_BUFSIZE; i++, j+= 2) {
716 sprintf(out + j, "%02x", hashed[i]);
717 }
718 }
719
/* Reset s to the empty state without freeing anything it points to. */
static void ksinit(kstring_t *s) {
    static const kstring_t empty = { 0, 0, NULL };

    *s = empty;
}
725
726
/* Free the buffer owned by s and leave s empty, ready for reuse. */
static void ksfree(kstring_t *s) {
    free(s->s);
    s->s = NULL;
    s->l = s->m = 0;
}
731
732
/* Compute the version 4 signature of string_to_sign.
   The signing key is derived by chaining HMAC-SHA256 over "AWS4" + secret,
   the short date, the region, "s3" and "aws4_request"; the final HMAC of
   string_to_sign is written to signature_string as lower-case hex plus a
   NUL (the caller provides HASH_LENGTH_SHA256 bytes).
   Returns 0 on success, -1 if the initial key could not be built. */
static int make_signature(s3_auth_data *ad, kstring_t *string_to_sign, char *signature_string) {
    unsigned char date_key[SHA256_DIGEST_BUFSIZE];
    unsigned char date_region_key[SHA256_DIGEST_BUFSIZE];
    unsigned char date_region_service_key[SHA256_DIGEST_BUFSIZE];
    unsigned char signing_key[SHA256_DIGEST_BUFSIZE];
    unsigned char signature[SHA256_DIGEST_BUFSIZE];

    const unsigned char service[] = "s3";
    const unsigned char request[] = "aws4_request";

    kstring_t secret_access_key = {0, 0, NULL};
    unsigned int len;
    unsigned int i, j;

    if (ksprintf(&secret_access_key, "AWS4%s", ad->secret.s) < 0
        || secret_access_key.l == 0) {
        // Release whatever was allocated before giving up
        ksfree(&secret_access_key);
        return -1;
    }

    s3_sign_sha256(secret_access_key.s, secret_access_key.l, (const unsigned char *)ad->date_short, strlen(ad->date_short), date_key, &len);
    s3_sign_sha256(date_key, len, (const unsigned char *)ad->region.s, ad->region.l, date_region_key, &len);
    // sizeof - 1 leaves out the terminating NUL of the literals
    s3_sign_sha256(date_region_key, len, service, sizeof(service) - 1, date_region_service_key, &len);
    s3_sign_sha256(date_region_service_key, len, request, sizeof(request) - 1, signing_key, &len);
    s3_sign_sha256(signing_key, len, (const unsigned char *)string_to_sign->s, string_to_sign->l, signature, &len);

    for (i = 0, j = 0; i < len; i++, j+= 2) {
        sprintf(signature_string + j, "%02x", signature[i]);
    }

    ksfree(&secret_access_key);

    return 0;
}
767
768
/* Build the version 4 "Authorization:" header line for one request.
   http_request is the HTTP verb and content the hex SHA-256 of the request
   body.  The canonical request is assembled from the verb, ad->bucket (the
   canonical URI), ad->canonical_query_string and the signed headers, then
   hashed and signed via make_signature().  The header is appended to auth.
   Returns 0 on success, -1 on failure. */
static int make_authorisation(s3_auth_data *ad, char *http_request, char *content, kstring_t *auth) {
    kstring_t signed_headers = {0, 0, NULL};
    kstring_t canonical_headers = {0, 0, NULL};
    kstring_t canonical_request = {0, 0, NULL};
    kstring_t scope = {0, 0, NULL};
    kstring_t string_to_sign = {0, 0, NULL};
    char cr_hash[HASH_LENGTH_SHA256];
    char signature_string[HASH_LENGTH_SHA256];
    int ret = -1;
    const int have_token = ad->token.l != 0;

    // The security token header is only signed when a token is in use
    if (have_token) {
        kputs("host;x-amz-content-sha256;x-amz-date;x-amz-security-token", &signed_headers);
    } else {
        kputs("host;x-amz-content-sha256;x-amz-date", &signed_headers);
    }
    if (signed_headers.l == 0) goto cleanup;

    if (have_token) {
        ksprintf(&canonical_headers, "host:%s\nx-amz-content-sha256:%s\nx-amz-date:%s\nx-amz-security-token:%s\n",
                 ad->host.s, content, ad->date_long, ad->token.s);
    } else {
        ksprintf(&canonical_headers, "host:%s\nx-amz-content-sha256:%s\nx-amz-date:%s\n",
                 ad->host.s, content, ad->date_long);
    }
    if (canonical_headers.l == 0) goto cleanup;

    // bucket == canonical_uri
    ksprintf(&canonical_request, "%s\n%s\n%s\n%s\n%s\n%s",
             http_request, ad->bucket, ad->canonical_query_string.s,
             canonical_headers.s, signed_headers.s, content);
    if (canonical_request.l == 0) goto cleanup;

    hash_string(canonical_request.s, canonical_request.l, cr_hash);

    ksprintf(&scope, "%s/%s/s3/aws4_request", ad->date_short, ad->region.s);
    if (scope.l == 0) goto cleanup;

    ksprintf(&string_to_sign, "AWS4-HMAC-SHA256\n%s\n%s\n%s", ad->date_long, scope.s, cr_hash);
    if (string_to_sign.l == 0) goto cleanup;

    if (make_signature(ad, &string_to_sign, signature_string) != 0) goto cleanup;

    ksprintf(auth, "Authorization: AWS4-HMAC-SHA256 Credential=%s/%s/%s/s3/aws4_request,SignedHeaders=%s,Signature=%s",
             ad->id.s, ad->date_short, ad->region.s, signed_headers.s, signature_string);
    if (auth->l == 0) goto cleanup;

    ret = 0;

 cleanup:
    ksfree(&signed_headers);
    ksfree(&canonical_headers);
    ksfree(&canonical_request);
    ksfree(&scope);
    ksfree(&string_to_sign);

    return ret;
}
848
849
/* Refresh the version 4 time stamps when they are older than AUTH_LIFETIME
   seconds: ad->date_long, ad->date_short and the "x-amz-date: ..." line in
   ad->date_html.
   Returns 0 if ad->date_html holds a header line afterwards, -1 if the
   current time could not be converted or formatted. */
static int update_time(s3_auth_data *ad) {
    time_t now = time(NULL);

    if (now - ad->auth_time > AUTH_LIFETIME) {
#ifdef HAVE_GMTIME_R
        struct tm tm_buffer;
        struct tm *tm = gmtime_r(&now, &tm_buffer);
#else
        struct tm *tm = gmtime(&now);
#endif

        // strftime() must not be given a NULL broken-down time
        if (tm == NULL) {
            return -1;
        }

        // update timestamp
        ad->auth_time = now;

        // Both formats have a fixed width; anything else is a failure
        if (strftime(ad->date_long, sizeof(ad->date_long), "%Y%m%dT%H%M%SZ", tm)
            != sizeof(ad->date_long) - 1) {
            return -1;
        }

        if (strftime(ad->date_short, sizeof(ad->date_short), "%Y%m%d", tm)
            != sizeof(ad->date_short) - 1) {
            return -1;
        }

        ad->date_html.l = 0;
        ksprintf(&ad->date_html, "x-amz-date: %s", ad->date_long);
    }

    return ad->date_html.l ? 0 : -1;
}
880
881
/* qsort() comparison function for an array of C strings: each argument
   points at a char * element, and the strings are ordered with strcmp(). */
static int query_cmp(const void *p1, const void *p2) {
    const char *lhs = *(char * const *) p1;
    const char *rhs = *(char * const *) p2;

    return strcmp(lhs, rhs);
}
888
889
890 /* Query strings must be in alphabetical order for authorisation */
891
order_query_string(kstring_t * qs)892 static int order_query_string(kstring_t *qs) {
893 int *query_offset = NULL;
894 int num_queries, i;
895 char **queries = NULL;
896 kstring_t ordered = {0, 0, NULL};
897 char *escaped = NULL;
898 int ret = -1;
899
900 if ((query_offset = ksplit(qs, '&', &num_queries)) == NULL) {
901 return -1;
902 }
903
904 if ((queries = malloc(num_queries * sizeof(char*))) == NULL)
905 goto err;
906
907 for (i = 0; i < num_queries; i++) {
908 queries[i] = qs->s + query_offset[i];
909 }
910
911 qsort(queries, num_queries, sizeof(char *), query_cmp);
912
913 for (i = 0; i < num_queries; i++) {
914 if (i) {
915 kputs("&", &ordered);
916 }
917
918 kputs(queries[i], &ordered);
919 }
920
921 if ((escaped = escape_query(ordered.s)) == NULL)
922 goto err;
923
924 qs->l = 0;
925 kputs(escaped, qs);
926
927 ret = 0;
928 err:
929 free(ordered.s);
930 free(queries);
931 free(query_offset);
932 free(escaped);
933
934 return ret;
935 }
936
937
/* Authorisation callback for version 4 uploads.
   auth     - the s3_auth_data for the connection
   request  - HTTP verb; NULL means "free the auth data"
   content  - request body to hash, or NULL for an empty body
   cqs      - canonical query string for this request (must be non-empty)
   hash     - receives the hex content hash (HASH_LENGTH_SHA256 bytes,
              i.e. including the terminating NUL)
   auth_str - receives the "Authorization:" line
   date     - receives the "x-amz-date:" line
   token    - receives the "x-amz-security-token:" line when a token is set
   uqs      - non-zero to add the user's query string before signing
   Returns 0 on success, -1 on failure. */
static int write_authorisation_callback(void *auth, char *request, kstring_t *content, char *cqs,
                                        kstring_t *hash, kstring_t *auth_str, kstring_t *date,
                                        kstring_t *token, int uqs) {
    s3_auth_data *ad = (s3_auth_data *)auth;
    kstring_t *canonical_qs = &ad->canonical_query_string;
    char content_hash[HASH_LENGTH_SHA256];

    if (!request) {
        // signal to free auth data
        free_auth_data(ad);
        return 0;
    }

    if (update_time(ad) != 0)
        return -1;

    // A missing body is hashed as the empty string
    hash_string(content ? content->s : "", content ? content->l : 0,
                content_hash);

    canonical_qs->l = 0;
    kputs(cqs, canonical_qs);
    if (canonical_qs->l == 0)
        return -1;

    /* add a user provided query string, normally only useful on upload initiation */
    if (uqs) {
        kputs("&", canonical_qs);
        kputs(ad->user_query_string.s, canonical_qs);

        if (order_query_string(canonical_qs) != 0)
            return -1;
    }

    if (make_authorisation(ad, request, content_hash, auth_str) != 0)
        return -1;

    kputs(ad->date_html.s, date);
    kputsn(content_hash, HASH_LENGTH_SHA256, hash);
    if (date->l == 0 || hash->l == 0)
        return -1;

    if (ad->token.l)
        ksprintf(token, "x-amz-security-token: %s", ad->token.s);

    return 0;
}
995
996
v4_auth_header_callback(void * ctx,char *** hdrs)997 static int v4_auth_header_callback(void *ctx, char ***hdrs) {
998 s3_auth_data *ad = (s3_auth_data *) ctx;
999 char content_hash[HASH_LENGTH_SHA256];
1000 kstring_t content = {0, 0, NULL};
1001 kstring_t authorisation = {0, 0, NULL};
1002 char *date_html = NULL;
1003
1004 if (!hdrs) { // Closing connection
1005 free_auth_data(ad);
1006 return 0;
1007 }
1008
1009 if (update_time(ad)) {
1010 return -1;
1011 }
1012
1013 hash_string("", 0, content_hash); // empty hash
1014
1015 ad->canonical_query_string.l = 0;
1016
1017 if (ad->user_query_string.l > 0) {
1018 kputs(ad->user_query_string.s, &ad->canonical_query_string);
1019
1020 if (order_query_string(&ad->canonical_query_string)) {
1021 return -1;
1022 }
1023 } else {
1024 kputs("", &ad->canonical_query_string);
1025 }
1026
1027 if (make_authorisation(ad, "GET", content_hash, &authorisation)) {
1028 return -1;
1029 }
1030
1031 ksprintf(&content, "x-amz-content-sha256: %s", content_hash);
1032 date_html = strdup(ad->date_html.s);
1033
1034 if (content.l == 0 || date_html == NULL) {
1035 ksfree(&authorisation);
1036 ksfree(&content);
1037 free(date_html);
1038 return -1;
1039 }
1040
1041 *hdrs = &ad->headers[0];
1042 ad->headers[0] = ks_release(&authorisation);
1043 ad->headers[1] = date_html;
1044 ad->headers[2] = ks_release(&content);
1045 ad->headers[3] = NULL;
1046
1047 return 0;
1048 }
1049
/* Recover the correct region from a "400 Bad Request" reply.
   v4 signatures in virtual hosted mode return 400 if the wrong region was
   used to make the signature; the XML reply names the right one.  The XML
   is simple enough that a "good enough" text search for
   <Region>...</Region> is used instead of a real parser.  Up to 1023 bytes
   of the reply are read from fp, and the region name, with surrounding
   whitespace removed, replaces ad->region.
   Returns 0 on success, -1 if the read fails or no region is found. */
static int handle_400_response(hFILE *fp, s3_auth_data *ad) {
    static const char open_tag[] = "<Region>";
    char body[1024];
    char *start, *stop;
    ssize_t got;

    got = hread(fp, body, sizeof(body) - 1);
    if (got < 0)
        return -1;
    body[got] = '\0';

    start = strstr(body, open_tag);
    if (!start)
        return -1;

    // Step over the opening tag and any leading whitespace
    start += sizeof(open_tag) - 1;
    while (isspace((unsigned char) *start)) ++start;

    // The next tag must be the matching close tag
    stop = strchr(start, '<');
    if (!stop || strncmp(stop + 1, "/Region>", 8) != 0)
        return -1;

    while (stop > start && isspace((unsigned char) stop[-1])) --stop;

    ad->region.l = 0;
    kputsn(start, stop - start, &ad->region);

    return ad->region.l == 0 ? -1 : 0;
}
1085
/* Replace the region stored in the auth data adv with the contents of
   region.  Returns 0 on success, 1 if the copy failed. */
static int set_region(void *adv, kstring_t *region) {
    s3_auth_data *ad = (s3_auth_data *) adv;
    int status;

    ad->region.l = 0;
    status = kputsn(region->s, region->l, &ad->region);

    return status < 0;
}
1092
/* Map an HTTP status code to an errno value.
   Codes below 400 give 0.  Selected 4xx and 5xx codes have a specific
   errno; any other 4xx code gives EINVAL and any other code of 500 or
   above gives EIO. */
static int http_status_errno(int status)
{
    if (status < 400)
        return 0;

    switch (status) {
    case 401:
    case 407:
        return EPERM;
    case 403:
        return EACCES;
    case 404:
    case 410:
        return ENOENT;
    case 405:
        return EROFS;
    case 408:
    case 504:
        return ETIMEDOUT;
    case 501:
        return ENOSYS;
    case 503:
        return EBUSY;
    default:
        return status >= 500 ? EIO : EINVAL;
    }
}
1115
s3_open_v4(const char * s3url,const char * mode,va_list * argsp)1116 static hFILE *s3_open_v4(const char *s3url, const char *mode, va_list *argsp) {
1117 kstring_t url = { 0, 0, NULL };
1118 kstring_t token_hdr = { 0, 0, NULL };
1119
1120 char *header_list[4], **header = header_list;
1121 s3_auth_data *ad = setup_auth_data(s3url, mode, 4, &url);
1122 hFILE *fp = NULL;
1123
1124 if (ad == NULL) {
1125 return NULL;
1126 }
1127
1128 if (ad->mode == 'r') {
1129 long http_response = 0;
1130
1131 if (ad->token.l > 0) {
1132 kputs("x-amz-security-token: ", &token_hdr);
1133 kputs(ad->token.s, &token_hdr);
1134 *header++ = token_hdr.s;
1135 }
1136
1137 *header = NULL;
1138 fp = hopen(url.s, mode, "va_list", argsp, "httphdr:v", header_list,
1139 "httphdr_callback", v4_auth_header_callback,
1140 "httphdr_callback_data", ad,
1141 "redirect_callback", redirect_endpoint_callback,
1142 "redirect_callback_data", ad,
1143 "http_response_ptr", &http_response,
1144 "fail_on_error", 0,
1145 NULL);
1146
1147 if (fp == NULL) goto error;
1148
1149 if (http_response == 400) {
1150 ad->refcount = 1;
1151 if (handle_400_response(fp, ad) != 0) {
1152 goto error;
1153 }
1154 hclose_abruptly(fp);
1155 fp = hopen(url.s, mode, "va_list", argsp, "httphdr:v", header_list,
1156 "httphdr_callback", v4_auth_header_callback,
1157 "httphdr_callback_data", ad,
1158 "redirect_callback", redirect_endpoint_callback,
1159 "redirect_callback_data", ad,
1160 NULL);
1161 } else if (http_response > 400) {
1162 ad->refcount = 1;
1163 errno = http_status_errno(http_response);
1164 goto error;
1165 }
1166
1167 if (fp == NULL) goto error;
1168 } else {
1169 kstring_t final_url = {0, 0, NULL};
1170
1171 // add the scheme marker
1172 ksprintf(&final_url, "s3w+%s", url.s);
1173
1174 if(final_url.l == 0) goto error;
1175
1176 fp = hopen(final_url.s, mode, "va_list", argsp,
1177 "s3_auth_callback", write_authorisation_callback,
1178 "s3_auth_callback_data", ad,
1179 "redirect_callback", redirect_endpoint_callback,
1180 "set_region_callback", set_region,
1181 NULL);
1182 free(final_url.s);
1183
1184 if (fp == NULL) goto error;
1185 }
1186
1187 free(url.s);
1188 free(token_hdr.s);
1189
1190 return fp;
1191
1192 error:
1193
1194 if (fp) hclose_abruptly(fp);
1195 free(url.s);
1196 free(token_hdr.s);
1197 free_auth_data(ad);
1198
1199 return NULL;
1200 }
1201
1202
s3_open(const char * url,const char * mode)1203 static hFILE *s3_open(const char *url, const char *mode)
1204 {
1205 hFILE *fp;
1206
1207 kstring_t mode_colon = { 0, 0, NULL };
1208 kputs(mode, &mode_colon);
1209 kputc(':', &mode_colon);
1210
1211 if (getenv("HTS_S3_V2") == NULL) { // Force the v2 signature code
1212 fp = s3_open_v4(url, mode_colon.s, NULL);
1213 } else {
1214 fp = s3_rewrite(url, mode_colon.s, NULL);
1215 }
1216
1217 free(mode_colon.s);
1218
1219 return fp;
1220 }
1221
s3_vopen(const char * url,const char * mode_colon,va_list args0)1222 static hFILE *s3_vopen(const char *url, const char *mode_colon, va_list args0)
1223 {
1224 hFILE *fp;
1225 // Need to use va_copy() as we can only take the address of an actual
1226 // va_list object, not that of a parameter whose type may have decayed.
1227 va_list args;
1228 va_copy(args, args0);
1229
1230 if (getenv("HTS_S3_V2") == NULL) { // Force the v2 signature code
1231 fp = s3_open_v4(url, mode_colon, &args);
1232 } else {
1233 fp = s3_rewrite(url, mode_colon, &args);
1234 }
1235
1236 va_end(args);
1237 return fp;
1238 }
1239
PLUGIN_GLOBAL(hfile_plugin_init,_s3)1240 int PLUGIN_GLOBAL(hfile_plugin_init,_s3)(struct hFILE_plugin *self)
1241 {
1242 static const struct hFILE_scheme_handler handler =
1243 { s3_open, hfile_always_remote, "Amazon S3", 2000 + 50, s3_vopen
1244 };
1245
1246 #ifdef ENABLE_PLUGINS
1247 // Embed version string for examination via strings(1) or what(1)
1248 static const char id[] = "@(#)hfile_s3 plugin (htslib)\t" HTS_VERSION_TEXT;
1249 if (hts_verbose >= 9)
1250 fprintf(stderr, "[M::hfile_s3.init] version %s\n", strchr(id, '\t')+1);
1251 #endif
1252
1253 self->name = "Amazon S3";
1254 hfile_add_scheme_handler("s3", &handler);
1255 hfile_add_scheme_handler("s3+http", &handler);
1256 hfile_add_scheme_handler("s3+https", &handler);
1257 return 0;
1258 }
1259