1 /* hfile_libcurl.c -- libcurl backend for low-level file streams.
2
3 Copyright (C) 2015-2017 Genome Research Ltd.
4
5 Author: John Marshall <jm18@sanger.ac.uk>
6
7 Permission is hereby granted, free of charge, to any person obtaining a copy
8 of this software and associated documentation files (the "Software"), to deal
9 in the Software without restriction, including without limitation the rights
10 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 copies of the Software, and to permit persons to whom the Software is
12 furnished to do so, subject to the following conditions:
13
14 The above copyright notice and this permission notice shall be included in
15 all copies or substantial portions of the Software.
16
17 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 DEALINGS IN THE SOFTWARE. */
24
25 #include <config.h>
26
27 #include <stdarg.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <errno.h>
31 #include <pthread.h>
32 #ifndef _WIN32
33 # include <sys/select.h>
34 #endif
35 #include <assert.h>
36
37 #include "hfile_internal.h"
38 #ifdef ENABLE_PLUGINS
39 #include "version.h"
40 #endif
41 #include "htslib/hts.h" // for hts_version() and hts_verbose
42 #include "htslib/kstring.h"
43 #include "htslib/khash.h"
44
45 #include <curl/curl.h>
46
// Safety margin, in seconds, subtracted from an auth token's expiry time so
// that the token is refreshed early.  Allows for clock skew and slow servers.
#define AUTH_REFRESH_EARLY_SECS 60

// Shortest forward seek, in bytes, that is worth a new request.  Shorter
// forward seeks are satisfied by reading the intervening data and discarding
// it.  The best value depends on how the cost of making a new connection
// compares with the rate at which data arrives.
#define MIN_SEEK_FORWARD 1000000
56
57 typedef struct {
58 char *path;
59 char *token;
60 time_t expiry;
61 int failed;
62 pthread_mutex_t lock;
63 } auth_token;
64
65 // For the authorization header cache
66 KHASH_MAP_INIT_STR(auth_map, auth_token *)
67
// Header list in a form libcurl accepts: an array of curl_slist nodes that
// are chained together through their next pointers.
typedef struct {
    struct curl_slist *list;    // Array of nodes; &list[0] is the list head
    unsigned int num;           // Number of nodes in use
    unsigned int size;          // Number of nodes allocated
} hdrlist;
74
75 typedef struct {
76 hdrlist fixed; // List of headers supplied at hopen()
77 hdrlist extra; // List of headers from callback
78 hts_httphdr_callback callback; // Callback to get more headers
79 void *callback_data; // Data to pass to callback
80 auth_token *auth; // Authentication token
81 int auth_hdr_num; // Location of auth_token in hdrlist extra
82 // If -1, Authorization header is in fixed
83 // -2, it came from the callback
84 // -3, "auth_token_enabled", "false"
85 // passed to hopen()
86 } http_headers;
87
88 typedef struct {
89 hFILE base;
90 CURL *easy;
91 CURLM *multi;
92 off_t file_size;
93 struct {
94 union { char *rd; const char *wr; } ptr;
95 size_t len;
96 } buffer;
97 CURLcode final_result; // easy result code for finished transfers
98 // Flags for communicating with libcurl callbacks:
99 unsigned paused : 1; // callback tells us that it has paused transfer
100 unsigned closing : 1; // informs callback that hclose() has been invoked
101 unsigned finished : 1; // wait_perform() tells us transfer is complete
102 unsigned perform_again : 1;
103 unsigned is_read : 1; // Opened in read mode
104 unsigned can_seek : 1; // Can (attempt to) seek on this handle
105 unsigned is_recursive:1; // Opened by hfile_libcurl itself
106 unsigned tried_seek : 1; // At least one seek has been attempted
107 int nrunning;
108 http_headers headers;
109 off_t delayed_seek; // Location to seek to before reading
110 off_t last_offset; // Location we're seeking from
111 } hFILE_libcurl;
112
113 static off_t libcurl_seek(hFILE *fpv, off_t offset, int whence);
114 static int restart_from_position(hFILE_libcurl *fp, off_t pos);
115
/*
 * Map an HTTP response status code onto an errno value.
 * Statuses below 400 are not errors and give 0.  Unrecognised 4xx statuses
 * give EINVAL; unrecognised statuses of 500 and above give EIO.
 */
static int http_status_errno(int status)
{
    if (status < 400)
        return 0;

    switch (status) {
    case 401:
    case 407:
        return EPERM;
    case 403:
        return EACCES;
    case 404:
    case 410:
        return ENOENT;
    case 405:
        return EROFS;
    case 408:
    case 504:
        return ETIMEDOUT;
    case 501:
        return ENOSYS;
    case 503:
        return EBUSY;
    default:
        return (status >= 500) ? EIO : EINVAL;
    }
}
138
/*
 * Translate a libcurl easy-interface result code into an errno value.
 *
 * easy is the handle the result came from; it is queried for extra detail
 * (OS errno, HTTP response code) where that gives a better answer.
 * Returns 0 only for CURLE_OK.  Every other code gives a non-zero errno,
 * so callers that do "errno = easy_errno(...); return -1;" never report a
 * failure with errno set to 0.
 */
static int easy_errno(CURL *easy, CURLcode err)
{
    long lval = 0;

    switch (err) {
    case CURLE_OK:
        return 0;

    case CURLE_UNSUPPORTED_PROTOCOL:
    case CURLE_URL_MALFORMAT:
        return EINVAL;

#if LIBCURL_VERSION_NUM >= 0x071505
    case CURLE_NOT_BUILT_IN:
        return ENOSYS;
#endif

    case CURLE_COULDNT_RESOLVE_PROXY:
    case CURLE_COULDNT_RESOLVE_HOST:
    case CURLE_FTP_CANT_GET_HOST:
        return EDESTADDRREQ; // Lookup failure

    case CURLE_COULDNT_CONNECT:
    case CURLE_SEND_ERROR:
    case CURLE_RECV_ERROR:
        // Prefer the OS-level error, but libcurl may not have recorded one
        // (value 0); in that case fall back to a generic connection error.
        if (curl_easy_getinfo(easy, CURLINFO_OS_ERRNO, &lval) == CURLE_OK
            && lval != 0)
            return (int) lval;
        return ECONNABORTED;

    case CURLE_REMOTE_ACCESS_DENIED:
    case CURLE_LOGIN_DENIED:
    case CURLE_TFTP_PERM:
        return EACCES;

    case CURLE_PARTIAL_FILE:
        return EPIPE;

    case CURLE_HTTP_RETURNED_ERROR:
        if (curl_easy_getinfo(easy, CURLINFO_RESPONSE_CODE, &lval) == CURLE_OK) {
            int http_err = http_status_errno((int) lval);
            // A status below 400 maps to 0; still report a failure.
            if (http_err != 0)
                return http_err;
        }
        return EIO;

    case CURLE_OUT_OF_MEMORY:
        return ENOMEM;

    case CURLE_OPERATION_TIMEDOUT:
        return ETIMEDOUT;

    case CURLE_RANGE_ERROR:
        return ESPIPE;

    case CURLE_SSL_CONNECT_ERROR:
        // TODO return SSL error buffer messages
        return ECONNABORTED;

    case CURLE_FILE_COULDNT_READ_FILE:
    case CURLE_TFTP_NOTFOUND:
        return ENOENT;

    case CURLE_TOO_MANY_REDIRECTS:
        return ELOOP;

    case CURLE_FILESIZE_EXCEEDED:
        return EFBIG;

    case CURLE_REMOTE_DISK_FULL:
        return ENOSPC;

    case CURLE_REMOTE_FILE_EXISTS:
        return EEXIST;

    default:
        return EIO;
    }
}
216
/*
 * Translate a libcurl multi-interface result code into an errno value.
 * CURLM_OK and CURLM_CALL_MULTI_PERFORM are not errors and give 0.
 */
static int multi_errno(CURLMcode errm)
{
    if (errm == CURLM_OK || errm == CURLM_CALL_MULTI_PERFORM)
        return 0;

    if (errm == CURLM_BAD_HANDLE
        || errm == CURLM_BAD_EASY_HANDLE
        || errm == CURLM_BAD_SOCKET)
        return EBADF;

    if (errm == CURLM_OUT_OF_MEMORY)
        return ENOMEM;

    return EIO;
}
236
237 static struct {
238 kstring_t useragent;
239 CURLSH *share;
240 char *auth_path;
241 khash_t(auth_map) *auth_map;
242 int allow_unencrypted_auth_header;
243 pthread_mutex_t auth_lock;
244 pthread_mutex_t share_lock;
245 } curl = { { 0, 0, NULL }, NULL, NULL, NULL, 0, PTHREAD_MUTEX_INITIALIZER,
246 PTHREAD_MUTEX_INITIALIZER };
247
// Lock callback for the libcurl share interface.  One mutex covers all kinds
// of shared data, so every argument is ignored.
static void share_lock(CURL *handle, curl_lock_data data,
                       curl_lock_access access, void *userptr) {
    (void) handle;
    (void) data;
    (void) access;
    (void) userptr;
    pthread_mutex_lock(&curl.share_lock);
}
252
// Unlock callback for the libcurl share interface; releases the mutex taken
// by share_lock().  Every argument is ignored.
static void share_unlock(CURL *handle, curl_lock_data data, void *userptr) {
    (void) handle;
    (void) data;
    (void) userptr;
    pthread_mutex_unlock(&curl.share_lock);
}
256
// Destroy an auth_token along with the strings it owns.  A NULL tok is
// accepted and ignored.  Aborts if the token's mutex cannot be destroyed.
static void free_auth(auth_token *tok) {
    if (tok != NULL) {
        if (pthread_mutex_destroy(&tok->lock) != 0)
            abort();
        free(tok->token);
        free(tok->path);
        free(tok);
    }
}
264
libcurl_exit()265 static void libcurl_exit()
266 {
267 if (curl_share_cleanup(curl.share) == CURLSHE_OK)
268 curl.share = NULL;
269
270 free(curl.useragent.s);
271 curl.useragent.l = curl.useragent.m = 0; curl.useragent.s = NULL;
272
273 free(curl.auth_path);
274 curl.auth_path = NULL;
275
276 if (curl.auth_map) {
277 khiter_t i;
278 for (i = kh_begin(curl.auth_map); i != kh_end(curl.auth_map); ++i) {
279 if (kh_exist(curl.auth_map, i)) {
280 free_auth(kh_value(curl.auth_map, i));
281 kh_key(curl.auth_map, i) = NULL;
282 kh_value(curl.auth_map, i) = NULL;
283 }
284 }
285 kh_destroy(auth_map, curl.auth_map);
286 curl.auth_map = NULL;
287 }
288
289 curl_global_cleanup();
290 }
291
/*
 * Add one header to the end of hdrs.
 * If dup is non-zero the list stores its own strdup()ed copy of data;
 * otherwise it stores the data pointer itself.
 * The node array starts at 4 entries and doubles when full.
 * Returns 0 on success, -1 if memory could not be allocated.
 */
static int append_header(hdrlist *hdrs, const char *data, int dup) {
    struct curl_slist *node;

    if (hdrs->num == hdrs->size) {
        unsigned int capacity = (hdrs->size != 0) ? hdrs->size * 2 : 4;
        unsigned int k;
        struct curl_slist *grown = realloc(hdrs->list,
                                           capacity * sizeof(*grown));
        if (grown == NULL)
            return -1;
        hdrs->list = grown;
        hdrs->size = capacity;
        // The array may have moved, so point each node at its successor again
        for (k = 0; k + 1 < hdrs->num; k++)
            grown[k].next = &grown[k + 1];
    }

    node = &hdrs->list[hdrs->num];
    // Annoyingly, libcurl doesn't declare the char * as const...
    node->data = dup ? strdup(data) : (char *) data;
    if (node->data == NULL)
        return -1;

    node->next = NULL;
    if (hdrs->num > 0)
        node[-1].next = node;
    hdrs->num++;
    return 0;
}
310
/*
 * Free every header string held in hdrs and mark the list as empty.
 * If completely is non-zero the node array is released as well; otherwise
 * it is kept for reuse.
 */
static void free_headers(hdrlist *hdrs, int completely) {
    unsigned int k = 0;

    while (k < hdrs->num) {
        struct curl_slist *node = &hdrs->list[k++];
        free(node->data);
        node->data = NULL;
        node->next = NULL;
    }
    hdrs->num = 0;

    if (!completely)
        return;

    free(hdrs->list);
    hdrs->list = NULL;
    hdrs->size = 0;
}
325
// Return the first node of fp's header list: the first fixed header if there
// is one, otherwise the first extra header, otherwise NULL.
static struct curl_slist * get_header_list(hFILE_libcurl *fp) {
    hdrlist *fixed = &fp->headers.fixed;
    hdrlist *extra = &fp->headers.extra;

    if (fixed->num > 0)
        return &fixed->list[0];
    return (extra->num > 0) ? &extra->list[0] : NULL;
}
333
// Return non-zero if hdr starts with "authorization:", compared without
// regard to case.
static inline int is_authorization(const char *hdr) {
    static const char prefix[] = "authorization:";
    return strncasecmp(prefix, hdr, sizeof(prefix) - 1) == 0;
}
337
/*
 * Ask the header callback (if there is one) for a fresh set of headers and
 * install them as fp's "extra" headers, replacing any previous ones.
 *
 * The callback hands back a NULL-terminated array of strings, or leaves the
 * array pointer NULL to mean "no change".  Each string that is stored in
 * the extra list becomes owned by that list, and its array slot is set to
 * NULL so the callback will not see it again.  The array itself is not
 * freed here.
 *
 * Returns 0 on success (including no callback / no change), -1 on failure.
 * On failure, strings that were not stored are freed; strings that were
 * stored stay in the extra list.
 */
static int add_callback_headers(hFILE_libcurl *fp) {
    char **hdrs = NULL, **hdr, **taken;

    if (!fp->headers.callback)
        return 0;

    // Get the headers from the callback
    if (fp->headers.callback(fp->headers.callback_data, &hdrs) != 0) {
        return -1;
    }

    if (!hdrs) // No change
        return 0;

    // Remove any old callback headers
    if (fp->headers.fixed.num > 0) {
        // Unlink lists
        fp->headers.fixed.list[fp->headers.fixed.num - 1].next = NULL;
    }
    free_headers(&fp->headers.extra, 0);

    if (fp->headers.auth_hdr_num > 0 || fp->headers.auth_hdr_num == -2)
        fp->headers.auth_hdr_num = 0; // Just removed it...

    // Convert to libcurl-suitable form
    for (hdr = hdrs; *hdr; hdr++) {
        if (append_header(&fp->headers.extra, *hdr, 0) < 0) {
            goto cleanup;
        }
        if (is_authorization(*hdr) && !fp->headers.auth_hdr_num)
            fp->headers.auth_hdr_num = -2;
    }
    // The strings now belong to the extra list
    for (hdr = hdrs; *hdr; hdr++) *hdr = NULL;

    if (fp->headers.fixed.num > 0 && fp->headers.extra.num > 0) {
        // Relink lists
        fp->headers.fixed.list[fp->headers.fixed.num - 1].next
            = &fp->headers.extra.list[0];
    }
    return 0;

 cleanup:
    // Entries before hdr were stored in the extra list, which now owns them;
    // clear the array slots so they cannot be freed a second time.
    for (taken = hdrs; taken < hdr; taken++) *taken = NULL;
    // The entry that failed, and all that follow it, were never stored.
    // Free every one of them, not just the first.
    for (; *hdr; hdr++) {
        free(*hdr);
        *hdr = NULL;
    }
    return -1;
}
386
387 /*
388 * Read an OAUTH2-style Bearer access token (see
389 * https://tools.ietf.org/html/rfc6750#section-4).
390 * Returns 'v' for valid; 'i' for invalid (token missing or wrong sort);
391 * '?' for a JSON parse error; 'm' if it runs out of memory.
392 */
read_auth_json(auth_token * tok,hFILE * auth_fp)393 static int read_auth_json(auth_token *tok, hFILE *auth_fp) {
394 hts_json_token *t = hts_json_alloc_token();
395 kstring_t str = {0, 0, NULL};
396 char *token = NULL, *type = NULL, *expiry = NULL;
397 int ret = 'i';
398
399 if (!t) goto error;
400
401 if ((ret = hts_json_fnext(auth_fp, t, &str)) != '{') goto error;
402 while (hts_json_fnext(auth_fp, t, &str) != '}') {
403 char *key;
404 if (hts_json_token_type(t) != 's') {
405 ret = '?';
406 goto error;
407 }
408 key = hts_json_token_str(t);
409 if (!key) goto error;
410 if (strcmp(key, "access_token") == 0) {
411 if ((ret = hts_json_fnext(auth_fp, t, &str)) != 's') goto error;
412 token = ks_release(&str);
413 } else if (strcmp(key, "token_type") == 0) {
414 if ((ret = hts_json_fnext(auth_fp, t, &str)) != 's') goto error;
415 type = ks_release(&str);
416 } else if (strcmp(key, "expires_in") == 0) {
417 if ((ret = hts_json_fnext(auth_fp, t, &str)) != 'n') goto error;
418 expiry = ks_release(&str);
419 } else if (hts_json_fskip_value(auth_fp, '\0') != 'v') {
420 ret = '?';
421 goto error;
422 }
423 }
424
425 if (!token || (type && strcmp(type, "Bearer") != 0)) {
426 ret = 'i';
427 goto error;
428 }
429
430 ret = 'm';
431 str.l = 0;
432 if (kputs("Authorization: Bearer ", &str) < 0) goto error;
433 if (kputs(token, &str) < 0) goto error;
434 free(tok->token);
435 tok->token = ks_release(&str);
436 if (expiry) {
437 long exp = strtol(expiry, NULL, 10);
438 if (exp < 0) exp = 0;
439 tok->expiry = time(NULL) + exp;
440 } else {
441 tok->expiry = 0;
442 }
443 ret = 'v';
444
445 error:
446 free(token);
447 free(type);
448 free(expiry);
449 free(str.s);
450 hts_json_free_token(t);
451 return ret;
452 }
453
/*
 * Read a plain-text access token: the first whitespace-delimited word on
 * the first line of auth_fp.
 * On success tok->token is replaced by "Authorization: Bearer <word>" (or
 * by NULL if the line holds no word) and tok->expiry is set to 0.
 * Returns 0 on success, -1 if the line could not be read or memory ran out.
 */
static int read_auth_plain(auth_token *tok, hFILE *auth_fp) {
    kstring_t line = {0, 0, NULL};
    kstring_t header = {0, 0, NULL};
    const char *word, *word_end;
    int status = -1;

    if (kgetline(&line, (char * (*)(char *, int, void *)) hgets, auth_fp) < 0)
        goto done;
    if (kputc('\0', &line) < 0)
        goto done;

    // Find the first run of non-space characters
    word = line.s;
    while (*word && isspace_c(*word))
        word++;
    word_end = word;
    while (*word_end && !isspace_c(*word_end))
        word_end++;

    if (word_end > word) {
        if (kputs("Authorization: Bearer ", &header) < 0
            || kputsn(word, word_end - word, &header) < 0)
            goto done;
    }

    free(tok->token);
    tok->token = ks_release(&header);
    tok->expiry = 0;
    status = 0;

 done:
    free(line.s);
    free(header.s);   // Already given away (and NULL) on the success path
    return status;
}
481
/*
 * Refresh tok from tok->path if it has expired or is about to expire.
 *
 * *changed is set to 1 if a refresh was attempted, 0 if the existing token
 * is still good (expiry of 0 means it never needs refreshing).
 * The source is treated as JSON if a '{' appears in its first 16 bytes,
 * and as plain text otherwise.  A missing source (ENOENT) is not an error:
 * the token is cleared and no further refresh will be tried.
 *
 * Returns 0 on success, -1 on failure.  After a read or parse failure
 * tok->failed is set, so later calls fail at once once the token expires.
 */
static int renew_auth_token(auth_token *tok, int *changed) {
    hFILE *auth_fp = NULL;
    char buffer[16];
    ssize_t len;

    *changed = 0;
    if (tok->expiry == 0 || time(NULL) + AUTH_REFRESH_EARLY_SECS < tok->expiry)
        return 0; // Still valid

    if (tok->failed)
        return -1;

    *changed = 1;
    auth_fp = hopen(tok->path, "rR");
    if (!auth_fp) {
        // Not worried about missing files; other errors are bad.
        if (errno != ENOENT)
            goto fail;

        tok->expiry = 0; // Prevent retry
        free(tok->token); // Just in case it was set
        tok->token = NULL; // Don't leave a dangling pointer for later users
        return 0;
    }

    len = hpeek(auth_fp, buffer, sizeof(buffer));
    if (len < 0)
        goto fail;

    if (memchr(buffer, '{', len) != NULL) {
        if (read_auth_json(tok, auth_fp) != 'v')
            goto fail;
    } else {
        if (read_auth_plain(tok, auth_fp) < 0)
            goto fail;
    }

    return hclose(auth_fp) < 0 ? -1 : 0;

 fail:
    tok->failed = 1;
    if (auth_fp) hclose_abruptly(auth_fp);
    return -1;
}
525
/*
 * Make sure fp's extra header list carries an up-to-date Authorization
 * header built from fp->headers.auth.
 *
 * Nothing is done if an Authorization header came from hopen() or the
 * header callback (auth_hdr_num < 0), or if there is no auth token.
 * Otherwise the token is renewed if necessary, and the header is then
 * added, replaced, or removed (when the token has gone away).
 * fp->headers.auth_hdr_num is updated to match.
 *
 * Returns 0 on success, -1 on failure.
 */
static int add_auth_header(hFILE_libcurl *fp) {
    int changed = 0;

    if (fp->headers.auth_hdr_num < 0)
        return 0; // Have an Authorization header from open or header callback

    if (!fp->headers.auth)
        return 0; // Nothing to add

    pthread_mutex_lock(&fp->headers.auth->lock);
    if (renew_auth_token(fp->headers.auth, &changed) < 0)
        goto unlock_fail;

    if (!changed && fp->headers.auth_hdr_num > 0) {
        // Token unchanged and its header is already in place
        pthread_mutex_unlock(&fp->headers.auth->lock);
        return 0;
    }

    if (fp->headers.auth_hdr_num > 0) {
        // Had a previous header, so swap in the new one
        char *header = fp->headers.auth->token;
        char *header_copy = header ? strdup(header) : NULL;
        int idx = fp->headers.auth_hdr_num - 1;
        if (header && !header_copy)
            goto unlock_fail;

        if (header_copy) {
            free(fp->headers.extra.list[idx].data);
            fp->headers.extra.list[idx].data = header_copy;
        } else {
            unsigned int j;
            // More complicated case - need to get rid of the old header
            // and tidy up linked lists
            free(fp->headers.extra.list[idx].data);
            for (j = idx + 1; j < fp->headers.extra.num; j++) {
                fp->headers.extra.list[j - 1] = fp->headers.extra.list[j];
                fp->headers.extra.list[j - 1].next = &fp->headers.extra.list[j];
            }
            fp->headers.extra.num--;
            if (fp->headers.extra.num > 0) {
                fp->headers.extra.list[fp->headers.extra.num-1].next = NULL;
            } else if (fp->headers.fixed.num > 0) {
                fp->headers.fixed.list[fp->headers.fixed.num - 1].next = NULL;
            }
            fp->headers.auth_hdr_num = 0;
        }
    } else if (fp->headers.auth->token) {
        // Add new header and remember where it is
        int res = append_header(&fp->headers.extra,
                                fp->headers.auth->token, 1);

        // append_header() may have created or moved the extra array (even
        // when it goes on to fail), so point the tail of the fixed list at
        // the current first extra node.  Without this the new header is
        // unreachable from the fixed list, or the link dangles.
        if (fp->headers.fixed.num > 0 && fp->headers.extra.num > 0) {
            fp->headers.fixed.list[fp->headers.fixed.num - 1].next
                = &fp->headers.extra.list[0];
        }
        if (res < 0)
            goto unlock_fail;
        fp->headers.auth_hdr_num = fp->headers.extra.num;
    }

    pthread_mutex_unlock(&fp->headers.auth->lock);
    return 0;

 unlock_fail:
    pthread_mutex_unlock(&fp->headers.auth->lock);
    return -1;
}
588
/*
 * Find (or create) the cached auth_token that applies to url, attach it to
 * fp, and add the resulting Authorization header.
 *
 * The token location is curl.auth_path with each "%h" replaced by the host
 * part of url.  Tokens are cached in curl.auth_map under that expanded
 * location.
 *
 * Returns 0 on success or when there is nothing to do, -1 on failure.
 */
static int get_auth_token(hFILE_libcurl *fp, const char *url) {
    const char *host = NULL, *p, *q;
    kstring_t name = {0, 0, NULL};
    size_t host_len = 0;
    khiter_t idx;
    auth_token *tok = NULL;

    // Nothing to do if:
    //   curl.auth_path has not been set
    //   fp was made by hfile_libcurl (e.g. auth_path is a http:// url)
    //   we already have an Authorization header
    if (!curl.auth_path || fp->is_recursive || fp->headers.auth_hdr_num != 0)
        return 0;

    // Insist on having a secure connection unless the user insists harder
    if (!curl.allow_unencrypted_auth_header && strncmp(url, "https://", 8) != 0)
        return 0;

    host = strstr(url, "://");
    if (host) {
        host += 3;
        host_len = strcspn(host, "/");
    }

    // Expand each "%h" in auth_path to the host name
    p = curl.auth_path;
    while ((q = strstr(p, "%h")) != NULL) {
        if (q - p > INT_MAX || host_len > INT_MAX) goto error;
        if (kputsn_(p, q - p, &name) < 0) goto error;
        // host is NULL (with host_len 0) when url has no "://"
        if (host_len > 0 && kputsn_(host, host_len, &name) < 0) goto error;
        p = q + 2;
    }
    if (kputs(p, &name) < 0) goto error;

    pthread_mutex_lock(&curl.auth_lock);
    idx = kh_get(auth_map, curl.auth_map, name.s);
    if (idx < kh_end(curl.auth_map)) {
        tok = kh_value(curl.auth_map, idx);
    } else {
        tok = calloc(1, sizeof(*tok));
        if (tok && pthread_mutex_init(&tok->lock, NULL) != 0) {
            free(tok);
            tok = NULL;
        }
        if (tok) {
            int ret = -1;
            tok->path = ks_release(&name);
            tok->token = NULL;
            tok->expiry = 1; // Force refresh
            idx = kh_put(auth_map, curl.auth_map, tok->path, &ret);
            if (ret < 0) {
                // Insertion failed, so idx is not a valid slot; do not
                // store anything through it.
                free_auth(tok);
                tok = NULL;
            } else {
                kh_value(curl.auth_map, idx) = tok;
            }
        }
    }
    pthread_mutex_unlock(&curl.auth_lock);

    fp->headers.auth = tok;
    free(name.s);   // NULL if ownership moved to tok->path

    return add_auth_header(fp);

 error:
    free(name.s);
    return -1;
}
656
// Drain the multi handle's message queue.  A CURLMSG_DONE message marks fp
// as finished and records the transfer's result code; other messages are
// ignored.
static void process_messages(hFILE_libcurl *fp)
{
    int remaining;
    CURLMsg *msg;

    for (msg = curl_multi_info_read(fp->multi, &remaining);
         msg != NULL;
         msg = curl_multi_info_read(fp->multi, &remaining)) {
        if (msg->msg != CURLMSG_DONE)
            continue;

        fp->finished = 1;
        fp->final_result = msg->data.result;
    }
}
674
/*
 * Wait until fp's multi handle has something to do (unless libcurl asked to
 * be called again straight away), then let it make progress with
 * curl_multi_perform().  Completion messages are processed if the number
 * of running transfers is below fp->nrunning.
 * Returns 0 on success, -1 on failure (errno is set from the multi error
 * code when curl_multi_perform() fails).
 */
static int wait_perform(hFILE_libcurl *fp)
{
    int nrunning;
    CURLMcode errm;

    if (!fp->perform_again) {
        fd_set rd, wr, ex;
        int maxfd;
        long timeout;

        FD_ZERO(&rd);
        FD_ZERO(&wr);
        FD_ZERO(&ex);

        if (curl_multi_fdset(fp->multi, &rd, &wr, &ex, &maxfd) == CURLM_OK) {
            if (curl_multi_timeout(fp->multi, &timeout) != CURLM_OK) {
                timeout = 1000;
            } else if (timeout < 0) {
                timeout = 10000; // as recommended by curl_multi_timeout(3)
            }
        } else {
            maxfd = -1;
            timeout = 1000;
        }

        // No file descriptors to wait on, so only sleep briefly
        if (maxfd < 0 && timeout > 100)
            timeout = 100; // as recommended by curl_multi_fdset(3)

        if (timeout > 0) {
            struct timeval tval;
            tval.tv_sec  = timeout / 1000;            // whole seconds
            tval.tv_usec = (timeout % 1000) * 1000;   // remaining ms, as us

            if (select(maxfd + 1, &rd, &wr, &ex, &tval) < 0)
                return -1;
        }
    }

    errm = curl_multi_perform(fp->multi, &nrunning);
    fp->perform_again = (errm == CURLM_CALL_MULTI_PERFORM);
    if (errm != CURLM_OK && errm != CURLM_CALL_MULTI_PERFORM) {
        errno = multi_errno(errm);
        return -1;
    }

    if (nrunning < fp->nrunning)
        process_messages(fp);
    return 0;
}
715
716
recv_callback(char * ptr,size_t size,size_t nmemb,void * fpv)717 static size_t recv_callback(char *ptr, size_t size, size_t nmemb, void *fpv)
718 {
719 hFILE_libcurl *fp = (hFILE_libcurl *) fpv;
720 size_t n = size * nmemb;
721
722 if (n > fp->buffer.len) { fp->paused = 1; return CURL_WRITEFUNC_PAUSE; }
723 else if (n == 0) return 0;
724
725 memcpy(fp->buffer.ptr.rd, ptr, n);
726 fp->buffer.ptr.rd += n;
727 fp->buffer.len -= n;
728 return n;
729 }
730
/*
 * hFILE backend read: fetch up to nbytes from the transfer into bufferv.
 *
 * A seek deferred by libcurl_seek() is carried out first.  A short forward
 * seek (less than MIN_SEEK_FORWARD bytes) is done by reading and discarding
 * data; anything else restarts the transfer at the new position.
 *
 * Returns the number of bytes stored in bufferv (0 once the stream has
 * finished with nothing more to deliver), or -1 with errno set on error.
 */
static ssize_t libcurl_read(hFILE *fpv, void *bufferv, size_t nbytes)
{
    hFILE_libcurl *fp = (hFILE_libcurl *) fpv;
    char *buffer = (char *) bufferv;
    off_t to_skip = -1;     // Bytes still to discard; -1 when not skipping
    ssize_t got = 0;
    CURLcode err;

    if (fp->delayed_seek >= 0) {
        assert(fp->base.offset == fp->delayed_seek
               && fp->base.begin == fp->base.buffer
               && fp->base.end == fp->base.buffer);

        if (fp->last_offset >= 0
            && fp->delayed_seek > fp->last_offset
            && fp->delayed_seek - fp->last_offset < MIN_SEEK_FORWARD) {
            // If not seeking far, just read the data and throw it away.  This
            // is likely to be quicker than opening a new stream
            to_skip = fp->delayed_seek - fp->last_offset;
        } else {
            if (restart_from_position(fp, fp->delayed_seek) < 0) {
                return -1;
            }
        }
        fp->delayed_seek = -1;
        fp->last_offset = -1;
    }

    do {
        // Hand the caller's buffer to recv_callback() and let data flow
        fp->buffer.ptr.rd = buffer;
        fp->buffer.len = nbytes;
        fp->paused = 0;
        err = curl_easy_pause(fp->easy, CURLPAUSE_CONT);
        if (err != CURLE_OK) { errno = easy_errno(fp->easy, err); return -1; }

        while (! fp->paused && ! fp->finished)
            if (wait_perform(fp) < 0) return -1;

        got = fp->buffer.ptr.rd - buffer;

        if (to_skip >= 0) { // Skipping over a small seek
            if (got <= to_skip) {
                // Everything received lies before the target: discard it all.
                // If it ended exactly at the target, to_skip becomes 0 so the
                // next chunk is returned in full rather than skipped again.
                to_skip -= got;
                got = 0;
            } else {
                // Reached the target inside this chunk; return what follows
                got -= to_skip;
                memmove(buffer, buffer + to_skip, got);
                to_skip = -1;
            }
        }
    } while (to_skip >= 0 && ! fp->finished);
    // If the stream finished while still skipping, got is 0 here, so the
    // discarded bytes are never handed back as data.
    fp->buffer.ptr.rd = NULL;
    fp->buffer.len = 0;

    if (fp->finished && fp->final_result != CURLE_OK) {
        errno = easy_errno(fp->easy, fp->final_result);
        return -1;
    }

    return got;
}
793
send_callback(char * ptr,size_t size,size_t nmemb,void * fpv)794 static size_t send_callback(char *ptr, size_t size, size_t nmemb, void *fpv)
795 {
796 hFILE_libcurl *fp = (hFILE_libcurl *) fpv;
797 size_t n = size * nmemb;
798
799 if (fp->buffer.len == 0) {
800 // Send buffer is empty; normally pause, or signal EOF if we're closing
801 if (fp->closing) return 0;
802 else { fp->paused = 1; return CURL_READFUNC_PAUSE; }
803 }
804
805 if (n > fp->buffer.len) n = fp->buffer.len;
806 memcpy(ptr, fp->buffer.ptr.wr, n);
807 fp->buffer.ptr.wr += n;
808 fp->buffer.len -= n;
809 return n;
810 }
811
/*
 * hFILE backend write: offer nbytes from bufferv to the upload and run the
 * transfer until libcurl pauses for more data or the transfer finishes.
 * Returns the number of bytes consumed by send_callback(), or -1 with
 * errno set on error.
 */
static ssize_t libcurl_write(hFILE *fpv, const void *bufferv, size_t nbytes)
{
    hFILE_libcurl *fp = (hFILE_libcurl *) fpv;
    const char *start = (const char *) bufferv;
    size_t sent;
    CURLcode err;

    // Expose the caller's data to send_callback() and resume the transfer
    fp->paused = 0;
    fp->buffer.len = nbytes;
    fp->buffer.ptr.wr = start;

    err = curl_easy_pause(fp->easy, CURLPAUSE_CONT);
    if (err != CURLE_OK) {
        errno = easy_errno(fp->easy, err);
        return -1;
    }

    while (!(fp->paused || fp->finished)) {
        if (wait_perform(fp) < 0)
            return -1;
    }

    sent = fp->buffer.ptr.wr - start;
    fp->buffer.len = 0;
    fp->buffer.ptr.wr = NULL;

    if (fp->finished && fp->final_result != CURLE_OK) {
        errno = easy_errno(fp->easy, fp->final_result);
        return -1;
    }

    return sent;
}
838
/*
 * hFILE backend seek.
 *
 * Only files opened for reading can seek, and only while no earlier seek
 * has failed.  SEEK_SET is always supported, SEEK_END only when the file
 * size is known, and SEEK_CUR is not supported (ENOSYS).
 *
 * The first seek restarts the transfer immediately.  Once that has worked,
 * later seeks are only recorded and are carried out by the next read.
 *
 * Returns the new position, or -1 with errno set on error.
 */
static off_t libcurl_seek(hFILE *fpv, off_t offset, int whence)
{
    hFILE_libcurl *fp = (hFILE_libcurl *) fpv;
    off_t origin, pos;

    if (!(fp->is_read && fp->can_seek)) {
        // Cowardly refuse to seek when writing or a previous seek failed.
        errno = ESPIPE;
        return -1;
    }

    if (whence == SEEK_SET) {
        origin = 0;
    } else if (whence == SEEK_CUR) {
        errno = ENOSYS;
        return -1;
    } else if (whence == SEEK_END) {
        if (fp->file_size < 0) {
            errno = ESPIPE;
            return -1;
        }
        origin = fp->file_size;
    } else {
        errno = EINVAL;
        return -1;
    }

    // Check 0 <= origin+offset <= fp->file_size carefully, avoiding overflow.
    // The upper bound only applies when the file size is known.
    if (offset < 0) {
        if (origin + offset < 0) {
            errno = EINVAL;
            return -1;
        }
    } else if (fp->file_size >= 0 && offset > fp->file_size - origin) {
        errno = EINVAL;
        return -1;
    }

    pos = origin + offset;

    if (!fp->tried_seek) {
        // First seek on this handle: do it now to find out if seeking works
        if (restart_from_position(fp, pos) < 0) {
            /* This value for errno may not be entirely true, but the caller
               may be able to carry on with the existing handle. */
            errno = ESPIPE;
            return -1;
        }

        fp->tried_seek = 1;
        return pos;
    }

    /* Seeking has worked at least once, so now we can delay doing
       the actual work until the next read.  This avoids lots of pointless
       http or ftp reconnections if the caller does lots of seeks
       without any intervening reads. */
    if (fp->delayed_seek < 0) {
        // Remember where the stream is now, for the short-skip optimisation
        fp->last_offset = fp->base.offset + (fp->base.end - fp->base.buffer);
    }
    fp->delayed_seek = pos;
    return pos;
}
897
/*
 * Restart fp's transfer so that it delivers data starting at byte pos.
 *
 * Headers are refreshed first (header callback and auth token).  The easy
 * handle is then duplicated and the copy is started with
 * CURLOPT_RESUME_FROM_LARGE.  If the new request gets going without error
 * it replaces the original handle in *fp; otherwise the copy is discarded
 * and *fp keeps its original handle.
 *
 * Returns 0 on success, -1 on failure with errno set where an error code
 * is known.  Failures of the new request also clear fp->can_seek so that
 * no further seeks are attempted.
 */
static int restart_from_position(hFILE_libcurl *fp, off_t pos) {
    hFILE_libcurl temp_fp;
    CURLcode err;
    CURLMcode errm;
    int update_headers = 0;
    int save_errno = 0;

    // TODO If we seem to be doing random access, use CURLOPT_RANGE to do
    // limited reads (e.g. about a BAM block!) so seeking can reuse the
    // existing connection more often.

    // Get new headers from the callback (if defined).  This changes the
    // headers in fp before it gets duplicated, but they should have been
    // sent by now.

    if (fp->headers.callback) {
        if (add_callback_headers(fp) != 0)
            return -1;
        update_headers = 1;
    }
    if (fp->headers.auth_hdr_num > 0 && fp->headers.auth) {
        if (add_auth_header(fp) != 0)
            return -1;
        update_headers = 1;
    }
    if (update_headers) {
        struct curl_slist *list = get_header_list(fp);
        if (list) {
            err = curl_easy_setopt(fp->easy, CURLOPT_HTTPHEADER, list);
            if (err != CURLE_OK) {
                errno = easy_errno(fp->easy,err);
                return -1;
            }
        }
    }

    /*
      Duplicate the easy handle, and use CURLOPT_RESUME_FROM_LARGE to open
      a new request to the server, reading from the location that we want
      to seek to.  If the new request works and returns the correct data,
      the original easy handle in *fp is closed and replaced with the new
      one.  If not, we close the new handle and leave *fp unchanged.
    */

    memcpy(&temp_fp, fp, sizeof(temp_fp));
    temp_fp.buffer.len = 0;
    temp_fp.buffer.ptr.rd = NULL;
    temp_fp.easy = curl_easy_duphandle(fp->easy);
    if (!temp_fp.easy)
        goto early_error;

    // Set the options one at a time, stopping at the first failure, so that
    // err always holds a genuine CURLcode rather than several OR-ed together.
    err = curl_easy_setopt(temp_fp.easy, CURLOPT_RESUME_FROM_LARGE,
                           (curl_off_t)pos);
    if (err == CURLE_OK)
        err = curl_easy_setopt(temp_fp.easy, CURLOPT_PRIVATE, &temp_fp);
    if (err == CURLE_OK)
        err = curl_easy_setopt(temp_fp.easy, CURLOPT_WRITEDATA, &temp_fp);
    if (err != CURLE_OK) {
        save_errno = easy_errno(temp_fp.easy, err);
        goto error;
    }

    temp_fp.buffer.len = 0;  // Ensures we only read the response headers
    temp_fp.paused = temp_fp.finished = 0;

    // fp->multi and temp_fp.multi are the same.
    errm = curl_multi_add_handle(fp->multi, temp_fp.easy);
    if (errm != CURLM_OK) {
        save_errno = multi_errno(errm);
        goto error;
    }
    temp_fp.nrunning = ++fp->nrunning;

    err = curl_easy_pause(temp_fp.easy, CURLPAUSE_CONT);
    if (err != CURLE_OK) {
        save_errno = easy_errno(temp_fp.easy, err);
        goto error_remove;
    }

    // Run until the new request either has data waiting (paused, because
    // the zero-length buffer cannot accept it) or has finished.
    while (! temp_fp.paused && ! temp_fp.finished)
        if (wait_perform(&temp_fp) < 0) {
            save_errno = errno;
            goto error_remove;
        }

    if (temp_fp.finished && temp_fp.final_result != CURLE_OK) {
        save_errno = easy_errno(temp_fp.easy, temp_fp.final_result);
        goto error_remove;
    }

    // We've got a good response, close the original connection and
    // replace it with the new one.

    errm = curl_multi_remove_handle(fp->multi, fp->easy);
    if (errm != CURLM_OK) {
        // Clean up as much as possible
        curl_easy_reset(temp_fp.easy);
        if (curl_multi_remove_handle(fp->multi, temp_fp.easy) == CURLM_OK) {
            fp->nrunning--;
            curl_easy_cleanup(temp_fp.easy);
        }
        save_errno = multi_errno(errm);
        goto early_error;
    }
    fp->nrunning--;

    curl_easy_cleanup(fp->easy);
    fp->easy = temp_fp.easy;
    // Point the surviving handle at fp instead of the on-stack temp_fp
    err = curl_easy_setopt(fp->easy, CURLOPT_WRITEDATA, fp);
    if (err == CURLE_OK)
        err = curl_easy_setopt(fp->easy, CURLOPT_PRIVATE, fp);
    if (err != CURLE_OK) {
        save_errno = easy_errno(fp->easy, err);
        curl_easy_reset(fp->easy);
        errno = save_errno;
        return -1;
    }
    fp->buffer.len = 0;
    fp->paused = temp_fp.paused;
    fp->finished = temp_fp.finished;
    fp->perform_again = temp_fp.perform_again;
    fp->final_result = temp_fp.final_result;

    return 0;

 error_remove:
    curl_easy_reset(temp_fp.easy); // Ensure no pointers to on-stack temp_fp
    errm = curl_multi_remove_handle(fp->multi, temp_fp.easy);
    if (errm != CURLM_OK) {
        errno = multi_errno(errm);
        return -1;
    }
    fp->nrunning--;
 error:
    curl_easy_cleanup(temp_fp.easy);
 early_error:
    fp->can_seek = 0;  // Don't try to seek again
    if (save_errno)
        errno = save_errno;
    return -1;
}
1035
/*
 * hFILE backend close: let the transfer wind down, then release the easy
 * and multi handles and all header storage.  The header callback, if any,
 * is called with a NULL header pointer so it can free its own data.
 * Returns 0 on success, or -1 with errno set to the first error met.
 */
static int libcurl_close(hFILE *fpv)
{
    hFILE_libcurl *fp = (hFILE_libcurl *) fpv;
    CURLcode err;
    CURLMcode errm;
    int first_error = 0;

    // Before closing the file, unpause it and perform on it so that uploads
    // have the opportunity to signal EOF to the server -- see send_callback().

    fp->buffer.len = 0;
    fp->closing = 1;
    fp->paused = 0;

    err = curl_easy_pause(fp->easy, CURLPAUSE_CONT);
    if (err != CURLE_OK)
        first_error = easy_errno(fp->easy, err);

    while (first_error == 0 && !(fp->paused || fp->finished)) {
        if (wait_perform(fp) < 0)
            first_error = errno;
    }

    // A failed transfer takes precedence over whatever was recorded above
    if (fp->finished && fp->final_result != CURLE_OK)
        first_error = easy_errno(fp->easy, fp->final_result);

    errm = curl_multi_remove_handle(fp->multi, fp->easy);
    if (first_error == 0 && errm != CURLM_OK)
        first_error = multi_errno(errm);
    fp->nrunning--;

    curl_easy_cleanup(fp->easy);
    curl_multi_cleanup(fp->multi);

    if (fp->headers.callback) // Tell callback to free any data it needs to
        fp->headers.callback(fp->headers.callback_data, NULL);
    free_headers(&fp->headers.fixed, 1);
    free_headers(&fp->headers.extra, 1);

    if (first_error == 0)
        return 0;

    errno = first_error;
    return -1;
}
1073
1074 static const struct hFILE_backend libcurl_backend =
1075 {
1076 libcurl_read, libcurl_write, libcurl_seek, NULL, libcurl_close
1077 };
1078
1079 static hFILE *
libcurl_open(const char * url,const char * modes,http_headers * headers)1080 libcurl_open(const char *url, const char *modes, http_headers *headers)
1081 {
1082 hFILE_libcurl *fp;
1083 struct curl_slist *list;
1084 char mode;
1085 const char *s;
1086 CURLcode err;
1087 CURLMcode errm;
1088 int save, is_recursive;
1089
1090 is_recursive = strchr(modes, 'R') != NULL;
1091
1092 if ((s = strpbrk(modes, "rwa+")) != NULL) {
1093 mode = *s;
1094 if (strpbrk(&s[1], "rwa+")) mode = 'e';
1095 }
1096 else mode = '\0';
1097
1098 if (mode != 'r' && mode != 'w') { errno = EINVAL; goto early_error; }
1099
1100 fp = (hFILE_libcurl *) hfile_init(sizeof (hFILE_libcurl), modes, 0);
1101 if (fp == NULL) goto early_error;
1102
1103 if (headers) {
1104 fp->headers = *headers;
1105 } else {
1106 memset(&fp->headers, 0, sizeof(fp->headers));
1107 }
1108
1109 fp->file_size = -1;
1110 fp->buffer.ptr.rd = NULL;
1111 fp->buffer.len = 0;
1112 fp->final_result = (CURLcode) -1;
1113 fp->paused = fp->closing = fp->finished = fp->perform_again = 0;
1114 fp->can_seek = 1;
1115 fp->tried_seek = 0;
1116 fp->delayed_seek = fp->last_offset = -1;
1117 fp->is_recursive = is_recursive;
1118 fp->nrunning = 0;
1119 fp->easy = NULL;
1120
1121 fp->multi = curl_multi_init();
1122 if (fp->multi == NULL) { errno = ENOMEM; goto error; }
1123
1124 fp->easy = curl_easy_init();
1125 if (fp->easy == NULL) { errno = ENOMEM; goto error; }
1126
1127 // Make a route to the hFILE_libcurl* given just a CURL* easy handle
1128 err = curl_easy_setopt(fp->easy, CURLOPT_PRIVATE, fp);
1129
1130 // Avoid many repeated CWD calls with FTP, instead requesting the filename
1131 // by full path (as done in knet, but not strictly compliant with RFC1738).
1132 err |= curl_easy_setopt(fp->easy, CURLOPT_FTP_FILEMETHOD, CURLFTPMETHOD_NOCWD);
1133
1134 if (mode == 'r') {
1135 err |= curl_easy_setopt(fp->easy, CURLOPT_WRITEFUNCTION, recv_callback);
1136 err |= curl_easy_setopt(fp->easy, CURLOPT_WRITEDATA, fp);
1137 fp->is_read = 1;
1138 }
1139 else {
1140 err |= curl_easy_setopt(fp->easy, CURLOPT_READFUNCTION, send_callback);
1141 err |= curl_easy_setopt(fp->easy, CURLOPT_READDATA, fp);
1142 err |= curl_easy_setopt(fp->easy, CURLOPT_UPLOAD, 1L);
1143 if (append_header(&fp->headers.fixed,
1144 "Transfer-Encoding: chunked", 1) < 0)
1145 goto error;
1146 fp->is_read = 0;
1147 }
1148
1149 err |= curl_easy_setopt(fp->easy, CURLOPT_SHARE, curl.share);
1150 err |= curl_easy_setopt(fp->easy, CURLOPT_URL, url);
1151 {
1152 char* env_curl_ca_bundle = getenv("CURL_CA_BUNDLE");
1153 if (env_curl_ca_bundle) {
1154 err |= curl_easy_setopt(fp->easy, CURLOPT_CAINFO, env_curl_ca_bundle);
1155 }
1156 }
1157 err |= curl_easy_setopt(fp->easy, CURLOPT_USERAGENT, curl.useragent.s);
1158 if (fp->headers.callback) {
1159 if (add_callback_headers(fp) != 0) goto error;
1160 }
1161 if (get_auth_token(fp, url) < 0)
1162 goto error;
1163 if ((list = get_header_list(fp)) != NULL)
1164 err |= curl_easy_setopt(fp->easy, CURLOPT_HTTPHEADER, list);
1165 err |= curl_easy_setopt(fp->easy, CURLOPT_FOLLOWLOCATION, 1L);
1166 if (hts_verbose <= 8)
1167 err |= curl_easy_setopt(fp->easy, CURLOPT_FAILONERROR, 1L);
1168 if (hts_verbose >= 8)
1169 err |= curl_easy_setopt(fp->easy, CURLOPT_VERBOSE, 1L);
1170
1171 if (err != 0) { errno = ENOSYS; goto error; }
1172
1173 errm = curl_multi_add_handle(fp->multi, fp->easy);
1174 if (errm != CURLM_OK) { errno = multi_errno(errm); goto error; }
1175 fp->nrunning++;
1176
1177 while (! fp->paused && ! fp->finished)
1178 if (wait_perform(fp) < 0) goto error_remove;
1179
1180 if (fp->finished && fp->final_result != CURLE_OK) {
1181 errno = easy_errno(fp->easy, fp->final_result);
1182 goto error_remove;
1183 }
1184
1185 if (mode == 'r') {
1186 double dval;
1187 if (curl_easy_getinfo(fp->easy, CURLINFO_CONTENT_LENGTH_DOWNLOAD,
1188 &dval) == CURLE_OK && dval >= 0.0)
1189 fp->file_size = (off_t) (dval + 0.1);
1190 }
1191
1192 fp->base.backend = &libcurl_backend;
1193 return &fp->base;
1194
1195 error_remove:
1196 save = errno;
1197 (void) curl_multi_remove_handle(fp->multi, fp->easy);
1198 fp->nrunning--;
1199 errno = save;
1200
1201 error:
1202 save = errno;
1203 if (fp->easy) curl_easy_cleanup(fp->easy);
1204 if (fp->multi) curl_multi_cleanup(fp->multi);
1205 free_headers(&fp->headers.extra, 1);
1206 hfile_destroy((hFILE *) fp);
1207 errno = save;
1208 return NULL;
1209
1210 early_error:
1211 return NULL;
1212 }
1213
hopen_libcurl(const char * url,const char * modes)1214 static hFILE *hopen_libcurl(const char *url, const char *modes)
1215 {
1216 return libcurl_open(url, modes, NULL);
1217 }
1218
// Append one header string to headers->fixed and, if is_authorization()
// accepts it, record that by setting headers->auth_hdr_num to -1.
// Returns 0 on success, -1 if append_header() fails.
static int parse_va_list_add_header(http_headers *headers, const char *hdr)
{
    if (append_header(&headers->fixed, hdr, 1) < 0)
        return -1;
    if (is_authorization(hdr))
        headers->auth_hdr_num = -1;
    return 0;
}

/*
 * Read option-name / value groups from `args` until a NULL option name is
 * found, storing the results in `headers`.
 *
 * Recognised option names:
 *   "httphdr"               one header string (NULL is ignored)
 *   "httphdr:l"             header strings up to a NULL
 *   "httphdr:v"             NULL-terminated array of header strings
 *                           (a NULL array is treated as empty)
 *   "httphdr_callback"      header callback function
 *   "httphdr_callback_data" pointer handed to that callback
 *   "va_list"               pointer to a nested va_list, parsed recursively
 *                           (NULL is ignored)
 *   "auth_token_enabled"    the string "false" sets auth_hdr_num to -3;
 *                           any other value, including NULL, changes nothing
 *
 * Returns 0 on success.  Returns -1 if a header could not be appended, or
 * with errno set to EINVAL if an option name is not recognised.  Headers
 * appended before the failure remain in headers->fixed.
 */
static int parse_va_list(http_headers *headers, va_list args)
{
    const char *argtype;

    while ((argtype = va_arg(args, const char *)) != NULL) {
        if (strcmp(argtype, "httphdr:v") == 0) {
            const char **hdr = va_arg(args, const char **);
            // Guard against a NULL array instead of dereferencing it
            for (; hdr != NULL && *hdr != NULL; hdr++) {
                if (parse_va_list_add_header(headers, *hdr) < 0)
                    return -1;
            }
        }
        else if (strcmp(argtype, "httphdr:l") == 0) {
            const char *hdr;
            while ((hdr = va_arg(args, const char *)) != NULL) {
                if (parse_va_list_add_header(headers, hdr) < 0)
                    return -1;
            }
        }
        else if (strcmp(argtype, "httphdr") == 0) {
            const char *hdr = va_arg(args, const char *);
            if (hdr) {
                if (parse_va_list_add_header(headers, hdr) < 0)
                    return -1;
            }
        }
        else if (strcmp(argtype, "httphdr_callback") == 0) {
            headers->callback = va_arg(args, const hts_httphdr_callback);
        }
        else if (strcmp(argtype, "httphdr_callback_data") == 0) {
            headers->callback_data = va_arg(args, void *);
        }
        else if (strcmp(argtype, "va_list") == 0) {
            va_list *args2 = va_arg(args, va_list *);
            if (args2) {
                if (parse_va_list(headers, *args2) < 0) return -1;
            }
        }
        else if (strcmp(argtype, "auth_token_enabled") == 0) {
            const char *flag = va_arg(args, const char *);
            // strcmp() on NULL is undefined; treat NULL like any non-"false"
            if (flag != NULL && strcmp(flag, "false") == 0)
                headers->auth_hdr_num = -3;
        }
        else { errno = EINVAL; return -1; }
    }

    return 0;
}
1272
1273 /*
1274 HTTP headers to be added to the request can be passed in as extra
1275 arguments to hopen(). The headers can be specified as follows:
1276
1277 * Single header:
1278 hopen(url, mode, "httphdr", "X-Hdr-1: text", NULL);
1279
1280 * Multiple headers in the argument list:
1281 hopen(url, mode, "httphdr:l", "X-Hdr-1: text", "X-Hdr-2: text", NULL, NULL);
1282
1283 * Multiple headers in a char* array:
1284 hopen(url, mode, "httphdr:v", hdrs, NULL);
1285 where `hdrs` is a char **. The list ends with a NULL pointer.
1286
1287 * A callback function
1288 hopen(url, mode, "httphdr_callback", func,
1289 "httphdr_callback_data", arg, NULL);
1290 `func` has type
1291 int (* hts_httphdr_callback) (void *cb_data, char ***hdrs);
1292 `arg` is passed to the callback as a void *.
1293
1294 The function is called at file open, and when attempting to seek (which
1295 opens a new HTTP request). This allows, for example, access tokens
1296 that may have gone stale to be regenerated. The function is also
1297 called (with `hdrs` == NULL) on file close so that the callback can
1298 free any memory that it needs to.
1299
1300 The callback should return 0 on success, non-zero on failure. It should
1301 return in *hdrs a list of strings containing the new headers (terminated
1302 with a NULL pointer). These will replace any headers previously supplied
1303 by the callback. If no changes are necessary, it can return NULL
1304 in *hdrs, in which case the previous headers will be left unchanged.
1305
1306 Ownership of the strings in the header list passes to hfile_libcurl,
1307 so the callback should not attempt to use or free them itself. The memory
1308 containing the array belongs to the callback and will not be freed by
1309 hfile_libcurl.
1310
1311 Headers supplied by the callback are appended after any specified
1312 using the "httphdr", "httphdr:l" or "httphdr:v" methods. No attempt
1313 is made to replace these headers (even if a key is repeated) so anything
1314 that is expected to vary needs to come from the callback.
1315 */
1316
vhopen_libcurl(const char * url,const char * modes,va_list args)1317 static hFILE *vhopen_libcurl(const char *url, const char *modes, va_list args)
1318 {
1319 hFILE *fp = NULL;
1320 http_headers headers = { { NULL, 0, 0 }, { NULL, 0, 0 }, NULL, NULL };
1321 if (parse_va_list(&headers, args) == 0) {
1322 fp = libcurl_open(url, modes, &headers);
1323 }
1324
1325 if (!fp) {
1326 free_headers(&headers.fixed, 1);
1327 }
1328 return fp;
1329 }
1330
PLUGIN_GLOBAL(hfile_plugin_init,_libcurl)1331 int PLUGIN_GLOBAL(hfile_plugin_init,_libcurl)(struct hFILE_plugin *self)
1332 {
1333 static const struct hFILE_scheme_handler handler =
1334 { hopen_libcurl, hfile_always_remote, "libcurl",
1335 2000 + 50,
1336 vhopen_libcurl };
1337
1338 #ifdef ENABLE_PLUGINS
1339 // Embed version string for examination via strings(1) or what(1)
1340 static const char id[] = "@(#)hfile_libcurl plugin (htslib)\t" HTS_VERSION;
1341 const char *version = strchr(id, '\t')+1;
1342 #else
1343 const char *version = hts_version();
1344 #endif
1345 const curl_version_info_data *info;
1346 const char * const *protocol;
1347 const char *auth;
1348 CURLcode err;
1349 CURLSHcode errsh;
1350
1351 err = curl_global_init(CURL_GLOBAL_ALL);
1352 if (err != CURLE_OK) { errno = easy_errno(NULL, err); return -1; }
1353
1354 curl.share = curl_share_init();
1355 if (curl.share == NULL) { curl_global_cleanup(); errno = EIO; return -1; }
1356 errsh = curl_share_setopt(curl.share, CURLSHOPT_LOCKFUNC, share_lock);
1357 errsh |= curl_share_setopt(curl.share, CURLSHOPT_UNLOCKFUNC, share_unlock);
1358 errsh |= curl_share_setopt(curl.share, CURLSHOPT_SHARE, CURL_LOCK_DATA_DNS);
1359 if (errsh != 0) {
1360 curl_share_cleanup(curl.share);
1361 curl_global_cleanup();
1362 errno = EIO;
1363 return -1;
1364 }
1365
1366 if ((auth = getenv("HTS_AUTH_LOCATION")) != NULL) {
1367 curl.auth_path = strdup(auth);
1368 curl.auth_map = kh_init(auth_map);
1369 if (!curl.auth_path || !curl.auth_map) {
1370 int save_errno = errno;
1371 free(curl.auth_path);
1372 kh_destroy(auth_map, curl.auth_map);
1373 curl_share_cleanup(curl.share);
1374 curl_global_cleanup();
1375 errno = save_errno;
1376 return -1;
1377 }
1378 }
1379 if ((auth = getenv("HTS_ALLOW_UNENCRYPTED_AUTHORIZATION_HEADER")) != NULL
1380 && strcmp(auth, "I understand the risks") == 0) {
1381 curl.allow_unencrypted_auth_header = 1;
1382 }
1383
1384 info = curl_version_info(CURLVERSION_NOW);
1385 ksprintf(&curl.useragent, "htslib/%s libcurl/%s", version, info->version);
1386
1387 self->name = "libcurl";
1388 self->destroy = libcurl_exit;
1389
1390 for (protocol = info->protocols; *protocol; protocol++)
1391 hfile_add_scheme_handler(*protocol, &handler);
1392 return 0;
1393 }
1394