1 /* hfile_libcurl.c -- libcurl backend for low-level file streams.
2
3 Copyright (C) 2015-2017, 2019-2020 Genome Research Ltd.
4
5 Author: John Marshall <jm18@sanger.ac.uk>
6
7 Permission is hereby granted, free of charge, to any person obtaining a copy
8 of this software and associated documentation files (the "Software"), to deal
9 in the Software without restriction, including without limitation the rights
10 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 copies of the Software, and to permit persons to whom the Software is
12 furnished to do so, subject to the following conditions:
13
14 The above copyright notice and this permission notice shall be included in
15 all copies or substantial portions of the Software.
16
17 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 DEALINGS IN THE SOFTWARE. */
24
25 #define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h
26 #include <config.h>
27
28 #include <stdarg.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <strings.h>
32 #include <errno.h>
33 #include <pthread.h>
34 #ifndef _WIN32
35 # include <sys/select.h>
36 #endif
37 #include <assert.h>
38
39 #include "hfile_internal.h"
40 #ifdef ENABLE_PLUGINS
41 #include "version.h"
42 #endif
43 #include "htslib/hts.h" // for hts_version() and hts_verbose
44 #include "htslib/kstring.h"
45 #include "htslib/khash.h"
46
47 #include <curl/curl.h>
48
// Number of seconds to take off auth_token expiry, to allow for clock skew
// and slow servers.  A token is re-read once the current time is within
// this many seconds of its recorded expiry.
#define AUTH_REFRESH_EARLY_SECS 60

// Minimum number of bytes to skip when seeking forward.  Seeks less than
// this will just read the data and throw it away.  The optimal value
// depends on how long it takes to make a new connection compared
// to how fast the data arrives.
#define MIN_SEEK_FORWARD 1000000
58
// A cached authorization token, read from the location given by 'path'.
typedef struct {
    char *path;            // Location the token is read from (also the cache key)
    char *token;           // Complete "Authorization: Bearer ..." header, or NULL
    time_t expiry;         // Expiry time; 0 means the token is never refreshed
    int failed;            // Set when a refresh has failed, to prevent retries
    pthread_mutex_t lock;  // Held while the token is renewed or copied
} auth_token;
66
// For the authorization header cache: a string-keyed hash map from token
// location to the auth_token read from it.
KHASH_MAP_INIT_STR(auth_map, auth_token *)
69
// Curl-compatible header linked list.  The nodes live in a single growable
// array and are chained together through their 'next' pointers.
typedef struct {
    struct curl_slist *list;  // Array of list nodes
    unsigned int num;         // Number of nodes currently in use
    unsigned int size;        // Number of nodes allocated in 'list'
} hdrlist;
76
// Per-handle HTTP header state.  When both lists are non-empty the last
// node of 'fixed' is linked to the first node of 'extra', so that libcurl
// sees them as one list.
typedef struct {
    hdrlist fixed;                   // List of headers supplied at hopen()
    hdrlist extra;                   // List of headers from callback
    hts_httphdr_callback callback;   // Callback to get more headers
    void *callback_data;             // Data to pass to httphdr callback
    auth_token *auth;                // Authentication token
    int auth_hdr_num;                // Location of auth_token in hdrlist extra
                                     // (1-based; 0 means no such header)
                                     // If -1, Authorization header is in fixed
                                     //    -2, it came from the callback
                                     //    -3, "auth_token_enabled", "false"
                                     //        passed to hopen()
    redirect_callback redirect;      // Callback to handle 3xx redirects
    void *redirect_data;             // Data to pass to redirect_callback
    long *http_response_ptr;         // Location to store http response code.
    int fail_on_error;               // Open fails on >400 response code
                                     //  (default true)
} http_headers;
94
// hFILE backend state for a single libcurl transfer.
typedef struct {
    hFILE base;             // Generic hFILE part; must be the first member
    CURL *easy;             // Easy handle performing the transfer
    CURLM *multi;           // Multi handle that drives 'easy'
    off_t file_size;        // Size of the remote file; negative if not known
    struct {
        // Caller's buffer for the transfer callbacks: 'rd' is the next place
        // to store received data, 'wr' the next byte to be sent.
        union { char *rd; const char *wr; } ptr;
        size_t len;         // Space (reading) or data (writing) remaining
    } buffer;
    CURLcode final_result;  // easy result code for finished transfers
    // Flags for communicating with libcurl callbacks:
    unsigned paused : 1;    // callback tells us that it has paused transfer
    unsigned closing : 1;   // informs callback that hclose() has been invoked
    unsigned finished : 1;  // wait_perform() tells us transfer is complete
    unsigned perform_again : 1; // curl_multi_perform() asked to be called
                                // again straight away, without waiting
    unsigned is_read : 1;   // Opened in read mode
    unsigned can_seek : 1;  // Can (attempt to) seek on this handle
    unsigned is_recursive:1; // Opened by hfile_libcurl itself
    unsigned tried_seek : 1; // At least one seek has been attempted
    int nrunning;           // Number of easy handles added to 'multi'
    http_headers headers;   // Request headers and related callbacks

    off_t delayed_seek;      // Location to seek to before reading
    off_t last_offset;       // Location we're seeking from
    char *preserved;         // Preserved buffer content on seek
    size_t preserved_bytes;  // Number of preserved bytes
    size_t preserved_size;   // Size of preserved buffer
} hFILE_libcurl;
123
// Forward declarations for functions that are used before they are defined.
static off_t libcurl_seek(hFILE *fpv, off_t offset, int whence);
static int restart_from_position(hFILE_libcurl *fp, off_t pos);
126
/*
 * Translate an HTTP response status into an errno value.
 * Returns 0 for any status below 400; otherwise a specific errno for the
 * statuses listed, EINVAL for other 4xx codes and EIO for other codes
 * of 500 or above.
 */
static int http_status_errno(int status)
{
    if (status < 400)
        return 0;

    if (status < 500) {
        // Client errors
        switch (status) {
        case 401:
        case 407:
            return EPERM;
        case 403:
            return EACCES;
        case 404:
        case 410:
            return ENOENT;
        case 405:
            return EROFS;
        case 408:
            return ETIMEDOUT;
        default:
            return EINVAL;
        }
    }

    // Server errors
    switch (status) {
    case 501:
        return ENOSYS;
    case 503:
        return EBUSY;
    case 504:
        return ETIMEDOUT;
    default:
        return EIO;
    }
}
149
/*
 * Translate a libcurl easy-interface result code into an errno value.
 *
 * easy  Handle the error came from; queried for the OS errno or the HTTP
 *       response code where those give a more precise answer.
 * err   The CURLcode to translate.
 *
 * Returns 0 for CURLE_OK, otherwise a non-zero errno value.  Codes with no
 * specific mapping are logged and reported as EIO.
 */
static int easy_errno(CURL *easy, CURLcode err)
{
    long lval = 0;

    switch (err) {
    case CURLE_OK:
        return 0;

    case CURLE_UNSUPPORTED_PROTOCOL:
    case CURLE_URL_MALFORMAT:
        return EINVAL;

#if LIBCURL_VERSION_NUM >= 0x071505
    case CURLE_NOT_BUILT_IN:
        return ENOSYS;
#endif

    case CURLE_COULDNT_RESOLVE_PROXY:
    case CURLE_COULDNT_RESOLVE_HOST:
    case CURLE_FTP_CANT_GET_HOST:
        return EDESTADDRREQ; // Lookup failure

    case CURLE_COULDNT_CONNECT:
    case CURLE_SEND_ERROR:
    case CURLE_RECV_ERROR:
        // The OS errno is only useful if libcurl actually recorded one.
        // A value of zero must not be returned, as callers would then
        // report a failure with errno set to "no error".
        if (curl_easy_getinfo(easy, CURLINFO_OS_ERRNO, &lval) == CURLE_OK
            && lval != 0)
            return lval;
        else
            return ECONNABORTED;

    case CURLE_REMOTE_ACCESS_DENIED:
    case CURLE_LOGIN_DENIED:
    case CURLE_TFTP_PERM:
        return EACCES;

    case CURLE_PARTIAL_FILE:
        return EPIPE;

    case CURLE_HTTP_RETURNED_ERROR:
        if (curl_easy_getinfo(easy, CURLINFO_RESPONSE_CODE, &lval) == CURLE_OK)
            return http_status_errno(lval);
        else
            return EIO;

    case CURLE_OUT_OF_MEMORY:
        return ENOMEM;

    case CURLE_OPERATION_TIMEDOUT:
        return ETIMEDOUT;

    case CURLE_RANGE_ERROR:
        return ESPIPE;

    case CURLE_SSL_CONNECT_ERROR:
        // TODO return SSL error buffer messages
        return ECONNABORTED;

    case CURLE_FILE_COULDNT_READ_FILE:
    case CURLE_TFTP_NOTFOUND:
        return ENOENT;

    case CURLE_TOO_MANY_REDIRECTS:
        return ELOOP;

    case CURLE_FILESIZE_EXCEEDED:
        return EFBIG;

    case CURLE_REMOTE_DISK_FULL:
        return ENOSPC;

    case CURLE_REMOTE_FILE_EXISTS:
        return EEXIST;

    default:
        hts_log_error("Libcurl reported error %d (%s)", (int) err,
                      curl_easy_strerror(err));
        return EIO;
    }
}
229
/*
 * Translate a libcurl multi-interface result code into an errno value.
 * CURLM_OK and CURLM_CALL_MULTI_PERFORM are not errors and give 0; codes
 * without a specific mapping are logged and reported as EIO.
 */
static int multi_errno(CURLMcode errm)
{
    if (errm == CURLM_OK || errm == CURLM_CALL_MULTI_PERFORM)
        return 0;

    if (errm == CURLM_BAD_HANDLE
        || errm == CURLM_BAD_EASY_HANDLE
        || errm == CURLM_BAD_SOCKET)
        return EBADF;

    if (errm == CURLM_OUT_OF_MEMORY)
        return ENOMEM;

    hts_log_error("Libcurl reported error %d (%s)", (int) errm,
                  curl_multi_strerror(errm));
    return EIO;
}
251
// State shared by every handle opened through this backend.
static struct {
    kstring_t useragent;                // User-agent string storage
    CURLSH *share;                      // libcurl share handle
    char *auth_path;                    // Auth token location; each "%h" in
                                        // it is replaced by the URL's host
    khash_t(auth_map) *auth_map;        // Cache of tokens, keyed by location
    int allow_unencrypted_auth_header;  // Non-zero allows tokens to be used
                                        // with URLs that are not https://
    pthread_mutex_t auth_lock;          // Held while auth_map is accessed
    pthread_mutex_t share_lock;         // Used by share_lock()/share_unlock()
} curl = { { 0, 0, NULL }, NULL, NULL, NULL, 0, PTHREAD_MUTEX_INITIALIZER,
           PTHREAD_MUTEX_INITIALIZER };
262
/*
 * Lock callback for the libcurl share interface.  One mutex covers all
 * kinds of shared data, so the arguments are not needed.
 */
static void share_lock(CURL *handle, curl_lock_data data,
                       curl_lock_access access, void *userptr) {
    (void) handle;
    (void) data;
    (void) access;
    (void) userptr;
    pthread_mutex_lock(&curl.share_lock);
}
267
/*
 * Unlock callback for the libcurl share interface; releases the mutex
 * taken by share_lock().
 */
static void share_unlock(CURL *handle, curl_lock_data data, void *userptr) {
    (void) handle;
    (void) data;
    (void) userptr;
    pthread_mutex_unlock(&curl.share_lock);
}
271
/*
 * Destroy an auth_token and everything it owns.  NULL is accepted and
 * ignored.  Aborts if the token's mutex cannot be destroyed.
 */
static void free_auth(auth_token *tok) {
    if (tok == NULL)
        return;

    if (pthread_mutex_destroy(&tok->lock) != 0)
        abort();

    free(tok->path);
    free(tok->token);
    free(tok);
}
279
libcurl_exit()280 static void libcurl_exit()
281 {
282 if (curl_share_cleanup(curl.share) == CURLSHE_OK)
283 curl.share = NULL;
284
285 free(curl.useragent.s);
286 curl.useragent.l = curl.useragent.m = 0; curl.useragent.s = NULL;
287
288 free(curl.auth_path);
289 curl.auth_path = NULL;
290
291 if (curl.auth_map) {
292 khiter_t i;
293 for (i = kh_begin(curl.auth_map); i != kh_end(curl.auth_map); ++i) {
294 if (kh_exist(curl.auth_map, i)) {
295 free_auth(kh_value(curl.auth_map, i));
296 kh_key(curl.auth_map, i) = NULL;
297 kh_value(curl.auth_map, i) = NULL;
298 }
299 }
300 kh_destroy(auth_map, curl.auth_map);
301 curl.auth_map = NULL;
302 }
303
304 curl_global_cleanup();
305 }
306
/*
 * Add a header to the end of a header list, growing the node array if
 * necessary.
 *
 * hdrs  List to append to.
 * data  Header text.
 * dup   If non-zero a copy of data is stored; otherwise the list stores the
 *       pointer itself.
 *
 * Returns 0 on success, -1 if memory could not be allocated.
 */
static int append_header(hdrlist *hdrs, const char *data, int dup) {
    struct curl_slist *node;

    if (hdrs->num == hdrs->size) {
        unsigned int capacity = (hdrs->size != 0) ? hdrs->size * 2 : 4;
        unsigned int k;
        struct curl_slist *grown = realloc(hdrs->list,
                                           capacity * sizeof(*grown));
        if (grown == NULL)
            return -1;
        hdrs->list = grown;
        hdrs->size = capacity;
        // The array may have moved, so rebuild the links between nodes
        for (k = 0; k + 1 < hdrs->num; k++)
            grown[k].next = &grown[k + 1];
    }

    node = &hdrs->list[hdrs->num];
    // Annoyingly, libcurl doesn't declare the char * as const...
    node->data = dup ? strdup(data) : (char *) data;
    if (node->data == NULL)
        return -1;

    if (hdrs->num > 0)
        node[-1].next = node;
    node->next = NULL;
    hdrs->num++;
    return 0;
}
325
/*
 * Free the header strings held in a list and mark the list as empty.
 * If 'completely' is non-zero the node array is released as well;
 * otherwise it is kept so that it can be reused.
 */
static void free_headers(hdrlist *hdrs, int completely) {
    unsigned int k = 0;

    while (k < hdrs->num) {
        struct curl_slist *node = &hdrs->list[k++];
        free(node->data);
        node->data = NULL;
        node->next = NULL;
    }
    hdrs->num = 0;

    if (!completely)
        return;

    free(hdrs->list);
    hdrs->list = NULL;
    hdrs->size = 0;
}
340
/*
 * Return the head of the header list to give to libcurl: the first fixed
 * header if there are any, otherwise the first extra header, otherwise a
 * null pointer.
 */
static struct curl_slist * get_header_list(hFILE_libcurl *fp) {
    http_headers *h = &fp->headers;

    if (h->fixed.num > 0)
        return h->fixed.list;
    if (h->extra.num > 0)
        return h->extra.list;
    return 0;
}
348
/*
 * Returns 1 if hdr begins with "authorization:" (compared without regard
 * to case), 0 otherwise.
 */
static inline int is_authorization(const char *hdr) {
    static const char prefix[] = "authorization:";
    return strncasecmp(prefix, hdr, sizeof(prefix) - 1) == 0 ? 1 : 0;
}
352
/*
 * Replace the "extra" header list with headers obtained from the header
 * callback, if one has been set.
 *
 * The callback supplies a NULL-terminated array of strings.  Each string
 * stored in the extra list becomes the list's property, and the matching
 * entry in the callback's array is set to NULL.  If the callback leaves
 * the array pointer as NULL the existing headers are kept.
 *
 * Returns 0 on success (including when there is no callback or no change),
 * -1 if the callback failed or a header could not be stored.  On a storage
 * failure the strings that were not stored are freed and every entry of
 * the callback's array is set to NULL.
 */
static int add_callback_headers(hFILE_libcurl *fp) {
    char **hdrs = NULL, **hdr, **stored;

    if (!fp->headers.callback)
        return 0;

    // Get the headers from the callback
    if (fp->headers.callback(fp->headers.callback_data, &hdrs) != 0) {
        return -1;
    }

    if (!hdrs) // No change
        return 0;

    // Remove any old callback headers
    if (fp->headers.fixed.num > 0) {
        // Unlink lists
        fp->headers.fixed.list[fp->headers.fixed.num - 1].next = NULL;
    }
    free_headers(&fp->headers.extra, 0);

    if (fp->headers.auth_hdr_num > 0 || fp->headers.auth_hdr_num == -2)
        fp->headers.auth_hdr_num = 0; // Just removed it...

    // Convert to libcurl-suitable form
    for (hdr = hdrs; *hdr; hdr++) {
        if (append_header(&fp->headers.extra, *hdr, 0) < 0) {
            goto cleanup;
        }
        if (is_authorization(*hdr) && !fp->headers.auth_hdr_num)
            fp->headers.auth_hdr_num = -2;
    }
    for (hdr = hdrs; *hdr; hdr++) *hdr = NULL;

    if (fp->headers.fixed.num > 0 && fp->headers.extra.num > 0) {
        // Relink lists
        fp->headers.fixed.list[fp->headers.fixed.num - 1].next
            = &fp->headers.extra.list[0];
    }
    return 0;

 cleanup:
    // Entries before hdr are now owned by the extra list, which will free
    // them.  Drop the array's references so they cannot be freed twice.
    for (stored = hdrs; stored < hdr; stored++)
        *stored = NULL;

    // The entry that failed and everything after it were never stored,
    // so they have to be released here.
    for (; *hdr; hdr++) {
        free(*hdr);
        *hdr = NULL;
    }
    return -1;
}
401
402 /*
403 * Read an OAUTH2-style Bearer access token (see
404 * https://tools.ietf.org/html/rfc6750#section-4).
405 * Returns 'v' for valid; 'i' for invalid (token missing or wrong sort);
406 * '?' for a JSON parse error; 'm' if it runs out of memory.
407 */
read_auth_json(auth_token * tok,hFILE * auth_fp)408 static int read_auth_json(auth_token *tok, hFILE *auth_fp) {
409 hts_json_token *t = hts_json_alloc_token();
410 kstring_t str = {0, 0, NULL};
411 char *token = NULL, *type = NULL, *expiry = NULL;
412 int ret = 'i';
413
414 if (!t) goto error;
415
416 if ((ret = hts_json_fnext(auth_fp, t, &str)) != '{') goto error;
417 while (hts_json_fnext(auth_fp, t, &str) != '}') {
418 char *key;
419 if (hts_json_token_type(t) != 's') {
420 ret = '?';
421 goto error;
422 }
423 key = hts_json_token_str(t);
424 if (!key) goto error;
425 if (strcmp(key, "access_token") == 0) {
426 if ((ret = hts_json_fnext(auth_fp, t, &str)) != 's') goto error;
427 token = ks_release(&str);
428 } else if (strcmp(key, "token_type") == 0) {
429 if ((ret = hts_json_fnext(auth_fp, t, &str)) != 's') goto error;
430 type = ks_release(&str);
431 } else if (strcmp(key, "expires_in") == 0) {
432 if ((ret = hts_json_fnext(auth_fp, t, &str)) != 'n') goto error;
433 expiry = ks_release(&str);
434 } else if (hts_json_fskip_value(auth_fp, '\0') != 'v') {
435 ret = '?';
436 goto error;
437 }
438 }
439
440 if (!token || (type && strcmp(type, "Bearer") != 0)) {
441 ret = 'i';
442 goto error;
443 }
444
445 ret = 'm';
446 str.l = 0;
447 if (kputs("Authorization: Bearer ", &str) < 0) goto error;
448 if (kputs(token, &str) < 0) goto error;
449 free(tok->token);
450 tok->token = ks_release(&str);
451 if (expiry) {
452 long exp = strtol(expiry, NULL, 10);
453 if (exp < 0) exp = 0;
454 tok->expiry = time(NULL) + exp;
455 } else {
456 tok->expiry = 0;
457 }
458 ret = 'v';
459
460 error:
461 free(token);
462 free(type);
463 free(expiry);
464 free(str.s);
465 hts_json_free_token(t);
466 return ret;
467 }
468
/*
 * Read a token from a plain text file.  The token is the first run of
 * non-whitespace characters on the first line.
 *
 * On success tok->token is replaced by "Authorization: Bearer <token>", or
 * by NULL if the line contained no token, and tok->expiry is set to 0.
 *
 * Returns 0 on success, -1 on a read or memory allocation failure (in
 * which case tok is left unchanged).
 */
static int read_auth_plain(auth_token *tok, hFILE *auth_fp) {
    kstring_t raw = {0, 0, NULL};
    kstring_t header = {0, 0, NULL};
    const char *first, *last;
    int status = -1;

    if (kgetline(&raw, (char * (*)(char *, int, void *)) hgets, auth_fp) < 0)
        goto done;
    if (kputc('\0', &raw) < 0)
        goto done;

    // Skip leading white space, then find the end of the token
    first = raw.s;
    while (*first && isspace_c(*first))
        first++;
    last = first;
    while (*last && !isspace_c(*last))
        last++;

    if (last > first) {
        if (kputs("Authorization: Bearer ", &header) < 0) goto done;
        if (kputsn(first, last - first, &header) < 0) goto done;
    }

    free(tok->token);
    tok->token = ks_release(&header);   // Leaves header.s as NULL
    tok->expiry = 0;
    status = 0;

 done:
    free(raw.s);
    free(header.s);
    return status;
}
496
/*
 * Re-read an authorization token if it is about to expire.
 *
 * tok      Token to check.  The caller is expected to hold tok->lock.
 * changed  Set to 1 if an attempt was made to re-read the token (so
 *          tok->token may be different, possibly NULL), otherwise 0.
 *
 * A token with expiry 0 is never refreshed.  If the token file does not
 * exist the token is cleared and no further attempts are made.  Any other
 * failure to open or parse the file marks the token as failed, after which
 * every later renewal attempt fails immediately.
 *
 * Returns 0 on success, -1 on failure.
 */
static int renew_auth_token(auth_token *tok, int *changed) {
    hFILE *auth_fp = NULL;
    char buffer[16];
    ssize_t len;

    *changed = 0;
    if (tok->expiry == 0 || time(NULL) + AUTH_REFRESH_EARLY_SECS < tok->expiry)
        return 0; // Still valid

    if (tok->failed)
        return -1;

    *changed = 1;
    auth_fp = hopen(tok->path, "rR");
    if (!auth_fp) {
        // Not worried about missing files; other errors are bad.
        if (errno != ENOENT)
            goto fail;

        tok->expiry = 0; // Prevent retry
        free(tok->token); // Just in case it was set
        // The token is read again by add_auth_header() and freed again by
        // free_auth(), so the stale pointer must not be left behind.
        tok->token = NULL;
        return 0;
    }

    // Look at the start of the file to decide which format it is in
    len = hpeek(auth_fp, buffer, sizeof(buffer));
    if (len < 0)
        goto fail;

    if (memchr(buffer, '{', len) != NULL) {
        if (read_auth_json(tok, auth_fp) != 'v')
            goto fail;
    } else {
        if (read_auth_plain(tok, auth_fp) < 0)
            goto fail;
    }

    return hclose(auth_fp) < 0 ? -1 : 0;

 fail:
    tok->failed = 1;
    if (auth_fp) hclose_abruptly(auth_fp);
    return -1;
}
540
/*
 * Make the Authorization header in fp's extra header list match the
 * current value of fp's auth token, renewing the token first if necessary.
 *
 * Does nothing if an Authorization header was supplied some other way
 * (auth_hdr_num < 0) or if fp has no auth token.  Otherwise the token's
 * lock is held while the token is renewed and copied.  Depending on the
 * result, the existing header is replaced, removed (when the token has
 * gone away) or a new one is appended.
 *
 * Returns 0 on success, -1 on failure.
 */
static int add_auth_header(hFILE_libcurl *fp) {
    int changed = 0;

    if (fp->headers.auth_hdr_num < 0)
        return 0; // Have an Authorization header from open or header callback

    if (!fp->headers.auth)
        return 0; // Nothing to add

    pthread_mutex_lock(&fp->headers.auth->lock);
    if (renew_auth_token(fp->headers.auth, &changed) < 0)
        goto unlock_fail;

    // Token is unchanged and its header is already in the list
    if (!changed && fp->headers.auth_hdr_num > 0) {
        pthread_mutex_unlock(&fp->headers.auth->lock);
        return 0;
    }

    if (fp->headers.auth_hdr_num > 0) {
        // Had a previous header, so swap in the new one
        char *header = fp->headers.auth->token;
        char *header_copy = header ? strdup(header) : NULL;
        int idx = fp->headers.auth_hdr_num - 1; // auth_hdr_num is 1-based
        if (header && !header_copy)
            goto unlock_fail;

        if (header_copy) {
            free(fp->headers.extra.list[idx].data);
            fp->headers.extra.list[idx].data = header_copy;
        } else {
            unsigned int j;
            // More complicated case - need to get rid of the old header
            // and tidy up linked lists
            free(fp->headers.extra.list[idx].data);
            // Move the following nodes down by one, relinking as we go
            for (j = idx + 1; j < fp->headers.extra.num; j++) {
                fp->headers.extra.list[j - 1] = fp->headers.extra.list[j];
                fp->headers.extra.list[j - 1].next = &fp->headers.extra.list[j];
            }
            fp->headers.extra.num--;
            // Terminate whichever list now ends the chain
            if (fp->headers.extra.num > 0) {
                fp->headers.extra.list[fp->headers.extra.num-1].next = NULL;
            } else if (fp->headers.fixed.num > 0) {
                fp->headers.fixed.list[fp->headers.fixed.num - 1].next = NULL;
            }
            fp->headers.auth_hdr_num = 0;
        }
    } else if (fp->headers.auth->token) {
        // Add new header and remember where it is
        if (append_header(&fp->headers.extra,
                          fp->headers.auth->token, 1) < 0) {
            goto unlock_fail;
        }
        fp->headers.auth_hdr_num = fp->headers.extra.num;
    }

    pthread_mutex_unlock(&fp->headers.auth->lock);
    return 0;

 unlock_fail:
    pthread_mutex_unlock(&fp->headers.auth->lock);
    return -1;
}
603
/*
 * Find (or create) the cached auth token that applies to url, attach it to
 * fp and add the corresponding Authorization header.
 *
 * The token location is curl.auth_path with every "%h" replaced by the
 * host part of url.  Tokens are cached in curl.auth_map under that
 * location, so handles that resolve to the same location share one token.
 *
 * Returns 0 on success, including the cases where no token applies; -1 on
 * failure.  If a new cache entry cannot be allocated, fp is left without a
 * token and no header is added.
 */
static int get_auth_token(hFILE_libcurl *fp, const char *url) {
    const char *host = NULL, *p, *q;
    kstring_t name = {0, 0, NULL};
    size_t host_len = 0;
    khiter_t idx;
    auth_token *tok = NULL;

    // Nothing to do if:
    //   curl.auth_path has not been set
    //   fp was made by hfile_libcurl (e.g. auth_path is a http:// url)
    //   we already have an Authorization header
    if (!curl.auth_path || fp->is_recursive || fp->headers.auth_hdr_num != 0)
        return 0;

    // Insist on having a secure connection unless the user insists harder
    if (!curl.allow_unencrypted_auth_header && strncmp(url, "https://", 8) != 0)
        return 0;

    host = strstr(url, "://");
    if (host) {
        host += 3;
        host_len = strcspn(host, "/");
    }

    // Expand each "%h" in the auth path to the host name
    p = curl.auth_path;
    while ((q = strstr(p, "%h")) != NULL) {
        if (q - p > INT_MAX || host_len > INT_MAX) goto error;
        if (kputsn_(p, q - p, &name) < 0) goto error;
        if (kputsn_(host, host_len, &name) < 0) goto error;
        p = q + 2;
    }
    if (kputs(p, &name) < 0) goto error;

    pthread_mutex_lock(&curl.auth_lock);
    idx = kh_get(auth_map, curl.auth_map, name.s);
    if (idx < kh_end(curl.auth_map)) {
        tok = kh_value(curl.auth_map, idx);
    } else {
        tok = calloc(1, sizeof(*tok));
        if (tok && pthread_mutex_init(&tok->lock, NULL) != 0) {
            free(tok);
            tok = NULL;
        }
        if (tok) {
            int ret = -1;
            tok->path = ks_release(&name);
            tok->token = NULL;
            tok->expiry = 1; // Force refresh
            idx = kh_put(auth_map, curl.auth_map, tok->path, &ret);
            if (ret < 0) {
                // No slot was created: idx is not a valid bucket, so it
                // must not be used to store a value.
                free_auth(tok);
                tok = NULL;
            } else {
                kh_value(curl.auth_map, idx) = tok;
            }
        }
    }
    pthread_mutex_unlock(&curl.auth_lock);

    fp->headers.auth = tok;
    free(name.s);

    return add_auth_header(fp);

 error:
    free(name.s);
    return -1;
}
671
/*
 * Drain the multi handle's message queue.  A CURLMSG_DONE message marks
 * the transfer as finished and records its result code; other message
 * types are ignored.
 */
static void process_messages(hFILE_libcurl *fp)
{
    int remaining;
    CURLMsg *msg = curl_multi_info_read(fp->multi, &remaining);

    while (msg != NULL) {
        if (msg->msg == CURLMSG_DONE) {
            fp->finished = 1;
            fp->final_result = msg->data.result;
        }
        msg = curl_multi_info_read(fp->multi, &remaining);
    }
}
689
/*
 * Wait until the multi handle has something to do, then let libcurl make
 * progress on its transfers.
 *
 * The wait is skipped when the previous curl_multi_perform() asked to be
 * called again immediately.  If the number of running transfers has fallen
 * below fp->nrunning, the message queue is processed, which may set
 * fp->finished.
 *
 * Returns 0 on success; -1 on failure, with errno set by select() or
 * derived from the libcurl error code.
 */
static int wait_perform(hFILE_libcurl *fp)
{
    fd_set rd, wr, ex;
    int maxfd, nrunning;
    long timeout;
    CURLMcode errm;

    if (!fp->perform_again) {
        FD_ZERO(&rd);
        FD_ZERO(&wr);
        FD_ZERO(&ex);
        // Find out which descriptors to watch, and for how long (in ms)
        if (curl_multi_fdset(fp->multi, &rd, &wr, &ex, &maxfd) != CURLM_OK)
            maxfd = -1, timeout = 1000;
        else {
            if (curl_multi_timeout(fp->multi, &timeout) != CURLM_OK)
                timeout = 1000;
            else if (timeout < 0) {
                timeout = 10000;  // as recommended by curl_multi_timeout(3)
            }
        }
        // No descriptors to wait on, so just sleep for a short while
        if (maxfd < 0) {
            if (timeout > 100)
                timeout = 100; // as recommended by curl_multi_fdset(3)
#ifdef _WIN32
            /* Windows ignores the first argument of select, so calling select
             * with maxfd=-1 does not give the expected result of sleeping for
             * timeout milliseconds in the conditional block below.
             * So sleep here and skip the next block.
             */
            Sleep(timeout);
            timeout = 0;
#endif
        }

        if (timeout > 0) {
            struct timeval tval;
            tval.tv_sec  = (timeout / 1000);
            tval.tv_usec = (timeout % 1000) * 1000;

            if (select(maxfd + 1, &rd, &wr, &ex, &tval) < 0) return -1;
        }
    }

    errm = curl_multi_perform(fp->multi, &nrunning);
    fp->perform_again = 0;
    if (errm == CURLM_CALL_MULTI_PERFORM) fp->perform_again = 1;
    else if (errm != CURLM_OK) { errno = multi_errno(errm); return -1; }

    // Fewer transfers running than expected means at least one has ended
    if (nrunning < fp->nrunning) process_messages(fp);
    return 0;
}
741
742
recv_callback(char * ptr,size_t size,size_t nmemb,void * fpv)743 static size_t recv_callback(char *ptr, size_t size, size_t nmemb, void *fpv)
744 {
745 hFILE_libcurl *fp = (hFILE_libcurl *) fpv;
746 size_t n = size * nmemb;
747
748 if (n > fp->buffer.len) {
749 fp->paused = 1;
750 return CURL_WRITEFUNC_PAUSE;
751 }
752 else if (n == 0) return 0;
753
754 memcpy(fp->buffer.ptr.rd, ptr, n);
755 fp->buffer.ptr.rd += n;
756 fp->buffer.len -= n;
757 return n;
758 }
759
760
header_callback(void * contents,size_t size,size_t nmemb,void * userp)761 static size_t header_callback(void *contents, size_t size, size_t nmemb,
762 void *userp)
763 {
764 size_t realsize = size * nmemb;
765 kstring_t *resp = (kstring_t *)userp;
766
767 if (kputsn((const char *)contents, realsize, resp) == EOF) {
768 return 0;
769 }
770
771 return realsize;
772 }
773
774
/*
 * hFILE read method: read up to nbytes into bufferv.
 *
 * Any seek that libcurl_seek() postponed is dealt with first:
 *  - if the target lies inside the buffer content saved when seeking began,
 *    the data is served from that copy;
 *  - if it is less than MIN_SEEK_FORWARD bytes ahead of the current stream
 *    position, the intervening data is read and discarded;
 *  - otherwise the transfer is restarted from the target position.
 *
 * Returns the number of bytes stored in bufferv (0 at end of data), or
 * -1 with errno set on failure.
 */
static ssize_t libcurl_read(hFILE *fpv, void *bufferv, size_t nbytes)
{
    hFILE_libcurl *fp = (hFILE_libcurl *) fpv;
    char *buffer = (char *) bufferv;
    off_t to_skip = -1;
    ssize_t got = 0;
    CURLcode err;

    if (fp->delayed_seek >= 0) {
        assert(fp->base.offset == fp->delayed_seek);

        if (fp->preserved
            && fp->last_offset > fp->delayed_seek
            && fp->last_offset - fp->preserved_bytes <= fp->delayed_seek) {
            // Can use buffer contents copied when seeking started, to
            // avoid having to re-read data discarded by hseek().
            // Note fp->last_offset is the offset of the *end* of the
            // preserved buffer.
            size_t n = fp->last_offset - fp->delayed_seek;
            char *start = fp->preserved + (fp->preserved_bytes - n);
            size_t bytes = n <= nbytes ? n : nbytes;
            memcpy(buffer, start, bytes);
            if (bytes < n) { // Part of the preserved buffer still left
                fp->delayed_seek += bytes;
            } else {
                fp->last_offset = fp->delayed_seek = -1;
            }
            return bytes;
        }

        if (fp->last_offset >= 0
            && fp->delayed_seek > fp->last_offset
            && fp->delayed_seek - fp->last_offset < MIN_SEEK_FORWARD) {
            // If not seeking far, just read the data and throw it away.  This
            // is likely to be quicker than opening a new stream
            to_skip = fp->delayed_seek - fp->last_offset;
        } else {
            if (restart_from_position(fp, fp->delayed_seek) < 0) {
                return -1;
            }
        }
        fp->delayed_seek = -1;
        fp->last_offset = -1;
        fp->preserved_bytes = 0;
    }

    do {
        // Point the receive callback at the caller's buffer and let the
        // transfer run until the callback pauses it or it finishes.
        fp->buffer.ptr.rd = buffer;
        fp->buffer.len = nbytes;
        fp->paused = 0;
        if (!fp->finished) {
            err = curl_easy_pause(fp->easy, CURLPAUSE_CONT);
            if (err != CURLE_OK) {
                errno = easy_errno(fp->easy, err);
                return -1;
            }
        }

        while (! fp->paused && ! fp->finished) {
            if (wait_perform(fp) < 0) return -1;
        }

        got = fp->buffer.ptr.rd - buffer;

        if (to_skip >= 0) { // Skipping over a small seek
            if (got <= to_skip) {
                // Everything received so far is before the seek target.
                // Count it off and report none of it to the caller.  If
                // the skip has been used up exactly, to_skip becomes zero
                // and the next pass returns its data untouched.
                to_skip -= got;
                got = 0;
            } else {
                // Drop the remainder of the skipped region; the rest is
                // the data that was asked for.
                got -= to_skip;
                memmove(buffer, buffer + to_skip, got);
                to_skip = -1;
            }
        }
    } while (to_skip >= 0 && ! fp->finished);
    fp->buffer.ptr.rd = NULL;
    fp->buffer.len = 0;

    if (fp->finished && fp->final_result != CURLE_OK) {
        errno = easy_errno(fp->easy, fp->final_result);
        return -1;
    }

    return got;
}
861
send_callback(char * ptr,size_t size,size_t nmemb,void * fpv)862 static size_t send_callback(char *ptr, size_t size, size_t nmemb, void *fpv)
863 {
864 hFILE_libcurl *fp = (hFILE_libcurl *) fpv;
865 size_t n = size * nmemb;
866
867 if (fp->buffer.len == 0) {
868 // Send buffer is empty; normally pause, or signal EOF if we're closing
869 if (fp->closing) return 0;
870 else { fp->paused = 1; return CURL_READFUNC_PAUSE; }
871 }
872
873 if (n > fp->buffer.len) n = fp->buffer.len;
874 memcpy(ptr, fp->buffer.ptr.wr, n);
875 fp->buffer.ptr.wr += n;
876 fp->buffer.len -= n;
877 return n;
878 }
879
/*
 * hFILE write method: hand the caller's data to the upload and run the
 * transfer until the send callback pauses it or it finishes.
 *
 * Returns the number of bytes taken from bufferv, or -1 with errno set on
 * failure.
 */
static ssize_t libcurl_write(hFILE *fpv, const void *bufferv, size_t nbytes)
{
    hFILE_libcurl *fp = (hFILE_libcurl *) fpv;
    const char *start = (const char *) bufferv;
    CURLcode rc;

    fp->buffer.ptr.wr = start;
    fp->buffer.len = nbytes;
    fp->paused = 0;

    rc = curl_easy_pause(fp->easy, CURLPAUSE_CONT);
    if (rc != CURLE_OK) {
        errno = easy_errno(fp->easy, rc);
        return -1;
    }

    for (;;) {
        if (fp->paused || fp->finished)
            break;
        if (wait_perform(fp) < 0)
            return -1;
    }

    // The callback advanced the pointer past everything it has sent
    nbytes = fp->buffer.ptr.wr - start;
    fp->buffer.ptr.wr = NULL;
    fp->buffer.len = 0;

    if (fp->finished && fp->final_result != CURLE_OK) {
        errno = easy_errno(fp->easy, fp->final_result);
        return -1;
    }

    return nbytes;
}
906
/*
 * Save a copy of the unread part of the hFILE buffer, so that a later read
 * at a position inside it can be served without re-reading the data.
 *
 * The preserved buffer is sized to match the whole hFILE buffer.  If it has
 * to be enlarged, the old one is released first, as its content is about to
 * be replaced anyway.  If memory cannot be allocated, no data is preserved
 * and fp->preserved is left as NULL; the next call will try again.
 */
static void preserve_buffer_content(hFILE_libcurl *fp)
{
    size_t capacity, unread;

    if (fp->base.begin == fp->base.end) {
        fp->preserved_bytes = 0;
        return;
    }

    capacity = fp->base.limit - fp->base.buffer;
    unread = fp->base.end - fp->base.begin;

    if (!fp->preserved || fp->preserved_size < capacity) {
        free(fp->preserved);
        fp->preserved = malloc(capacity);
        if (!fp->preserved) {
            // Do not leave sizes that describe a buffer that no longer exists
            fp->preserved_size = 0;
            fp->preserved_bytes = 0;
            return;
        }
        fp->preserved_size = capacity;
    }

    assert(unread <= fp->preserved_size);

    memcpy(fp->preserved, fp->base.begin, unread);
    fp->preserved_bytes = unread;
}
926
/*
 * hFILE seek method.
 *
 * Only handles opened for reading can seek, and only from SEEK_SET or
 * (when the file size is known) SEEK_END.  The first seek restarts the
 * transfer straight away, to find out whether seeking works.  Once one has
 * succeeded, later seeks just record the target and leave the work to the
 * next read.
 *
 * Returns the new position, or -1 with errno set on failure.
 */
static off_t libcurl_seek(hFILE *fpv, off_t offset, int whence)
{
    hFILE_libcurl *fp = (hFILE_libcurl *) fpv;
    off_t origin, pos;
    int out_of_range;

    if (!(fp->is_read && fp->can_seek)) {
        // Cowardly refuse to seek when writing or a previous seek failed.
        errno = ESPIPE;
        return -1;
    }

    if (whence == SEEK_SET) {
        origin = 0;
    } else if (whence == SEEK_END) {
        if (fp->file_size < 0) {
            errno = ESPIPE;
            return -1;
        }
        origin = fp->file_size;
    } else {
        errno = (whence == SEEK_CUR) ? ENOSYS : EINVAL;
        return -1;
    }

    // The target must not be negative, nor beyond the end of the file when
    // the size is known.  Written carefully to avoid overflow.
    if (offset < 0)
        out_of_range = (origin + offset < 0);
    else
        out_of_range = (fp->file_size >= 0
                        && offset > fp->file_size - origin);
    if (out_of_range) {
        errno = EINVAL;
        return -1;
    }

    pos = origin + offset;

    if (!fp->tried_seek) {
        if (restart_from_position(fp, pos) < 0) {
            /* This value for errno may not be entirely true, but the caller
               may be able to carry on with the existing handle. */
            errno = ESPIPE;
            return -1;
        }

        fp->tried_seek = 1;
        return pos;
    }

    /* Seeking has worked at least once, so now we can delay doing
       the actual work until the next read.  This avoids lots of pointless
       http or ftp reconnections if the caller does lots of seeks
       without any intervening reads. */
    if (fp->delayed_seek < 0) {
        fp->last_offset = fp->base.offset + (fp->base.end - fp->base.buffer);
        // Stash the current hFILE buffer content in case it's useful later
        preserve_buffer_content(fp);
    }
    fp->delayed_seek = pos;
    return pos;
}
987
/*
 * Start a new transfer that reads from byte offset pos, and switch fp over
 * to it if it works.
 *
 * Headers are refreshed from the header callback and auth token first.
 * The new request is made on a duplicate of fp's easy handle and driven
 * through a temporary on-stack copy of *fp until its callback pauses it or
 * it finishes.  On success the old easy handle is removed and cleaned up,
 * and the new one takes its place in *fp.
 *
 * Returns 0 on success, -1 with errno set on failure.  Most failures also
 * clear fp->can_seek so that no further seeks are attempted.
 */
static int restart_from_position(hFILE_libcurl *fp, off_t pos) {
    hFILE_libcurl temp_fp;
    CURLcode err;
    CURLMcode errm;
    int update_headers = 0;
    int save_errno = 0;

    // TODO If we seem to be doing random access, use CURLOPT_RANGE to do
    // limited reads (e.g. about a BAM block!) so seeking can reuse the
    // existing connection more often.

    // Get new headers from the callback (if defined).  This changes the
    // headers in fp before it gets duplicated, but they should have been
    // sent by now.

    if (fp->headers.callback) {
        if (add_callback_headers(fp) != 0)
            return -1;
        update_headers = 1;
    }
    if (fp->headers.auth_hdr_num > 0 && fp->headers.auth) {
        if (add_auth_header(fp) != 0)
            return -1;
        update_headers = 1;
    }
    if (update_headers) {
        struct curl_slist *list = get_header_list(fp);
        if (list) {
            err = curl_easy_setopt(fp->easy, CURLOPT_HTTPHEADER, list);
            if (err != CURLE_OK) {
                errno = easy_errno(fp->easy,err);
                return -1;
            }
        }
    }

    /*
      Duplicate the easy handle, and use CURLOPT_RESUME_FROM_LARGE to open
      a new request to the server, reading from the location that we want
      to seek to.  If the new request works and returns the correct data,
      the original easy handle in *fp is closed and replaced with the new
      one.  If not, we close the new handle and leave *fp unchanged.
     */

    memcpy(&temp_fp, fp, sizeof(temp_fp));
    temp_fp.buffer.len = 0;
    temp_fp.buffer.ptr.rd = NULL;
    temp_fp.easy = curl_easy_duphandle(fp->easy);
    if (!temp_fp.easy)
        goto early_error;

    // The callbacks for the new handle must work on temp_fp, not *fp
    err = curl_easy_setopt(temp_fp.easy, CURLOPT_RESUME_FROM_LARGE,(curl_off_t)pos);
    err |= curl_easy_setopt(temp_fp.easy, CURLOPT_PRIVATE, &temp_fp);
    err |= curl_easy_setopt(temp_fp.easy, CURLOPT_WRITEDATA, &temp_fp);
    if (err != CURLE_OK) {
        save_errno = easy_errno(temp_fp.easy, err);
        goto error;
    }

    temp_fp.buffer.len = 0;  // Ensures we only read the response headers
    temp_fp.paused = temp_fp.finished = 0;

    // fp->multi and temp_fp.multi are the same.
    errm = curl_multi_add_handle(fp->multi, temp_fp.easy);
    if (errm != CURLM_OK) {
        save_errno = multi_errno(errm);
        goto error;
    }
    temp_fp.nrunning = ++fp->nrunning;

    // Run until the first body data arrives (which pauses the transfer,
    // as the buffer has no space) or the transfer ends.
    while (! temp_fp.paused && ! temp_fp.finished)
        if (wait_perform(&temp_fp) < 0) {
            save_errno = errno;
            goto error_remove;
        }

    if (temp_fp.finished && temp_fp.final_result != CURLE_OK) {
        save_errno = easy_errno(temp_fp.easy, temp_fp.final_result);
        goto error_remove;
    }

    // We've got a good response, close the original connection and
    // replace it with the new one.

    errm = curl_multi_remove_handle(fp->multi, fp->easy);
    if (errm != CURLM_OK) {
        // Clean up as much as possible
        curl_easy_reset(temp_fp.easy);
        if (curl_multi_remove_handle(fp->multi, temp_fp.easy) == CURLM_OK) {
            fp->nrunning--;
            curl_easy_cleanup(temp_fp.easy);
        }
        save_errno = multi_errno(errm);
        goto early_error;
    }
    fp->nrunning--;

    curl_easy_cleanup(fp->easy);
    fp->easy = temp_fp.easy;
    // Redirect the new handle's callbacks from temp_fp to *fp
    err = curl_easy_setopt(fp->easy, CURLOPT_WRITEDATA, fp);
    err |= curl_easy_setopt(fp->easy, CURLOPT_PRIVATE, fp);
    if (err != CURLE_OK) {
        save_errno = easy_errno(fp->easy, err);
        curl_easy_reset(fp->easy);
        errno = save_errno;
        return -1;
    }
    // Take over the transfer state accumulated in temp_fp
    fp->buffer.len = 0;
    fp->paused = temp_fp.paused;
    fp->finished = temp_fp.finished;
    fp->perform_again = temp_fp.perform_again;
    fp->final_result = temp_fp.final_result;

    return 0;

 error_remove:
    curl_easy_reset(temp_fp.easy); // Ensure no pointers to on-stack temp_fp
    errm = curl_multi_remove_handle(fp->multi, temp_fp.easy);
    if (errm != CURLM_OK) {
        errno = multi_errno(errm);
        return -1;
    }
    fp->nrunning--;
 error:
    curl_easy_cleanup(temp_fp.easy);
 early_error:
    fp->can_seek = 0;  // Don't try to seek again
    if (save_errno)
        errno = save_errno;
    return -1;
}
1119
/*
 * hFILE backend close hook for libcurl-backed files.
 *
 * Resumes the transfer (if it has not already finished) and keeps performing
 * until it pauses again or finishes, so that uploads get the chance to signal
 * EOF to the server -- see send_callback().  The easy and multi handles, the
 * header lists and the preserved buffer are then released.
 *
 * Returns 0 on success, or -1 with errno set if any step reported an error.
 * All resources are released in either case.
 */
static int libcurl_close(hFILE *fpv)
{
    hFILE_libcurl *fp = (hFILE_libcurl *) fpv;
    int close_errno = 0;
    CURLMcode mrc;

    fp->buffer.len = 0;
    fp->closing = 1;
    fp->paused = 0;

    if (!fp->finished) {
        CURLcode rc = curl_easy_pause(fp->easy, CURLPAUSE_CONT);
        if (rc != CURLE_OK)
            close_errno = easy_errno(fp->easy, rc);
    }

    // Drive the transfer until it pauses or completes, unless an error
    // has already been recorded.
    while (close_errno == 0 && !fp->paused && !fp->finished) {
        if (wait_perform(fp) < 0)
            close_errno = errno;
    }

    // A failed transfer result takes precedence over any earlier error.
    if (fp->finished && fp->final_result != CURLE_OK)
        close_errno = easy_errno(fp->easy, fp->final_result);

    mrc = curl_multi_remove_handle(fp->multi, fp->easy);
    if (close_errno == 0 && mrc != CURLM_OK)
        close_errno = multi_errno(mrc);
    fp->nrunning--;

    curl_easy_cleanup(fp->easy);
    curl_multi_cleanup(fp->multi);

    // Calling the header callback with a NULL list tells it to free
    // whatever data it is holding.
    if (fp->headers.callback)
        fp->headers.callback(fp->headers.callback_data, NULL);
    free_headers(&fp->headers.fixed, 1);
    free_headers(&fp->headers.extra, 1);

    free(fp->preserved);

    if (close_errno == 0)
        return 0;

    errno = close_errno;
    return -1;
}
1161
1162 static const struct hFILE_backend libcurl_backend =
1163 {
1164 libcurl_read, libcurl_write, libcurl_seek, NULL, libcurl_close
1165 };
1166
1167 static hFILE *
libcurl_open(const char * url,const char * modes,http_headers * headers)1168 libcurl_open(const char *url, const char *modes, http_headers *headers)
1169 {
1170 hFILE_libcurl *fp;
1171 struct curl_slist *list;
1172 char mode;
1173 const char *s;
1174 CURLcode err;
1175 CURLMcode errm;
1176 int save, is_recursive;
1177 kstring_t in_header = {0, 0, NULL};
1178 long response;
1179
1180 is_recursive = strchr(modes, 'R') != NULL;
1181
1182 if ((s = strpbrk(modes, "rwa+")) != NULL) {
1183 mode = *s;
1184 if (strpbrk(&s[1], "rwa+")) mode = 'e';
1185 }
1186 else mode = '\0';
1187
1188 if (mode != 'r' && mode != 'w') { errno = EINVAL; goto early_error; }
1189
1190 fp = (hFILE_libcurl *) hfile_init(sizeof (hFILE_libcurl), modes, 0);
1191 if (fp == NULL) goto early_error;
1192
1193 if (headers) {
1194 fp->headers = *headers;
1195 } else {
1196 memset(&fp->headers, 0, sizeof(fp->headers));
1197 fp->headers.fail_on_error = 1;
1198 }
1199
1200 fp->file_size = -1;
1201 fp->buffer.ptr.rd = NULL;
1202 fp->buffer.len = 0;
1203 fp->final_result = (CURLcode) -1;
1204 fp->paused = fp->closing = fp->finished = fp->perform_again = 0;
1205 fp->can_seek = 1;
1206 fp->tried_seek = 0;
1207 fp->delayed_seek = fp->last_offset = -1;
1208 fp->preserved = NULL;
1209 fp->preserved_bytes = fp->preserved_size = 0;
1210 fp->is_recursive = is_recursive;
1211 fp->nrunning = 0;
1212 fp->easy = NULL;
1213
1214 fp->multi = curl_multi_init();
1215 if (fp->multi == NULL) { errno = ENOMEM; goto error; }
1216
1217 fp->easy = curl_easy_init();
1218 if (fp->easy == NULL) { errno = ENOMEM; goto error; }
1219
1220 // Make a route to the hFILE_libcurl* given just a CURL* easy handle
1221 err = curl_easy_setopt(fp->easy, CURLOPT_PRIVATE, fp);
1222
1223 // Avoid many repeated CWD calls with FTP, instead requesting the filename
1224 // by full path (as done in knet, but not strictly compliant with RFC1738).
1225 err |= curl_easy_setopt(fp->easy, CURLOPT_FTP_FILEMETHOD, CURLFTPMETHOD_NOCWD);
1226
1227 if (mode == 'r') {
1228 err |= curl_easy_setopt(fp->easy, CURLOPT_WRITEFUNCTION, recv_callback);
1229 err |= curl_easy_setopt(fp->easy, CURLOPT_WRITEDATA, fp);
1230 fp->is_read = 1;
1231 }
1232 else {
1233 err |= curl_easy_setopt(fp->easy, CURLOPT_READFUNCTION, send_callback);
1234 err |= curl_easy_setopt(fp->easy, CURLOPT_READDATA, fp);
1235 err |= curl_easy_setopt(fp->easy, CURLOPT_UPLOAD, 1L);
1236 if (append_header(&fp->headers.fixed,
1237 "Transfer-Encoding: chunked", 1) < 0)
1238 goto error;
1239 fp->is_read = 0;
1240 }
1241
1242 err |= curl_easy_setopt(fp->easy, CURLOPT_SHARE, curl.share);
1243 err |= curl_easy_setopt(fp->easy, CURLOPT_URL, url);
1244 {
1245 char* env_curl_ca_bundle = getenv("CURL_CA_BUNDLE");
1246 if (env_curl_ca_bundle) {
1247 err |= curl_easy_setopt(fp->easy, CURLOPT_CAINFO, env_curl_ca_bundle);
1248 }
1249 }
1250 err |= curl_easy_setopt(fp->easy, CURLOPT_USERAGENT, curl.useragent.s);
1251 if (fp->headers.callback) {
1252 if (add_callback_headers(fp) != 0) goto error;
1253 }
1254 if (get_auth_token(fp, url) < 0)
1255 goto error;
1256 if ((list = get_header_list(fp)) != NULL)
1257 err |= curl_easy_setopt(fp->easy, CURLOPT_HTTPHEADER, list);
1258
1259 if (hts_verbose <= 8 && fp->headers.fail_on_error)
1260 err |= curl_easy_setopt(fp->easy, CURLOPT_FAILONERROR, 1L);
1261 if (hts_verbose >= 8)
1262 err |= curl_easy_setopt(fp->easy, CURLOPT_VERBOSE, 1L);
1263
1264 if (fp->headers.redirect) {
1265 err |= curl_easy_setopt(fp->easy, CURLOPT_HEADERFUNCTION, header_callback);
1266 err |= curl_easy_setopt(fp->easy, CURLOPT_HEADERDATA, (void *)&in_header);
1267 } else {
1268 err |= curl_easy_setopt(fp->easy, CURLOPT_FOLLOWLOCATION, 1L);
1269 }
1270
1271 if (err != 0) { errno = ENOSYS; goto error; }
1272
1273 errm = curl_multi_add_handle(fp->multi, fp->easy);
1274 if (errm != CURLM_OK) { errno = multi_errno(errm); goto error; }
1275 fp->nrunning++;
1276
1277 while (! fp->paused && ! fp->finished) {
1278 if (wait_perform(fp) < 0) goto error_remove;
1279 }
1280
1281 curl_easy_getinfo(fp->easy, CURLINFO_RESPONSE_CODE, &response);
1282 if (fp->headers.http_response_ptr) {
1283 *fp->headers.http_response_ptr = response;
1284 }
1285
1286 if (fp->finished && fp->final_result != CURLE_OK) {
1287 errno = easy_errno(fp->easy, fp->final_result);
1288 goto error_remove;
1289 }
1290
1291 if (fp->headers.redirect) {
1292 if (response >= 300 && response < 400) { // redirection
1293 kstring_t new_url = {0, 0, NULL};
1294
1295 if (fp->headers.redirect(fp->headers.redirect_data, response,
1296 &in_header, &new_url)) {
1297 errno = ENOSYS;
1298 goto error;
1299 }
1300
1301 err |= curl_easy_setopt(fp->easy, CURLOPT_URL, new_url.s);
1302 err |= curl_easy_setopt(fp->easy, CURLOPT_HEADERFUNCTION, NULL);
1303 err |= curl_easy_setopt(fp->easy, CURLOPT_HEADERDATA, NULL);
1304 free(ks_release(&in_header));
1305
1306 if (err != 0) { errno = ENOSYS; goto error; }
1307 free(ks_release(&new_url));
1308
1309 if (restart_from_position(fp, 0) < 0) {
1310 goto error_remove;
1311 }
1312
1313 if (fp->headers.http_response_ptr) {
1314 curl_easy_getinfo(fp->easy, CURLINFO_RESPONSE_CODE,
1315 fp->headers.http_response_ptr);
1316 }
1317
1318 if (fp->finished && fp->final_result != CURLE_OK) {
1319 errno = easy_errno(fp->easy, fp->final_result);
1320 goto error_remove;
1321 }
1322 } else {
1323 // we no longer need to look at the headers
1324 err |= curl_easy_setopt(fp->easy, CURLOPT_HEADERFUNCTION, NULL);
1325 err |= curl_easy_setopt(fp->easy, CURLOPT_HEADERDATA, NULL);
1326 free(ks_release(&in_header));
1327
1328 if (err != 0) { errno = ENOSYS; goto error; }
1329 }
1330 }
1331
1332 if (mode == 'r') {
1333 double dval;
1334
1335 if (curl_easy_getinfo(fp->easy, CURLINFO_CONTENT_LENGTH_DOWNLOAD,
1336 &dval) == CURLE_OK && dval >= 0.0)
1337 fp->file_size = (off_t) (dval + 0.1);
1338 }
1339
1340 fp->base.backend = &libcurl_backend;
1341 return &fp->base;
1342
1343 error_remove:
1344 save = errno;
1345 (void) curl_multi_remove_handle(fp->multi, fp->easy);
1346 fp->nrunning--;
1347 errno = save;
1348
1349 error:
1350 if (fp->headers.redirect) free(in_header.s);
1351 save = errno;
1352 if (fp->easy) curl_easy_cleanup(fp->easy);
1353 if (fp->multi) curl_multi_cleanup(fp->multi);
1354 free_headers(&fp->headers.extra, 1);
1355 hfile_destroy((hFILE *) fp);
1356 errno = save;
1357 return NULL;
1358
1359 early_error:
1360 return NULL;
1361 }
1362
hopen_libcurl(const char * url,const char * modes)1363 static hFILE *hopen_libcurl(const char *url, const char *modes)
1364 {
1365 return libcurl_open(url, modes, NULL);
1366 }
1367
/*
 * Append one header string to the fixed header list and, if it is an
 * Authorization header, record that by setting auth_hdr_num to -1.
 * Returns 0 on success, -1 if append_header() failed.
 */
static int parse_va_add_header(http_headers *headers, const char *hdr)
{
    if (append_header(&headers->fixed, hdr, 1) < 0)
        return -1;
    if (is_authorization(hdr))
        headers->auth_hdr_num = -1;
    return 0;
}

/*
 * Read the extra option arguments given to hopen() and store them in
 * *headers.
 *
 * `args` is a sequence of option names (const char *), each followed by the
 * value(s) that option takes; a NULL option name ends the sequence.
 * Recognised names: "httphdr", "httphdr:l", "httphdr:v", "httphdr_callback",
 * "httphdr_callback_data", "va_list" (a va_list * that is parsed
 * recursively), "auth_token_enabled", "redirect_callback",
 * "redirect_callback_data", "http_response_ptr" and "fail_on_error".
 *
 * A NULL value for "httphdr", "httphdr:v", "va_list" or
 * "auth_token_enabled" is accepted and ignored.
 *
 * Returns 0 on success.  Returns -1 if a header could not be appended, or
 * with errno set to EINVAL if an option name is not recognised.
 */
static int parse_va_list(http_headers *headers, va_list args)
{
    const char *argtype;

    while ((argtype = va_arg(args, const char *)) != NULL) {
        if (strcmp(argtype, "httphdr:v") == 0) {
            // NULL-terminated array of header strings
            const char **hdr = va_arg(args, const char **);
            for (; hdr != NULL && *hdr != NULL; hdr++) {
                if (parse_va_add_header(headers, *hdr) < 0)
                    return -1;
            }
        }
        else if (strcmp(argtype, "httphdr:l") == 0) {
            // Header strings inline in the argument list, ended by NULL
            const char *hdr;
            while ((hdr = va_arg(args, const char *)) != NULL) {
                if (parse_va_add_header(headers, hdr) < 0)
                    return -1;
            }
        }
        else if (strcmp(argtype, "httphdr") == 0) {
            const char *hdr = va_arg(args, const char *);
            if (hdr != NULL && parse_va_add_header(headers, hdr) < 0)
                return -1;
        }
        else if (strcmp(argtype, "httphdr_callback") == 0) {
            headers->callback = va_arg(args, const hts_httphdr_callback);
        }
        else if (strcmp(argtype, "httphdr_callback_data") == 0) {
            headers->callback_data = va_arg(args, void *);
        }
        else if (strcmp(argtype, "va_list") == 0) {
            va_list *args2 = va_arg(args, va_list *);
            if (args2) {
                if (parse_va_list(headers, *args2) < 0) return -1;
            }
        }
        else if (strcmp(argtype, "auth_token_enabled") == 0) {
            const char *flag = va_arg(args, const char *);
            // Guard against NULL: strcmp() on a null pointer is undefined
            if (flag != NULL && strcmp(flag, "false") == 0)
                headers->auth_hdr_num = -3;
        }
        else if (strcmp(argtype, "redirect_callback") == 0) {
            headers->redirect = va_arg(args, const redirect_callback);
        }
        else if (strcmp(argtype, "redirect_callback_data") == 0) {
            headers->redirect_data = va_arg(args, void *);
        }
        else if (strcmp(argtype, "http_response_ptr") == 0) {
            headers->http_response_ptr = va_arg(args, long *);
        }
        else if (strcmp(argtype, "fail_on_error") == 0) {
            headers->fail_on_error = va_arg(args, int);
        }
        else { errno = EINVAL; return -1; }
    }

    return 0;
}
1433
/*
  HTTP headers to be added to the request can be passed in as extra
  arguments to hopen().  The headers can be specified as follows:

  * Single header:
    hopen(url, mode, "httphdr", "X-Hdr-1: text", NULL);

  * Multiple headers in the argument list:
    hopen(url, mode, "httphdr:l", "X-Hdr-1: text", "X-Hdr-2: text", NULL, NULL);
    The first NULL ends the list of headers; the second ends the list of
    hopen() options.

  * Multiple headers in a char* array:
    hopen(url, mode, "httphdr:v", hdrs, NULL);
    where `hdrs` is a char **.  The list ends with a NULL pointer.

  * A callback function:
    hopen(url, mode, "httphdr_callback", func,
                     "httphdr_callback_data", arg, NULL);
    `func` has type
         int (* hts_httphdr_callback) (void *cb_data, char ***hdrs);
    `arg` is passed to the callback as a void *.

    The function is called at file open, and when attempting to seek (which
    opens a new HTTP request).  This allows, for example, access tokens
    that may have gone stale to be regenerated.  The function is also
    called (with `hdrs` == NULL) on file close so that the callback can
    free any memory that it needs to.

    The callback should return 0 on success, non-zero on failure.  It should
    return in *hdrs a list of strings containing the new headers (terminated
    with a NULL pointer).  These will replace any headers previously supplied
    by the callback.  If no changes are necessary, it can return NULL
    in *hdrs, in which case the previous headers will be left unchanged.

    Ownership of the strings in the header list passes to hfile_libcurl,
    so the callback should not attempt to use or free them itself.  The memory
    containing the array belongs to the callback and will not be freed by
    hfile_libcurl.

    Headers supplied by the callback are appended after any specified
    using the "httphdr", "httphdr:l" or "httphdr:v" methods.  No attempt
    is made to replace these headers (even if a key is repeated) so anything
    that is expected to vary needs to come from the callback.
*/
1477
vhopen_libcurl(const char * url,const char * modes,va_list args)1478 static hFILE *vhopen_libcurl(const char *url, const char *modes, va_list args)
1479 {
1480 hFILE *fp = NULL;
1481 http_headers headers = { .fail_on_error = 1 };
1482
1483 if (parse_va_list(&headers, args) == 0) {
1484 fp = libcurl_open(url, modes, &headers);
1485 }
1486
1487 if (!fp) {
1488 free_headers(&headers.fixed, 1);
1489 }
1490 return fp;
1491 }
1492
PLUGIN_GLOBAL(hfile_plugin_init,_libcurl)1493 int PLUGIN_GLOBAL(hfile_plugin_init,_libcurl)(struct hFILE_plugin *self)
1494 {
1495 static const struct hFILE_scheme_handler handler =
1496 { hopen_libcurl, hfile_always_remote, "libcurl",
1497 2000 + 50,
1498 vhopen_libcurl };
1499
1500 #ifdef ENABLE_PLUGINS
1501 // Embed version string for examination via strings(1) or what(1)
1502 static const char id[] =
1503 "@(#)hfile_libcurl plugin (htslib)\t" HTS_VERSION_TEXT;
1504 const char *version = strchr(id, '\t')+1;
1505 #else
1506 const char *version = hts_version();
1507 #endif
1508 const curl_version_info_data *info;
1509 const char * const *protocol;
1510 const char *auth;
1511 CURLcode err;
1512 CURLSHcode errsh;
1513
1514 err = curl_global_init(CURL_GLOBAL_ALL);
1515 if (err != CURLE_OK) { errno = easy_errno(NULL, err); return -1; }
1516
1517 curl.share = curl_share_init();
1518 if (curl.share == NULL) { curl_global_cleanup(); errno = EIO; return -1; }
1519 errsh = curl_share_setopt(curl.share, CURLSHOPT_LOCKFUNC, share_lock);
1520 errsh |= curl_share_setopt(curl.share, CURLSHOPT_UNLOCKFUNC, share_unlock);
1521 errsh |= curl_share_setopt(curl.share, CURLSHOPT_SHARE, CURL_LOCK_DATA_DNS);
1522 if (errsh != 0) {
1523 curl_share_cleanup(curl.share);
1524 curl_global_cleanup();
1525 errno = EIO;
1526 return -1;
1527 }
1528
1529 if ((auth = getenv("HTS_AUTH_LOCATION")) != NULL) {
1530 curl.auth_path = strdup(auth);
1531 curl.auth_map = kh_init(auth_map);
1532 if (!curl.auth_path || !curl.auth_map) {
1533 int save_errno = errno;
1534 free(curl.auth_path);
1535 kh_destroy(auth_map, curl.auth_map);
1536 curl_share_cleanup(curl.share);
1537 curl_global_cleanup();
1538 errno = save_errno;
1539 return -1;
1540 }
1541 }
1542 if ((auth = getenv("HTS_ALLOW_UNENCRYPTED_AUTHORIZATION_HEADER")) != NULL
1543 && strcmp(auth, "I understand the risks") == 0) {
1544 curl.allow_unencrypted_auth_header = 1;
1545 }
1546
1547 info = curl_version_info(CURLVERSION_NOW);
1548 ksprintf(&curl.useragent, "htslib/%s libcurl/%s", version, info->version);
1549
1550 self->name = "libcurl";
1551 self->destroy = libcurl_exit;
1552
1553 for (protocol = info->protocols; *protocol; protocol++)
1554 hfile_add_scheme_handler(*protocol, &handler);
1555 return 0;
1556 }
1557