1 /* hfile.c -- buffered low-level input/output streams.
2
3 Copyright (C) 2013-2016 Genome Research Ltd.
4
5 Author: John Marshall <jm18@sanger.ac.uk>
6
7 Permission is hereby granted, free of charge, to any person obtaining a copy
8 of this software and associated documentation files (the "Software"), to deal
9 in the Software without restriction, including without limitation the rights
10 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 copies of the Software, and to permit persons to whom the Software is
12 furnished to do so, subject to the following conditions:
13
14 The above copyright notice and this permission notice shall be included in
15 all copies or substantial portions of the Software.
16
17 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 DEALINGS IN THE SOFTWARE. */
24
25 #include <config.h>
26
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <stddef.h>
30 #include <string.h>
31 #include <errno.h>
32 #include <limits.h>
33
34 #include <pthread.h>
35
36 #include "htslib/hfile.h"
37 #include "hfile_internal.h"
38
39 #ifndef ENOTSUP
40 #define ENOTSUP EINVAL
41 #endif
42 #ifndef EOVERFLOW
43 #define EOVERFLOW ERANGE
44 #endif
45 #ifndef EPROTONOSUPPORT
46 #define EPROTONOSUPPORT ENOSYS
47 #endif
48
49 #ifndef SSIZE_MAX /* SSIZE_MAX is POSIX 1 */
50 #define SSIZE_MAX LONG_MAX
51 #endif
52
53 /* hFILE fields are used as follows:
54
55 char *buffer; // Pointer to the start of the I/O buffer
56 char *begin; // First not-yet-read character / unused position
57 char *end; // First unfilled/unfillable position
58 char *limit; // Pointer to the first position past the buffer
59
60 const hFILE_backend *backend; // Methods to refill/flush I/O buffer
61
62 off_t offset; // Offset within the stream of buffer position 0
63 unsigned at_eof:1;// For reading, whether EOF has been seen
64 unsigned mobile:1;// Buffer is a mobile window or fixed full contents
65 unsigned readonly:1;// Whether opened as "r" rather than "r+"/"w"/"a"
66 int has_errno; // Error number from the last failure on this stream
67
68 For reading, begin is the first unread character in the buffer and end is the
69 first unfilled position:
70
    -----------ABCDEFGHIJKLMNO---------------
    ^buffer    ^begin         ^end           ^limit
73
74 For writing, begin is the first unused position and end is unused so remains
75 equal to buffer:
76
    ABCDEFGHIJKLMNOPQRSTUVWXYZ---------------
    ^buffer                   ^begin         ^limit
    ^end
80
81 Thus if begin > end then there is a non-empty write buffer, if begin < end
82 then there is a non-empty read buffer, and if begin == end then both buffers
83 are empty. In all cases, the stream's file position indicator corresponds
84 to the position pointed to by begin.
85
86 The above is the normal scenario of a mobile window. For in-memory
87 streams (eg via hfile_init_fixed) the buffer can be used as the full
88 contents without any separate backend behind it. These always have at_eof
89 set, offset set to 0, need no read() method, and should just return EINVAL
90 for seek():
91
    abcdefghijkLMNOPQRSTUVWXYZ------
    ^buffer    ^begin         ^end  ^limit
94 */
95
hfile_init(size_t struct_size,const char * mode,size_t capacity)96 hFILE *hfile_init(size_t struct_size, const char *mode, size_t capacity)
97 {
98 hFILE *fp = (hFILE *) malloc(struct_size);
99 if (fp == NULL) goto error;
100
101 if (capacity == 0) capacity = 32768;
102 // FIXME For now, clamp input buffer sizes so mpileup doesn't eat memory
103 if (strchr(mode, 'r') && capacity > 32768) capacity = 32768;
104
105 fp->buffer = (char *) malloc(capacity);
106 if (fp->buffer == NULL) goto error;
107
108 fp->begin = fp->end = fp->buffer;
109 fp->limit = &fp->buffer[capacity];
110
111 fp->offset = 0;
112 fp->at_eof = 0;
113 fp->mobile = 1;
114 fp->readonly = (strchr(mode, 'r') && ! strchr(mode, '+'));
115 fp->has_errno = 0;
116 return fp;
117
118 error:
119 hfile_destroy(fp);
120 return NULL;
121 }
122
hfile_init_fixed(size_t struct_size,const char * mode,char * buffer,size_t buf_filled,size_t buf_size)123 hFILE *hfile_init_fixed(size_t struct_size, const char *mode,
124 char *buffer, size_t buf_filled, size_t buf_size)
125 {
126 hFILE *fp = (hFILE *) malloc(struct_size);
127 if (fp == NULL) return NULL;
128
129 fp->buffer = fp->begin = buffer;
130 fp->end = &fp->buffer[buf_filled];
131 fp->limit = &fp->buffer[buf_size];
132
133 fp->offset = 0;
134 fp->at_eof = 1;
135 fp->mobile = 0;
136 fp->readonly = (strchr(mode, 'r') && ! strchr(mode, '+'));
137 fp->has_errno = 0;
138 return fp;
139 }
140
141 static const struct hFILE_backend mem_backend;
142
hfile_destroy(hFILE * fp)143 void hfile_destroy(hFILE *fp)
144 {
145 int save = errno;
146 if (fp) free(fp->buffer);
147 free(fp);
148 errno = save;
149 }
150
/* Returns non-zero when there is pending written data in the buffer, which
   is signalled by begin having advanced past end. */
static inline int writebuffer_is_nonempty(hFILE *fp)
{
    return fp->end < fp->begin;
}
155
156 /* Refills the read buffer from the backend (once, so may only partially
157 fill the buffer), returning the number of additional characters read
158 (which might be 0), or negative when an error occurred. */
refill_buffer(hFILE * fp)159 static ssize_t refill_buffer(hFILE *fp)
160 {
161 ssize_t n;
162
163 // Move any unread characters to the start of the buffer
164 if (fp->mobile && fp->begin > fp->buffer) {
165 fp->offset += fp->begin - fp->buffer;
166 memmove(fp->buffer, fp->begin, fp->end - fp->begin);
167 fp->end = &fp->buffer[fp->end - fp->begin];
168 fp->begin = fp->buffer;
169 }
170
171 // Read into the available buffer space at fp->[end,limit)
172 if (fp->at_eof || fp->end == fp->limit) n = 0;
173 else {
174 n = fp->backend->read(fp, fp->end, fp->limit - fp->end);
175 if (n < 0) { fp->has_errno = errno; return n; }
176 else if (n == 0) fp->at_eof = 1;
177 }
178
179 fp->end += n;
180 return n;
181 }
182
183 /*
184 * Changes the buffer size for an hFILE. Ideally this is done
185 * immediately after opening. If performed later, this function may
186 * fail if we are reducing the buffer size and the current offset into
187 * the buffer is beyond the new capacity.
188 *
189 * Returns 0 on success;
190 * -1 on failure.
191 */
hfile_set_blksize(hFILE * fp,size_t bufsiz)192 int hfile_set_blksize(hFILE *fp, size_t bufsiz) {
193 char *buffer;
194 ptrdiff_t curr_used;
195 if (!fp) return -1;
196 curr_used = (fp->begin > fp->end ? fp->begin : fp->end) - fp->buffer;
197 if (bufsiz == 0) bufsiz = 32768;
198
199 // Ensure buffer resize will not erase live data
200 if (bufsiz < curr_used)
201 return -1;
202
203 if (!(buffer = (char *) realloc(fp->buffer, bufsiz))) return -1;
204
205 fp->begin = buffer + (fp->begin - fp->buffer);
206 fp->end = buffer + (fp->end - fp->buffer);
207 fp->buffer = buffer;
208 fp->limit = &fp->buffer[bufsiz];
209
210 return 0;
211 }
212
213 /* Called only from hgetc(), when our buffer is empty. */
hgetc2(hFILE * fp)214 int hgetc2(hFILE *fp)
215 {
216 return (refill_buffer(fp) > 0)? (unsigned char) *(fp->begin++) : EOF;
217 }
218
/* Reads characters from fp into buffer up to and including the first
   occurrence of delim, stopping early once size-1 characters have been
   stored or EOF is reached.  The result is always NUL-terminated.
   Returns the number of characters stored (excluding the NUL), which is 0
   at EOF, or -1 on error: EINVAL for an unusable size, EBADF if there is
   unflushed written data, or a failure while refilling the buffer. */
ssize_t hgetdelim(char *buffer, size_t size, int delim, hFILE *fp)
{
    size_t room, filled = 0;

    if (size < 1 || size > SSIZE_MAX) {
        fp->has_errno = errno = EINVAL;
        return -1;
    }
    if (writebuffer_is_nonempty(fp)) {
        fp->has_errno = errno = EBADF;
        return -1;
    }

    room = size - 1;  /* keep one byte back for the NUL terminator */

    for (;;) {
        size_t avail = fp->end - fp->begin;
        size_t want = room - filled;
        size_t chunk = (avail < want) ? avail : want;
        ssize_t got;

        /* Search only the part of the hFILE buffer that would fit */
        char *hit = memchr(fp->begin, delim, chunk);
        if (hit != NULL) chunk = hit - fp->begin + 1;

        memcpy(buffer + filled, fp->begin, chunk);
        fp->begin += chunk;
        filled += chunk;

        /* Finished on finding the delimiter or filling the output */
        if (hit != NULL || filled == room) break;

        got = refill_buffer(fp);
        if (got < 0) return -1;  /* Error on refill. */
        if (got == 0) break;     /* EOF, return anything that was copied. */
    }

    buffer[filled] = '\0';
    return filled;
}
268
/* fgets()-like wrapper around hgetln(): returns buffer when at least one
   character was read, or NULL on EOF or error.  A size below 1 is rejected
   with EINVAL. */
char *hgets(char *buffer, int size, hFILE *fp)
{
    if (size >= 1)
        return (hgetln(buffer, size, fp) > 0) ? buffer : NULL;

    fp->has_errno = errno = EINVAL;
    return NULL;
}
277
/* Copies up to nbytes of upcoming data into buffer without consuming it.
   The read buffer is refilled until enough data is held or a refill adds
   nothing more.  Returns the number of bytes copied, or a negative value
   if a refill failed. */
ssize_t hpeek(hFILE *fp, void *buffer, size_t nbytes)
{
    size_t avail = fp->end - fp->begin;

    while (avail < nbytes) {
        ssize_t got = refill_buffer(fp);
        if (got < 0) return got;
        if (got == 0) break;
        avail += got;
    }

    size_t ncopy = (avail < nbytes) ? avail : nbytes;
    memcpy(buffer, fp->begin, ncopy);
    return ncopy;
}
292
293 /* Called only from hread(); when called, our buffer is empty and nread bytes
294 have already been placed in the destination buffer. */
hread2(hFILE * fp,void * destv,size_t nbytes,size_t nread)295 ssize_t hread2(hFILE *fp, void *destv, size_t nbytes, size_t nread)
296 {
297 const size_t capacity = fp->limit - fp->buffer;
298 int buffer_invalidated = 0;
299 char *dest = (char *) destv;
300 dest += nread, nbytes -= nread;
301
302 // Read large requests directly into the destination buffer
303 while (nbytes * 2 >= capacity && !fp->at_eof) {
304 ssize_t n = fp->backend->read(fp, dest, nbytes);
305 if (n < 0) { fp->has_errno = errno; return n; }
306 else if (n == 0) fp->at_eof = 1;
307 else buffer_invalidated = 1;
308 fp->offset += n;
309 dest += n, nbytes -= n;
310 nread += n;
311 }
312
313 if (buffer_invalidated) {
314 // Our unread buffer is empty, so begin == end, but our already-read
315 // buffer [buffer,begin) is likely non-empty and is no longer valid as
316 // its contents are no longer adjacent to the file position indicator.
317 // Discard it so that hseek() can't try to take advantage of it.
318 fp->offset += fp->begin - fp->buffer;
319 fp->begin = fp->end = fp->buffer;
320 }
321
322 while (nbytes > 0 && !fp->at_eof) {
323 size_t n;
324 ssize_t ret = refill_buffer(fp);
325 if (ret < 0) return ret;
326
327 n = fp->end - fp->begin;
328 if (n > nbytes) n = nbytes;
329 memcpy(dest, fp->begin, n);
330 fp->begin += n;
331 dest += n, nbytes -= n;
332 nread += n;
333 }
334
335 return nread;
336 }
337
338 /* Flushes the write buffer, fp->[buffer,begin), out through the backend
339 returning 0 on success or negative if an error occurred. */
flush_buffer(hFILE * fp)340 static ssize_t flush_buffer(hFILE *fp)
341 {
342 const char *buffer = fp->buffer;
343 while (buffer < fp->begin) {
344 ssize_t n = fp->backend->write(fp, buffer, fp->begin - buffer);
345 if (n < 0) { fp->has_errno = errno; return n; }
346 buffer += n;
347 fp->offset += n;
348 }
349
350 fp->begin = fp->buffer; // Leave the buffer empty
351 return 0;
352 }
353
/* Writes out any buffered data and then, if the backend provides a flush
   method, invokes that too.  Returns 0 on success or EOF on failure. */
int hflush(hFILE *fp)
{
    if (flush_buffer(fp) < 0) return EOF;

    if (fp->backend->flush != NULL && fp->backend->flush(fp) < 0) {
        fp->has_errno = errno;
        return EOF;
    }

    return 0;
}
362
363 /* Called only from hputc(), when our buffer is already full. */
hputc2(int c,hFILE * fp)364 int hputc2(int c, hFILE *fp)
365 {
366 if (flush_buffer(fp) < 0) return EOF;
367 *(fp->begin++) = c;
368 return c;
369 }
370
371 /* Called only from hwrite() and hputs2(); when called, our buffer is full and
372 ncopied bytes from the source have already been copied to our buffer. */
hwrite2(hFILE * fp,const void * srcv,size_t totalbytes,size_t ncopied)373 ssize_t hwrite2(hFILE *fp, const void *srcv, size_t totalbytes, size_t ncopied)
374 {
375 const char *src = (const char *) srcv;
376 ssize_t ret;
377 const size_t capacity = fp->limit - fp->buffer;
378 size_t remaining = totalbytes - ncopied;
379 src += ncopied;
380
381 ret = flush_buffer(fp);
382 if (ret < 0) return ret;
383
384 // Write large blocks out directly from the source buffer
385 while (remaining * 2 >= capacity) {
386 ssize_t n = fp->backend->write(fp, src, remaining);
387 if (n < 0) { fp->has_errno = errno; return n; }
388 fp->offset += n;
389 src += n, remaining -= n;
390 }
391
392 // Just buffer any remaining characters
393 memcpy(fp->begin, src, remaining);
394 fp->begin += remaining;
395
396 return totalbytes;
397 }
398
399 /* Called only from hputs(), when our buffer is already full. */
hputs2(const char * text,size_t totalbytes,size_t ncopied,hFILE * fp)400 int hputs2(const char *text, size_t totalbytes, size_t ncopied, hFILE *fp)
401 {
402 return (hwrite2(fp, text, totalbytes, ncopied) >= 0)? 0 : EOF;
403 }
404
/* Repositions the stream.  Pending written data on a mobile buffer is
   flushed first.  Requests that land inside the current read buffer are
   satisfied without calling the backend when the stream is immobile or
   read-only.  Returns the new absolute position, or a negative value on
   error (with has_errno and errno set). */
off_t hseek(hFILE *fp, off_t offset, int whence)
{
    off_t here, result;

    if (fp->mobile && writebuffer_is_nonempty(fp)) {
        int rc = flush_buffer(fp);
        if (rc < 0) return rc;
    }

    here = htell(fp);

    if (whence == SEEK_CUR) {
        // Relative requests are measured from the hFILE's stream position,
        // which buffering read-ahead can leave behind the backend's physical
        // position.  Turn them into absolute positions to compensate.
        off_t target = here + offset;
        if (target < 0) {
            // A negative offset reached before the start of the file, or a
            // positive one overflowed
            fp->has_errno = errno = (offset < 0)? EINVAL : EOVERFLOW;
            return -1;
        }

        whence = SEEK_SET;
        offset = target;
    }
    else if (whence == SEEK_END && !fp->mobile) {
        // Fixed immobile buffers get SEEK_END converted to SEEK_SET as well,
        // so that every in-range request can avoid a backend seek.
        size_t length = fp->end - fp->buffer;
        if (offset > 0 || -offset > length) {
            fp->has_errno = errno = EINVAL;
            return -1;
        }

        whence = SEEK_SET;
        offset = length + offset;
    }

    // No backend seek is needed if the target lies within our read buffer.
    // (Not done when the next operation may be a write on a mobile buffer.)
    if (whence == SEEK_SET && (fp->readonly || !fp->mobile)
        && offset >= fp->offset
        && offset - fp->offset <= fp->end - fp->buffer) {
        fp->begin = fp->buffer + (offset - fp->offset);
        return offset;
    }

    result = fp->backend->seek(fp, offset, whence);
    if (result < 0) {
        fp->has_errno = errno;
        return result;
    }

    // The seek worked, so whatever was in the read buffer is now stale
    fp->at_eof = 0;
    fp->begin = fp->end = fp->buffer;
    fp->offset = result;
    return result;
}
461
/* Flushes any pending written data, closes the backend and destroys the
   stream.  The stream is destroyed whether or not an error occurs.
   Returns 0 on success; otherwise sets errno and returns EOF.  An error
   recorded earlier in has_errno also causes EOF to be returned. */
int hclose(hFILE *fp)
{
    int err = fp->has_errno;

    if (writebuffer_is_nonempty(fp)) {
        if (hflush(fp) < 0) err = fp->has_errno;
    }
    if (fp->backend->close(fp) < 0) err = errno;
    hfile_destroy(fp);

    if (err == 0) return 0;

    errno = err;
    return EOF;
}
476
/* Closes the backend and destroys the stream without flushing, ignoring
   any close error and leaving errno as it was on entry. */
void hclose_abruptly(hFILE *fp)
{
    const int saved_errno = errno;
    int rc = fp->backend->close(fp);

    (void) rc;  /* Ignore subsequent errors */
    hfile_destroy(fp);
    errno = saved_errno;
}
484
485
486 /***************************
487 * File descriptor backend *
488 ***************************/
489
490 #ifndef _WIN32
491 #include <sys/socket.h>
492 #include <sys/stat.h>
493 #define HAVE_STRUCT_STAT_ST_BLKSIZE
494 #else
495 #include <winsock2.h>
496 #define HAVE_CLOSESOCKET
497 #define HAVE_SETMODE
498 #endif
499 #include <fcntl.h>
500 #include <unistd.h>
501
502 /* For Unix, it doesn't matter whether a file descriptor is a socket.
503 However Windows insists on send()/recv() and its own closesocket()
504 being used when fd happens to be a socket. */
505
506 typedef struct {
507 hFILE base;
508 int fd;
509 unsigned is_socket:1;
510 } hFILE_fd;
511
/* Backend read method: reads up to nbytes from the descriptor (via recv()
   for sockets, read() otherwise), retrying when interrupted by a signal. */
static ssize_t fd_read(hFILE *fpv, void *buffer, size_t nbytes)
{
    hFILE_fd *fp = (hFILE_fd *) fpv;
    ssize_t n;

    for (;;) {
        if (fp->is_socket) n = recv(fp->fd, buffer, nbytes, 0);
        else n = read(fp->fd, buffer, nbytes);

        if (n >= 0 || errno != EINTR) return n;
    }
}
522
/* Backend write method: writes up to nbytes to the descriptor (via send()
   for sockets, write() otherwise), retrying when interrupted by a signal. */
static ssize_t fd_write(hFILE *fpv, const void *buffer, size_t nbytes)
{
    hFILE_fd *fp = (hFILE_fd *) fpv;
    ssize_t n;

    for (;;) {
        if (fp->is_socket) n = send(fp->fd, buffer, nbytes, 0);
        else n = write(fp->fd, buffer, nbytes);

        if (n >= 0 || errno != EINTR) break;
    }
#ifdef _WIN32
    // Windows has no SIGPIPE; write fails with EINVAL instead.  Detect
    // that case on a pipe and raise SIGTERM in place of SIGPIPE.  Not
    // ideal, but the alternative would seem to be extra checking in
    // every single piece of calling code.
    if (n < 0 && errno == EINVAL &&
        GetLastError() == ERROR_NO_DATA &&
        GetFileType((HANDLE)_get_osfhandle(fp->fd)) == FILE_TYPE_PIPE) {
        raise(SIGTERM);
    }
#endif
    return n;
}
545
/* Backend seek method: a thin wrapper passing the request on to lseek(). */
static off_t fd_seek(hFILE *fpv, off_t offset, int whence)
{
    hFILE_fd *fp = (hFILE_fd *) fpv;
    off_t pos = lseek(fp->fd, offset, whence);
    return pos;
}
551
/* Backend flush method: syncs the descriptor with fdatasync() or fsync(),
   whichever the build provides (neither means nothing is done), retrying
   when interrupted.  EINVAL and ENOTSUP are treated as success. */
static int fd_flush(hFILE *fpv)
{
    int ret = 0;

    for (;;) {
#ifdef HAVE_FDATASYNC
        hFILE_fd *fp = (hFILE_fd *) fpv;
        ret = fdatasync(fp->fd);
#elif defined(HAVE_FSYNC)
        hFILE_fd *fp = (hFILE_fd *) fpv;
        ret = fsync(fp->fd);
#endif
        if (ret >= 0) break;

        // Ignore invalid-for-fsync(2) errors due to being, e.g., a pipe,
        // and operation-not-supported errors (Mac OS X)
        if (errno == EINVAL || errno == ENOTSUP) {
            ret = 0;
            break;
        }

        if (errno != EINTR) break;
    }

    return ret;
}
569
/* Backend close method: closes the descriptor, using closesocket() for
   sockets where that function is available, and retrying when interrupted
   by a signal. */
static int fd_close(hFILE *fpv)
{
    hFILE_fd *fp = (hFILE_fd *) fpv;
    int ret;

    for (;;) {
#ifdef HAVE_CLOSESOCKET
        ret = fp->is_socket? closesocket(fp->fd) : close(fp->fd);
#else
        ret = close(fp->fd);
#endif
        if (ret >= 0 || errno != EINTR) break;
    }

    return ret;
}
583
584 static const struct hFILE_backend fd_backend =
585 {
586 fd_read, fd_write, fd_seek, fd_flush, fd_close
587 };
588
/* Returns the st_blksize that fstat() reports for fd, or 0 if fstat() fails
   or the build does not have HAVE_STRUCT_STAT_ST_BLKSIZE defined. */
static size_t blksize(int fd)
{
#ifdef HAVE_STRUCT_STAT_ST_BLKSIZE
    struct stat sbuf;
    return (fstat(fd, &sbuf) == 0) ? sbuf.st_blksize : 0;
#else
    return 0;
#endif
}
599
hopen_fd(const char * filename,const char * mode)600 static hFILE *hopen_fd(const char *filename, const char *mode)
601 {
602 hFILE_fd *fp = NULL;
603 int fd = open(filename, hfile_oflags(mode), 0666);
604 if (fd < 0) goto error;
605
606 fp = (hFILE_fd *) hfile_init(sizeof (hFILE_fd), mode, blksize(fd));
607 if (fp == NULL) goto error;
608
609 fp->fd = fd;
610 fp->is_socket = 0;
611 fp->base.backend = &fd_backend;
612 return &fp->base;
613
614 error:
615 if (fd >= 0) { int save = errno; (void) close(fd); errno = save; }
616 hfile_destroy((hFILE *) fp);
617 return NULL;
618 }
619
620 // Loads the contents of filename to produced a read-only, in memory,
621 // immobile hfile. fp is the already opened file. We always close this
622 // input fp, irrespective of whether we error or whether we return a new
623 // immobile hfile.
hpreload(hFILE * fp)624 static hFILE *hpreload(hFILE *fp) {
625 hFILE *mem_fp;
626 char *buf = NULL;
627 off_t buf_sz = 0, buf_a = 0, buf_inc = 8192, len;
628
629 for (;;) {
630 if (buf_a - buf_sz < 5000) {
631 buf_a += buf_inc;
632 char *t = realloc(buf, buf_a);
633 if (!t) goto err;
634 buf = t;
635 if (buf_inc < 1000000) buf_inc *= 1.3;
636 }
637 len = hread(fp, buf+buf_sz, buf_a-buf_sz);
638 if (len > 0)
639 buf_sz += len;
640 else
641 break;
642 }
643
644 if (len < 0) goto err;
645 mem_fp = hfile_init_fixed(sizeof(hFILE), "r", buf, buf_sz, buf_a);
646 if (!mem_fp) goto err;
647 mem_fp->backend = &mem_backend;
648
649 if (hclose(fp) < 0) {
650 hclose_abruptly(mem_fp);
651 goto err;
652 }
653 return mem_fp;
654
655 err:
656 free(buf);
657 hclose_abruptly(fp);
658 return NULL;
659 }
660
/* isremote method for the "preload:" scheme: reports whether the URL that
   follows the prefix is remote. */
static int is_preload_url_remote(const char *url){
    const char *inner = url + 8; // len("preload:") = 8
    return hisremote(inner);
}
664
hopen_preload(const char * url,const char * mode)665 static hFILE *hopen_preload(const char *url, const char *mode){
666 hFILE* fp = hopen(url + 8, mode);
667 return hpreload(fp);
668 }
669
hdopen(int fd,const char * mode)670 hFILE *hdopen(int fd, const char *mode)
671 {
672 hFILE_fd *fp = (hFILE_fd*) hfile_init(sizeof (hFILE_fd), mode, blksize(fd));
673 if (fp == NULL) return NULL;
674
675 fp->fd = fd;
676 fp->is_socket = (strchr(mode, 's') != NULL);
677 fp->base.backend = &fd_backend;
678 return &fp->base;
679 }
680
hopen_fd_fileuri(const char * url,const char * mode)681 static hFILE *hopen_fd_fileuri(const char *url, const char *mode)
682 {
683 if (strncmp(url, "file://localhost/", 17) == 0) url += 16;
684 else if (strncmp(url, "file:///", 8) == 0) url += 7;
685 else { errno = EPROTONOSUPPORT; return NULL; }
686
687 #ifdef _WIN32
688 // For cases like C:/foo
689 if (url[0] == '/' && url[2] == ':' && url[3] == '/') url++;
690 #endif
691
692 return hopen_fd(url, mode);
693 }
694
hopen_fd_stdinout(const char * mode)695 static hFILE *hopen_fd_stdinout(const char *mode)
696 {
697 int fd = (strchr(mode, 'r') != NULL)? STDIN_FILENO : STDOUT_FILENO;
698 #if defined HAVE_SETMODE && defined O_BINARY
699 if (setmode(fd, O_BINARY) < 0) return NULL;
700 #endif
701 return hdopen(fd, mode);
702 }
703
/* Translates an fopen()-style mode string into open(2) flags.  Characters
   are processed left to right: 'r', 'w', 'a' and '+' each replace the
   access mode chosen so far, while 'w' and 'a' also add their creation
   flags.  'e' and 'x' add O_CLOEXEC and O_EXCL where those are defined, and
   unrecognised characters are ignored.  O_BINARY is always added on
   platforms that define it. */
int hfile_oflags(const char *mode)
{
    int accmode = 0, extra = 0;
    const char *p = mode;

    while (*p != '\0') {
        const char c = *p++;

        if (c == 'r') accmode = O_RDONLY;
        else if (c == 'w') { accmode = O_WRONLY; extra |= O_CREAT | O_TRUNC; }
        else if (c == 'a') { accmode = O_WRONLY; extra |= O_CREAT | O_APPEND; }
        else if (c == '+') accmode = O_RDWR;
#ifdef O_CLOEXEC
        else if (c == 'e') extra |= O_CLOEXEC;
#endif
#ifdef O_EXCL
        else if (c == 'x') extra |= O_EXCL;
#endif
    }

#ifdef O_BINARY
    extra |= O_BINARY;
#endif

    return accmode | extra;
}
729
730
731 /*********************
732 * In-memory backend *
733 *********************/
734
735 #include "hts_internal.h"
736
737 typedef struct {
738 hFILE base;
739 } hFILE_mem;
740
mem_seek(hFILE * fpv,off_t offset,int whence)741 static off_t mem_seek(hFILE *fpv, off_t offset, int whence)
742 {
743 errno = EINVAL;
744 return -1;
745 }
746
/* Backend close method for in-memory streams: there is nothing to close,
   so it simply reports success. */
static int mem_close(hFILE *fpv)
{
    (void) fpv;
    return 0;
}
751
752 static const struct hFILE_backend mem_backend =
753 {
754 NULL, NULL, mem_seek, NULL, mem_close
755 };
756
/* Compares the start of s against key, lower-casing the characters of s
   first (so key is expected to be given in lower case).  Returns 0 if s
   begins with key, or +1 otherwise. */
static int cmp_prefix(const char *key, const char *s)
{
    for (; *key != '\0'; key++, s++)
        if (tolower_c(*s) != *key) return +1;

    return 0;
}
765
create_hfile_mem(char * buffer,const char * mode,size_t buf_filled,size_t buf_size)766 static hFILE *create_hfile_mem(char* buffer, const char* mode, size_t buf_filled, size_t buf_size)
767 {
768 hFILE_mem *fp = (hFILE_mem *) hfile_init_fixed(sizeof(hFILE_mem), mode, buffer, buf_filled, buf_size);
769 if (fp == NULL)
770 return NULL;
771
772 fp->base.backend = &mem_backend;
773 return &fp->base;
774 }
775
hopen_mem(const char * url,const char * mode)776 static hFILE *hopen_mem(const char *url, const char *mode)
777 {
778 size_t length, size;
779 char *buffer;
780 const char *data, *comma = strchr(url, ',');
781 if (comma == NULL) { errno = EINVAL; return NULL; }
782 data = comma+1;
783
784 // TODO Implement write modes
785 if (strchr(mode, 'r') == NULL) { errno = EROFS; return NULL; }
786
787 if (comma - url >= 7 && cmp_prefix(";base64", &comma[-7]) == 0) {
788 size = hts_base64_decoded_length(strlen(data));
789 buffer = malloc(size);
790 if (buffer == NULL) return NULL;
791 hts_decode_base64(buffer, &length, data);
792 }
793 else {
794 size = strlen(data) + 1;
795 buffer = malloc(size);
796 if (buffer == NULL) return NULL;
797 hts_decode_percent(buffer, &length, data);
798 }
799 hFILE* hf;
800
801 if(!(hf = create_hfile_mem(buffer, mode, length, size))){
802 free(buffer);
803 return NULL;
804 }
805
806 return hf;
807 }
808
hopenv_mem(const char * filename,const char * mode,va_list args)809 hFILE *hopenv_mem(const char *filename, const char *mode, va_list args)
810 {
811 char* buffer = va_arg(args, char*);
812 size_t sz = va_arg(args, size_t);
813 va_end(args);
814
815 hFILE* hf;
816
817 if(!(hf = create_hfile_mem(buffer, mode, sz, sz))){
818 free(buffer);
819 return NULL;
820 }
821
822 return hf;
823 }
824
/* Returns the buffer underlying an in-memory hFILE without transferring
   ownership.  If length is non-NULL it receives the buffer's size in bytes,
   i.e. the distance from buffer to limit.  Fails with EINVAL, returning
   NULL, if file does not use the in-memory backend. */
char *hfile_mem_get_buffer(hFILE *file, size_t *length) {
    if (file->backend != &mem_backend) {
        errno = EINVAL;
        return NULL;
    }

    // limit points one past the end of the buffer, so it is the minuend
    if (length)
        *length = file->limit - file->buffer;

    return file->buffer;
}
836
/* Like hfile_mem_get_buffer(), but also detaches the buffer from file by
   clearing file->buffer, so that destroying the hFILE later will not free
   it.  Returns NULL if hfile_mem_get_buffer() does. */
char *hfile_mem_steal_buffer(hFILE *file, size_t *length) {
    char *stolen = hfile_mem_get_buffer(file, length);
    if (stolen == NULL) return NULL;

    file->buffer = NULL;
    return stolen;
}
843
hfile_plugin_init_mem(struct hFILE_plugin * self)844 int hfile_plugin_init_mem(struct hFILE_plugin *self)
845 {
846 // mem files are declared remote so they work with a tabix index
847 static const struct hFILE_scheme_handler handler =
848 {NULL, hfile_always_remote, "mem", 2000 + 50, hopenv_mem};
849 self->name = "mem";
850 hfile_add_scheme_handler("mem", &handler);
851 return 0;
852 }
853
854
855 /*****************************************
856 * Plugin and hopen() backend dispatcher *
857 *****************************************/
858
859 #include "htslib/khash.h"
860
861 KHASH_MAP_INIT_STR(scheme_string, const struct hFILE_scheme_handler *)
862 static khash_t(scheme_string) *schemes = NULL;
863
864 struct hFILE_plugin_list {
865 struct hFILE_plugin plugin;
866 struct hFILE_plugin_list *next;
867 };
868
869 static struct hFILE_plugin_list *plugins = NULL;
870 static pthread_mutex_t plugins_lock = PTHREAD_MUTEX_INITIALIZER;
871
hfile_exit()872 static void hfile_exit()
873 {
874 pthread_mutex_lock(&plugins_lock);
875
876 kh_destroy(scheme_string, schemes);
877
878 while (plugins != NULL) {
879 struct hFILE_plugin_list *p = plugins;
880 if (p->plugin.destroy) p->plugin.destroy();
881 #ifdef ENABLE_PLUGINS
882 if (p->plugin.obj) close_plugin(p->plugin.obj);
883 #endif
884 plugins = p->next;
885 free(p);
886 }
887
888 pthread_mutex_unlock(&plugins_lock);
889 pthread_mutex_destroy(&plugins_lock);
890 }
891
priority(const struct hFILE_scheme_handler * handler)892 static inline int priority(const struct hFILE_scheme_handler *handler)
893 {
894 return handler->priority % 1000;
895 }
896
hfile_add_scheme_handler(const char * scheme,const struct hFILE_scheme_handler * handler)897 void hfile_add_scheme_handler(const char *scheme,
898 const struct hFILE_scheme_handler *handler)
899 {
900 int absent;
901 khint_t k = kh_put(scheme_string, schemes, scheme, &absent);
902 if (absent || priority(handler) > priority(kh_value(schemes, k))) {
903 kh_value(schemes, k) = handler;
904 }
905 }
906
init_add_plugin(void * obj,int (* init)(struct hFILE_plugin *),const char * pluginname)907 static int init_add_plugin(void *obj, int (*init)(struct hFILE_plugin *),
908 const char *pluginname)
909 {
910 struct hFILE_plugin_list *p = malloc (sizeof (struct hFILE_plugin_list));
911 if (p == NULL) abort();
912
913 p->plugin.api_version = 1;
914 p->plugin.obj = obj;
915 p->plugin.name = NULL;
916 p->plugin.destroy = NULL;
917
918 int ret = (*init)(&p->plugin);
919
920 if (ret != 0) {
921 hts_log_debug("Initialisation failed for plugin \"%s\": %d", pluginname, ret);
922 free(p);
923 return ret;
924 }
925
926 hts_log_debug("Loaded \"%s\"", pluginname);
927
928 p->next = plugins, plugins = p;
929 return 0;
930 }
931
load_hfile_plugins()932 static void load_hfile_plugins()
933 {
934 static const struct hFILE_scheme_handler
935 data = { hopen_mem, hfile_always_local, "built-in", 80 },
936 file = { hopen_fd_fileuri, hfile_always_local, "built-in", 80 },
937 preload = { hopen_preload, is_preload_url_remote, "built-in", 80 };
938
939 schemes = kh_init(scheme_string);
940 if (schemes == NULL) abort();
941
942 hfile_add_scheme_handler("data", &data);
943 hfile_add_scheme_handler("file", &file);
944 hfile_add_scheme_handler("preload", &preload);
945 init_add_plugin(NULL, hfile_plugin_init_net, "knetfile");
946 init_add_plugin(NULL, hfile_plugin_init_mem, "mem");
947
948 #ifdef ENABLE_PLUGINS
949 struct hts_path_itr path;
950 const char *pluginname;
951 hts_path_itr_setup(&path, NULL, NULL, "hfile_", 6, NULL, 0);
952 while ((pluginname = hts_path_itr_next(&path)) != NULL) {
953 void *obj;
954 int (*init)(struct hFILE_plugin *) = (int (*)(struct hFILE_plugin *))
955 load_plugin(&obj, pluginname, "hfile_plugin_init");
956
957 if (init) {
958 if (init_add_plugin(obj, init, pluginname) != 0)
959 close_plugin(obj);
960 }
961 }
962 #else
963
964 #ifdef HAVE_LIBCURL
965 init_add_plugin(NULL, hfile_plugin_init_libcurl, "libcurl");
966 #endif
967 #ifdef ENABLE_GCS
968 init_add_plugin(NULL, hfile_plugin_init_gcs, "gcs");
969 #endif
970 #ifdef ENABLE_S3
971 init_add_plugin(NULL, hfile_plugin_init_s3, "s3");
972 #endif
973
974 #endif
975
976 // In the unlikely event atexit() fails, it's better to succeed here and
977 // carry on; then eventually when the program exits, we'll merely close
978 // down the plugins uncleanly, as if we had aborted.
979 (void) atexit(hfile_exit);
980 }
981
982 /* A filename like "foo:bar" in which we don't recognise the scheme is
983 either an ordinary file or an indication of a missing or broken plugin.
984 Try to open it as an ordinary file; but if there's no such file, set
985 errno distinctively to make the plugin issue apparent. */
hopen_unknown_scheme(const char * fname,const char * mode)986 static hFILE *hopen_unknown_scheme(const char *fname, const char *mode)
987 {
988 hFILE *fp = hopen_fd(fname, mode);
989 if (fp == NULL && errno == ENOENT) errno = EPROTONOSUPPORT;
990 return fp;
991 }
992
993 /* Returns the appropriate handler, or NULL if the string isn't an URL. */
find_scheme_handler(const char * s)994 static const struct hFILE_scheme_handler *find_scheme_handler(const char *s)
995 {
996 static const struct hFILE_scheme_handler unknown_scheme =
997 { hopen_unknown_scheme, hfile_always_local, "built-in", 0 };
998
999 char scheme[12];
1000 int i;
1001
1002 for (i = 0; i < sizeof scheme; i++)
1003 if (isalnum_c(s[i]) || s[i] == '+' || s[i] == '-' || s[i] == '.')
1004 scheme[i] = tolower_c(s[i]);
1005 else if (s[i] == ':') break;
1006 else return NULL;
1007
1008 // 1 byte schemes are likely windows C:/foo pathnames
1009 if (i <= 1 || i >= sizeof scheme) return NULL;
1010 scheme[i] = '\0';
1011
1012 pthread_mutex_lock(&plugins_lock);
1013 if (! schemes) load_hfile_plugins();
1014 pthread_mutex_unlock(&plugins_lock);
1015
1016 khint_t k = kh_get(scheme_string, schemes, scheme);
1017 return (k != kh_end(schemes))? kh_value(schemes, k) : &unknown_scheme;
1018 }
1019
hopen(const char * fname,const char * mode,...)1020 hFILE *hopen(const char *fname, const char *mode, ...)
1021 {
1022 const struct hFILE_scheme_handler *handler = find_scheme_handler(fname);
1023 if (handler) {
1024 if (strchr(mode, ':') == NULL
1025 || handler->priority < 2000
1026 || handler->vopen == NULL) {
1027 return handler->open(fname, mode);
1028 }
1029 else {
1030 hFILE *fp;
1031 va_list arg;
1032 va_start(arg, mode);
1033 fp = handler->vopen(fname, mode, arg);
1034 va_end(arg);
1035 return fp;
1036 }
1037 }
1038 else if (strcmp(fname, "-") == 0) return hopen_fd_stdinout(mode);
1039 else return hopen_fd(fname, mode);
1040 }
1041
/* isremote method for schemes whose files are always local: returns 0
   whatever the name. */
int hfile_always_local(const char *fname)
{
    (void) fname;
    return 0;
}
/* isremote method for schemes whose files are always remote: returns 1
   whatever the name. */
int hfile_always_remote(const char *fname)
{
    (void) fname;
    return 1;
}
1044
hisremote(const char * fname)1045 int hisremote(const char *fname)
1046 {
1047 const struct hFILE_scheme_handler *handler = find_scheme_handler(fname);
1048 return handler? handler->isremote(fname) : 0;
1049 }
1050