1 /*  hfile.c -- buffered low-level input/output streams.
2 
3     Copyright (C) 2013-2021 Genome Research Ltd.
4 
5     Author: John Marshall <jm18@sanger.ac.uk>
6 
7 Permission is hereby granted, free of charge, to any person obtaining a copy
8 of this software and associated documentation files (the "Software"), to deal
9 in the Software without restriction, including without limitation the rights
10 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 copies of the Software, and to permit persons to whom the Software is
12 furnished to do so, subject to the following conditions:
13 
14 The above copyright notice and this permission notice shall be included in
15 all copies or substantial portions of the Software.
16 
17 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 DEALINGS IN THE SOFTWARE.  */
24 
25 #define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h
26 #include <config.h>
27 
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <stddef.h>
31 #include <string.h>
32 #include <errno.h>
33 #include <limits.h>
34 
35 #include <pthread.h>
36 
37 #ifdef ENABLE_PLUGINS
38 #if defined(_WIN32) || defined(__CYGWIN__) || defined(__MSYS__)
39 #define USING_WINDOWS_PLUGIN_DLLS
40 #include <dlfcn.h>
41 #endif
42 #endif
43 
44 #include "htslib/hfile.h"
45 #include "hfile_internal.h"
46 #include "htslib/kstring.h"
47 
48 #ifndef ENOTSUP
49 #define ENOTSUP EINVAL
50 #endif
51 #ifndef EOVERFLOW
52 #define EOVERFLOW ERANGE
53 #endif
54 #ifndef EPROTONOSUPPORT
55 #define EPROTONOSUPPORT ENOSYS
56 #endif
57 
58 #ifndef SSIZE_MAX /* SSIZE_MAX is POSIX 1 */
59 #define SSIZE_MAX LONG_MAX
60 #endif
61 
62 /* hFILE fields are used as follows:
63 
64    char *buffer;     // Pointer to the start of the I/O buffer
65    char *begin;      // First not-yet-read character / unused position
66    char *end;        // First unfilled/unfillable position
67    char *limit;      // Pointer to the first position past the buffer
68 
69    const hFILE_backend *backend;  // Methods to refill/flush I/O buffer
70 
71    off_t offset;     // Offset within the stream of buffer position 0
72    unsigned at_eof:1;// For reading, whether EOF has been seen
73    unsigned mobile:1;// Buffer is a mobile window or fixed full contents
74    unsigned readonly:1;// Whether opened as "r" rather than "r+"/"w"/"a"
75    int has_errno;    // Error number from the last failure on this stream
76 
77 For reading, begin is the first unread character in the buffer and end is the
78 first unfilled position:
79 
80    -----------ABCDEFGHIJKLMNO---------------
81    ^buffer    ^begin         ^end           ^limit
82 
83 For writing, begin is the first unused position and end is unused so remains
84 equal to buffer:
85 
86    ABCDEFGHIJKLMNOPQRSTUVWXYZ---------------
87    ^buffer                   ^begin         ^limit
88    ^end
89 
90 Thus if begin > end then there is a non-empty write buffer, if begin < end
91 then there is a non-empty read buffer, and if begin == end then both buffers
92 are empty.  In all cases, the stream's file position indicator corresponds
93 to the position pointed to by begin.
94 
95 The above is the normal scenario of a mobile window.  For in-memory
96 streams (eg via hfile_init_fixed) the buffer can be used as the full
97 contents without any separate backend behind it.  These always have at_eof
98 set, offset set to 0, need no read() method, and should just return EINVAL
99 for seek():
100 
101    abcdefghijkLMNOPQRSTUVWXYZ------
102    ^buffer    ^begin         ^end  ^limit
103 */
104 HTSLIB_EXPORT
hfile_init(size_t struct_size,const char * mode,size_t capacity)105 hFILE *hfile_init(size_t struct_size, const char *mode, size_t capacity)
106 {
107     hFILE *fp = (hFILE *) malloc(struct_size);
108     if (fp == NULL) goto error;
109 
110     if (capacity == 0) capacity = 32768;
111     // FIXME For now, clamp input buffer sizes so mpileup doesn't eat memory
112     if (strchr(mode, 'r') && capacity > 32768) capacity = 32768;
113 
114     fp->buffer = (char *) malloc(capacity);
115     if (fp->buffer == NULL) goto error;
116 
117     fp->begin = fp->end = fp->buffer;
118     fp->limit = &fp->buffer[capacity];
119 
120     fp->offset = 0;
121     fp->at_eof = 0;
122     fp->mobile = 1;
123     fp->readonly = (strchr(mode, 'r') && ! strchr(mode, '+'));
124     fp->has_errno = 0;
125     return fp;
126 
127 error:
128     hfile_destroy(fp);
129     return NULL;
130 }
131 
hfile_init_fixed(size_t struct_size,const char * mode,char * buffer,size_t buf_filled,size_t buf_size)132 hFILE *hfile_init_fixed(size_t struct_size, const char *mode,
133                         char *buffer, size_t buf_filled, size_t buf_size)
134 {
135     hFILE *fp = (hFILE *) malloc(struct_size);
136     if (fp == NULL) return NULL;
137 
138     fp->buffer = fp->begin = buffer;
139     fp->end = &fp->buffer[buf_filled];
140     fp->limit = &fp->buffer[buf_size];
141 
142     fp->offset = 0;
143     fp->at_eof = 1;
144     fp->mobile = 0;
145     fp->readonly = (strchr(mode, 'r') && ! strchr(mode, '+'));
146     fp->has_errno = 0;
147     return fp;
148 }
149 
// Forward declaration: hpreload() takes the address of mem_backend before
// the in-memory backend section further down this file defines it.
static const struct hFILE_backend mem_backend;
151 
152 HTSLIB_EXPORT
hfile_destroy(hFILE * fp)153 void hfile_destroy(hFILE *fp)
154 {
155     int save = errno;
156     if (fp) free(fp->buffer);
157     free(fp);
158     errno = save;
159 }
160 
/* Returns non-zero when fp holds buffered output, i.e. when begin lies
   beyond end (see the field description at the top of this file).  */
static inline int writebuffer_is_nonempty(hFILE *fp)
{
    return fp->end < fp->begin;
}
165 
166 /* Refills the read buffer from the backend (once, so may only partially
167    fill the buffer), returning the number of additional characters read
168    (which might be 0), or negative when an error occurred.  */
refill_buffer(hFILE * fp)169 static ssize_t refill_buffer(hFILE *fp)
170 {
171     ssize_t n;
172 
173     // Move any unread characters to the start of the buffer
174     if (fp->mobile && fp->begin > fp->buffer) {
175         fp->offset += fp->begin - fp->buffer;
176         memmove(fp->buffer, fp->begin, fp->end - fp->begin);
177         fp->end = &fp->buffer[fp->end - fp->begin];
178         fp->begin = fp->buffer;
179     }
180 
181     // Read into the available buffer space at fp->[end,limit)
182     if (fp->at_eof || fp->end == fp->limit) n = 0;
183     else {
184         n = fp->backend->read(fp, fp->end, fp->limit - fp->end);
185         if (n < 0) { fp->has_errno = errno; return n; }
186         else if (n == 0) fp->at_eof = 1;
187     }
188 
189     fp->end += n;
190     return n;
191 }
192 
193 /*
194  * Changes the buffer size for an hFILE.  Ideally this is done
195  * immediately after opening.  If performed later, this function may
196  * fail if we are reducing the buffer size and the current offset into
197  * the buffer is beyond the new capacity.
198  *
199  * Returns 0 on success;
200  *        -1 on failure.
201  */
202 HTSLIB_EXPORT
hfile_set_blksize(hFILE * fp,size_t bufsiz)203 int hfile_set_blksize(hFILE *fp, size_t bufsiz) {
204     char *buffer;
205     ptrdiff_t curr_used;
206     if (!fp) return -1;
207     curr_used = (fp->begin > fp->end ? fp->begin : fp->end) - fp->buffer;
208     if (bufsiz == 0) bufsiz = 32768;
209 
210     // Ensure buffer resize will not erase live data
211     if (bufsiz < curr_used)
212         return -1;
213 
214     if (!(buffer = (char *) realloc(fp->buffer, bufsiz))) return -1;
215 
216     fp->begin  = buffer + (fp->begin - fp->buffer);
217     fp->end    = buffer + (fp->end   - fp->buffer);
218     fp->buffer = buffer;
219     fp->limit  = &fp->buffer[bufsiz];
220 
221     return 0;
222 }
223 
224 /* Called only from hgetc(), when our buffer is empty.  */
225 HTSLIB_EXPORT
hgetc2(hFILE * fp)226 int hgetc2(hFILE *fp)
227 {
228     return (refill_buffer(fp) > 0)? (unsigned char) *(fp->begin++) : EOF;
229 }
230 
/* Reads from fp into buffer until delim has been copied, size-1 characters
   have been stored, or end of file is reached, and NUL-terminates the
   result.  Returns the number of characters stored (excluding the NUL),
   which is 0 at EOF when nothing was read, or -1 on error: EINVAL if size
   is 0 or exceeds SSIZE_MAX, EBADF if fp has pending buffered output, or
   whatever error the refill reported.  */
ssize_t hgetdelim(char *buffer, size_t size, int delim, hFILE *fp)
{
    size_t copied = 0;
    ssize_t got;

    if (size < 1 || size > SSIZE_MAX) {
        fp->has_errno = errno = EINVAL;
        return -1;
    }
    if (writebuffer_is_nonempty(fp)) {
        fp->has_errno = errno = EBADF;
        return -1;
    }

    --size; /* to allow space for the NUL terminator */

    for (;;) {
        /* Consider no more than what still fits in the output buffer */
        size_t n = fp->end - fp->begin;
        const size_t room = size - copied;
        char *hit;

        if (n > room) n = room;

        /* Look in the hFILE buffer for the delimiter */
        hit = memchr(fp->begin, delim, n);
        if (hit != NULL) {
            n = hit - fp->begin + 1;
            memcpy(buffer + copied, fp->begin, n);
            buffer[n + copied] = '\0';
            fp->begin += n;
            return n + copied;
        }

        /* No delimiter yet, copy as much as we can and refill if necessary */
        memcpy(buffer + copied, fp->begin, n);
        fp->begin += n;
        copied += n;

        if (copied == size) { /* Output buffer full */
            buffer[copied] = '\0';
            return copied;
        }

        got = refill_buffer(fp);
        if (got <= 0) break;
    }

    if (got < 0) return -1; /* Error on refill. */

    buffer[copied] = '\0';  /* EOF, return anything that was copied. */
    return copied;
}
280 
/* Reads a line into buffer by delegating to hgetln().  Returns buffer when
   hgetln() stored at least one character, otherwise NULL.  A size below 1
   is rejected with EINVAL (recorded in both errno and fp->has_errno).  */
char *hgets(char *buffer, int size, hFILE *fp)
{
    if (size >= 1)
        return (hgetln(buffer, size, fp) > 0) ? buffer : NULL;

    fp->has_errno = errno = EINVAL;
    return NULL;
}
289 
/* Copies up to nbytes of upcoming data into buffer without advancing the
   read position, refilling the hFILE buffer as needed until nbytes are
   available or a refill yields nothing more.  Returns the number of bytes
   copied, or the negative value reported by a failed refill.  */
ssize_t hpeek(hFILE *fp, void *buffer, size_t nbytes)
{
    size_t avail = fp->end - fp->begin;
    size_t ncopy;

    while (avail < nbytes) {
        const ssize_t got = refill_buffer(fp);
        if (got < 0) return got;
        if (got == 0) break;
        avail += got;
    }

    ncopy = (avail > nbytes) ? nbytes : avail;
    memcpy(buffer, fp->begin, ncopy);
    return ncopy;
}
304 
305 /* Called only from hread(); when called, our buffer is empty and nread bytes
306    have already been placed in the destination buffer.  */
307 HTSLIB_EXPORT
hread2(hFILE * fp,void * destv,size_t nbytes,size_t nread)308 ssize_t hread2(hFILE *fp, void *destv, size_t nbytes, size_t nread)
309 {
310     const size_t capacity = fp->limit - fp->buffer;
311     int buffer_invalidated = 0;
312     char *dest = (char *) destv;
313     dest += nread, nbytes -= nread;
314 
315     // Read large requests directly into the destination buffer
316     while (nbytes * 2 >= capacity && !fp->at_eof) {
317         ssize_t n = fp->backend->read(fp, dest, nbytes);
318         if (n < 0) { fp->has_errno = errno; return n; }
319         else if (n == 0) fp->at_eof = 1;
320         else buffer_invalidated = 1;
321         fp->offset += n;
322         dest += n, nbytes -= n;
323         nread += n;
324     }
325 
326     if (buffer_invalidated) {
327         // Our unread buffer is empty, so begin == end, but our already-read
328         // buffer [buffer,begin) is likely non-empty and is no longer valid as
329         // its contents are no longer adjacent to the file position indicator.
330         // Discard it so that hseek() can't try to take advantage of it.
331         fp->offset += fp->begin - fp->buffer;
332         fp->begin = fp->end = fp->buffer;
333     }
334 
335     while (nbytes > 0 && !fp->at_eof) {
336         size_t n;
337         ssize_t ret = refill_buffer(fp);
338         if (ret < 0) return ret;
339 
340         n = fp->end - fp->begin;
341         if (n > nbytes) n = nbytes;
342         memcpy(dest, fp->begin, n);
343         fp->begin += n;
344         dest += n, nbytes -= n;
345         nread += n;
346     }
347 
348     return nread;
349 }
350 
351 /* Flushes the write buffer, fp->[buffer,begin), out through the backend
352    returning 0 on success or negative if an error occurred.  */
flush_buffer(hFILE * fp)353 static ssize_t flush_buffer(hFILE *fp)
354 {
355     const char *buffer = fp->buffer;
356     while (buffer < fp->begin) {
357         ssize_t n = fp->backend->write(fp, buffer, fp->begin - buffer);
358         if (n < 0) { fp->has_errno = errno; return n; }
359         buffer += n;
360         fp->offset += n;
361     }
362 
363     fp->begin = fp->buffer;  // Leave the buffer empty
364     return 0;
365 }
366 
/* Flushes buffered output to the backend and then, if the backend provides
   a flush() method, calls it as well.  Returns 0 on success or EOF on
   failure, with the error recorded in fp->has_errno.  */
int hflush(hFILE *fp)
{
    if (flush_buffer(fp) < 0) return EOF;

    if (fp->backend->flush == NULL) return 0;
    if (fp->backend->flush(fp) >= 0) return 0;

    fp->has_errno = errno;
    return EOF;
}
375 
376 /* Called only from hputc(), when our buffer is already full.  */
377 HTSLIB_EXPORT
hputc2(int c,hFILE * fp)378 int hputc2(int c, hFILE *fp)
379 {
380     if (flush_buffer(fp) < 0) return EOF;
381     *(fp->begin++) = c;
382     return c;
383 }
384 
385 /* Called only from hwrite() and hputs2(); when called, our buffer is either
386    full and ncopied bytes from the source have already been copied to our
387    buffer; or completely empty, ncopied is zero and totalbytes is greater than
388    the buffer size.  */
389 HTSLIB_EXPORT
hwrite2(hFILE * fp,const void * srcv,size_t totalbytes,size_t ncopied)390 ssize_t hwrite2(hFILE *fp, const void *srcv, size_t totalbytes, size_t ncopied)
391 {
392     const char *src = (const char *) srcv;
393     ssize_t ret;
394     const size_t capacity = fp->limit - fp->buffer;
395     size_t remaining = totalbytes - ncopied;
396     src += ncopied;
397 
398     ret = flush_buffer(fp);
399     if (ret < 0) return ret;
400 
401     // Write large blocks out directly from the source buffer
402     while (remaining * 2 >= capacity) {
403         ssize_t n = fp->backend->write(fp, src, remaining);
404         if (n < 0) { fp->has_errno = errno; return n; }
405         fp->offset += n;
406         src += n, remaining -= n;
407     }
408 
409     // Just buffer any remaining characters
410     memcpy(fp->begin, src, remaining);
411     fp->begin += remaining;
412 
413     return totalbytes;
414 }
415 
416 /* Called only from hputs(), when our buffer is already full.  */
417 HTSLIB_EXPORT
hputs2(const char * text,size_t totalbytes,size_t ncopied,hFILE * fp)418 int hputs2(const char *text, size_t totalbytes, size_t ncopied, hFILE *fp)
419 {
420     return (hwrite2(fp, text, totalbytes, ncopied) >= 0)? 0 : EOF;
421 }
422 
/* Repositions the stream according to offset and whence (SEEK_SET,
   SEEK_CUR or SEEK_END).  Pending output on a mobile buffer is flushed
   first.  When the target lies inside the current read buffer of a
   read-only or immobile stream, only fp->begin is moved and the backend is
   not consulted; otherwise the backend's seek() is called and, on success,
   the buffer is discarded and at_eof is cleared.

   Returns the resulting absolute position, or a negative value on error
   (with the error recorded in fp->has_errno).  */
off_t hseek(hFILE *fp, off_t offset, int whence)
{
    off_t curpos, pos;

    // Only mobile buffers are flushed; an immobile buffer is the stream's
    // full contents, so there is nothing to push to a backend.
    if (writebuffer_is_nonempty(fp) && fp->mobile) {
        int ret = flush_buffer(fp);
        if (ret < 0) return ret;
    }

    curpos = htell(fp);

    // Relative offsets are given relative to the hFILE's stream position,
    // which may differ from the backend's physical position due to buffering
    // read-ahead.  Correct for this by converting to an absolute position.
    if (whence == SEEK_CUR) {
        if (curpos + offset < 0) {
            // Either a negative offset resulted in a position before the
            // start of the file, or we overflowed when given a positive offset
            fp->has_errno = errno = (offset < 0)? EINVAL : EOVERFLOW;
            return -1;
        }

        whence = SEEK_SET;
        offset = curpos + offset;
    }
    // For fixed immobile buffers, convert everything else to SEEK_SET too
    // so that seeking can be avoided for all (within range) requests.
    else if (! fp->mobile && whence == SEEK_END) {
        // The filled part of the buffer is the whole stream here
        size_t length = fp->end - fp->buffer;
        if (offset > 0 || -offset > length) {
            fp->has_errno = errno = EINVAL;
            return -1;
        }

        whence = SEEK_SET;
        offset = length + offset;
    }

    // Avoid seeking if the desired position is within our read buffer.
    // (But not when the next operation may be a write on a mobile buffer.)
    if (whence == SEEK_SET && (! fp->mobile || fp->readonly) &&
        offset >= fp->offset && offset - fp->offset <= fp->end - fp->buffer) {
        fp->begin = &fp->buffer[offset - fp->offset];
        return offset;
    }

    pos = fp->backend->seek(fp, offset, whence);
    if (pos < 0) { fp->has_errno = errno; return pos; }

    // Seeking succeeded, so discard any non-empty read buffer
    fp->begin = fp->end = fp->buffer;
    fp->at_eof = 0;

    // The (now empty) buffer starts at the position the backend reported
    fp->offset = pos;
    return pos;
}
479 
/* Flushes any pending output, closes the backend and destroys fp.  fp is
   always destroyed, even when an error is reported.  Returns 0 on success;
   otherwise returns EOF with errno set to an earlier error recorded on the
   stream, or to the flush or close error if one occurred.  */
int hclose(hFILE *fp)
{
    int err = fp->has_errno;

    if (writebuffer_is_nonempty(fp)) {
        if (hflush(fp) < 0) err = fp->has_errno;
    }
    if (fp->backend->close(fp) < 0) err = errno;
    hfile_destroy(fp);

    if (err == 0) return 0;

    errno = err;
    return EOF;
}
494 
/* Closes the backend and destroys fp without flushing buffered output.
   Errors from the backend's close() are ignored and errno is left as it
   was on entry.  */
void hclose_abruptly(hFILE *fp)
{
    const int saved_errno = errno;

    (void) fp->backend->close(fp);  /* Ignore subsequent errors */
    hfile_destroy(fp);

    errno = saved_errno;
}
502 
503 
504 /***************************
505  * File descriptor backend *
506  ***************************/
507 
508 #ifndef _WIN32
509 #include <sys/socket.h>
510 #include <sys/stat.h>
511 #define HAVE_STRUCT_STAT_ST_BLKSIZE
512 #else
513 #include <winsock2.h>
514 #define HAVE_CLOSESOCKET
515 #define HAVE_SETMODE
516 #endif
517 #include <fcntl.h>
518 #include <unistd.h>
519 
520 /* For Unix, it doesn't matter whether a file descriptor is a socket.
521    However Windows insists on send()/recv() and its own closesocket()
522    being used when fd happens to be a socket.  */
523 
/* hFILE specialisation used by the file descriptor backend.  */
typedef struct {
    // Common stream state; the fd_* methods cast their hFILE * argument
    // back to hFILE_fd *, so this must remain the first member
    hFILE base;
    // Descriptor handed to read()/write()/lseek()/close()
    int fd;
    // When set, recv()/send() are used instead of read()/write(), and
    // closesocket() instead of close() where HAVE_CLOSESOCKET is defined
    unsigned is_socket:1;
} hFILE_fd;
529 
/* Backend read method: reads up to nbytes into buffer using recv() for
   sockets and read() otherwise, retrying while the call is interrupted
   (EINTR).  Returns the underlying call's result.  */
static ssize_t fd_read(hFILE *fpv, void *buffer, size_t nbytes)
{
    hFILE_fd *fp = (hFILE_fd *) fpv;

    for (;;) {
        ssize_t got;
        if (fp->is_socket) got = recv(fp->fd, buffer, nbytes, 0);
        else got = read(fp->fd, buffer, nbytes);

        if (got >= 0 || errno != EINTR) return got;
    }
}
540 
/* Backend write method: writes up to nbytes from buffer using send() for
   sockets and write() otherwise, retrying while the call is interrupted
   (EINTR).  Returns the underlying call's result.  */
static ssize_t fd_write(hFILE *fpv, const void *buffer, size_t nbytes)
{
    hFILE_fd *fp = (hFILE_fd *) fpv;
    ssize_t nwritten;

    for (;;) {
        if (fp->is_socket) nwritten = send(fp->fd, buffer, nbytes, 0);
        else nwritten = write(fp->fd, buffer, nbytes);

        if (nwritten >= 0 || errno != EINTR) break;
    }
#ifdef _WIN32
    // Windows has no SIGPIPE; a write to a broken pipe fails with EINVAL
    // instead.  Detect that case (EINVAL, ERROR_NO_DATA, and fd being a
    // pipe) and raise SIGTERM in place of SIGPIPE.  Not ideal, but the
    // alternative is extra checking in every single piece of code.
    if (nwritten < 0 && errno == EINVAL &&
        GetLastError() == ERROR_NO_DATA &&
        GetFileType((HANDLE)_get_osfhandle(fp->fd)) == FILE_TYPE_PIPE) {
        raise(SIGTERM);
    }
#endif
    return nwritten;
}
563 
/* Backend seek method: forwards the request to lseek() on the stream's
   descriptor and returns its result.  */
static off_t fd_seek(hFILE *fpv, off_t offset, int whence)
{
    hFILE_fd *fp = (hFILE_fd *) fpv;
    const off_t pos = lseek(fp->fd, offset, whence);
    return pos;
}
569 
/* Backend flush method: synchronises the descriptor with fdatasync() or
   fsync(), whichever the build has (neither means nothing is done).
   Interrupted calls (EINTR) are retried, and EINVAL/ENOTSUP failures are
   treated as success.  Returns 0 on success or the negative result of the
   failing call.  */
static int fd_flush(hFILE *fpv)
{
    for (;;) {
        int ret = 0;
#ifdef HAVE_FDATASYNC
        hFILE_fd *fp = (hFILE_fd *) fpv;
        ret = fdatasync(fp->fd);
#elif defined(HAVE_FSYNC)
        hFILE_fd *fp = (hFILE_fd *) fpv;
        ret = fsync(fp->fd);
#endif
        if (ret >= 0) return ret;

        // Ignore invalid-for-fsync(2) errors due to being, e.g., a pipe,
        // and operation-not-supported errors (Mac OS X)
        if (errno == EINVAL || errno == ENOTSUP) return 0;

        if (errno != EINTR) return ret;
    }
}
587 
/* Backend close method: closes the descriptor, using closesocket() for
   sockets where HAVE_CLOSESOCKET is defined and close() otherwise, and
   retrying while the call is interrupted (EINTR).  Returns the underlying
   call's result.  */
static int fd_close(hFILE *fpv)
{
    hFILE_fd *fp = (hFILE_fd *) fpv;

    for (;;) {
        int ret;
#ifdef HAVE_CLOSESOCKET
        ret = fp->is_socket? closesocket(fp->fd) : close(fp->fd);
#else
        ret = close(fp->fd);
#endif
        if (ret >= 0 || errno != EINTR) return ret;
    }
}
601 
602 static const struct hFILE_backend fd_backend =
603 {
604     fd_read, fd_write, fd_seek, fd_flush, fd_close
605 };
606 
/* Returns the st_blksize that fstat() reports for fd, or 0 when fstat()
   fails or the build has no HAVE_STRUCT_STAT_ST_BLKSIZE.  */
static size_t blksize(int fd)
{
    size_t preferred = 0;
#ifdef HAVE_STRUCT_STAT_ST_BLKSIZE
    struct stat sbuf;
    if (fstat(fd, &sbuf) == 0) preferred = sbuf.st_blksize;
#endif
    return preferred;
}
617 
hopen_fd(const char * filename,const char * mode)618 static hFILE *hopen_fd(const char *filename, const char *mode)
619 {
620     hFILE_fd *fp = NULL;
621     int fd = open(filename, hfile_oflags(mode), 0666);
622     if (fd < 0) goto error;
623 
624     fp = (hFILE_fd *) hfile_init(sizeof (hFILE_fd), mode, blksize(fd));
625     if (fp == NULL) goto error;
626 
627     fp->fd = fd;
628     fp->is_socket = 0;
629     fp->base.backend = &fd_backend;
630     return &fp->base;
631 
632 error:
633     if (fd >= 0) { int save = errno; (void) close(fd); errno = save; }
634     hfile_destroy((hFILE *) fp);
635     return NULL;
636 }
637 
638 // Loads the contents of filename to produced a read-only, in memory,
639 // immobile hfile.  fp is the already opened file.  We always close this
640 // input fp, irrespective of whether we error or whether we return a new
641 // immobile hfile.
hpreload(hFILE * fp)642 static hFILE *hpreload(hFILE *fp) {
643     hFILE *mem_fp;
644     char *buf = NULL;
645     off_t buf_sz = 0, buf_a = 0, buf_inc = 8192, len;
646 
647     for (;;) {
648         if (buf_a - buf_sz < 5000) {
649             buf_a += buf_inc;
650             char *t = realloc(buf, buf_a);
651             if (!t) goto err;
652             buf = t;
653             if (buf_inc < 1000000) buf_inc *= 1.3;
654         }
655         len = hread(fp, buf+buf_sz, buf_a-buf_sz);
656         if (len > 0)
657             buf_sz += len;
658         else
659             break;
660     }
661 
662     if (len < 0) goto err;
663     mem_fp = hfile_init_fixed(sizeof(hFILE), "r", buf, buf_sz, buf_a);
664     if (!mem_fp) goto err;
665     mem_fp->backend = &mem_backend;
666 
667     if (hclose(fp) < 0) {
668         hclose_abruptly(mem_fp);
669         goto err;
670     }
671     return mem_fp;
672 
673  err:
674     free(buf);
675     hclose_abruptly(fp);
676     return NULL;
677 }
678 
// Reports whether the URL wrapped by a "preload:" URL is remote, by passing
// the part after the "preload:" prefix to hisremote().
static int is_preload_url_remote(const char *url){
    const char *wrapped = url + 8; // len("preload:") = 8
    return hisremote(wrapped);
}
682 
hopen_preload(const char * url,const char * mode)683 static hFILE *hopen_preload(const char *url, const char *mode){
684     hFILE* fp = hopen(url + 8, mode);
685     return hpreload(fp);
686 }
687 
hdopen(int fd,const char * mode)688 hFILE *hdopen(int fd, const char *mode)
689 {
690     hFILE_fd *fp = (hFILE_fd*) hfile_init(sizeof (hFILE_fd), mode, blksize(fd));
691     if (fp == NULL) return NULL;
692 
693     fp->fd = fd;
694     fp->is_socket = (strchr(mode, 's') != NULL);
695     fp->base.backend = &fd_backend;
696     return &fp->base;
697 }
698 
hopen_fd_fileuri(const char * url,const char * mode)699 static hFILE *hopen_fd_fileuri(const char *url, const char *mode)
700 {
701     if (strncmp(url, "file://localhost/", 17) == 0) url += 16;
702     else if (strncmp(url, "file:///", 8) == 0) url += 7;
703     else { errno = EPROTONOSUPPORT; return NULL; }
704 
705 #if defined(_WIN32) || defined(__MSYS__)
706     // For cases like C:/foo
707     if (url[0] == '/' && url[1] && url[2] == ':' && url[3] == '/') url++;
708 #endif
709 
710     return hopen_fd(url, mode);
711 }
712 
hopen_fd_stdinout(const char * mode)713 static hFILE *hopen_fd_stdinout(const char *mode)
714 {
715     int fd = (strchr(mode, 'r') != NULL)? STDIN_FILENO : STDOUT_FILENO;
716 #if defined HAVE_SETMODE && defined O_BINARY
717     if (setmode(fd, O_BINARY) < 0) return NULL;
718 #endif
719     return hdopen(fd, mode);
720 }
721 
722 HTSLIB_EXPORT
/* Translates an fopen-style mode string into open(2) flags.  Characters
   are processed left to right: 'r', 'w', 'a' and '+' choose the access
   mode (a later one overrides an earlier one), 'w' and 'a' also add
   O_CREAT with O_TRUNC or O_APPEND respectively, and 'e' and 'x' add
   O_CLOEXEC and O_EXCL where those are defined.  Other characters are
   ignored.  O_BINARY is always added where it is defined.  */
int hfile_oflags(const char *mode)
{
    int access = 0, extra = 0;
    const char *p = mode;

    while (*p != '\0') {
        const char c = *p++;

        if (c == 'r') access = O_RDONLY;
        else if (c == 'w') { access = O_WRONLY; extra |= O_CREAT | O_TRUNC; }
        else if (c == 'a') { access = O_WRONLY; extra |= O_CREAT | O_APPEND; }
        else if (c == '+') access = O_RDWR;
#ifdef O_CLOEXEC
        else if (c == 'e') extra |= O_CLOEXEC;
#endif
#ifdef O_EXCL
        else if (c == 'x') extra |= O_EXCL;
#endif
    }

#ifdef O_BINARY
    extra |= O_BINARY;
#endif

    return access | extra;
}
748 
749 
750 /*********************
751  * In-memory backend *
752  *********************/
753 
754 #include "hts_internal.h"
755 
/* hFILE specialisation for in-memory streams; it adds no fields of its own
   to the common hFILE state.  */
typedef struct {
    hFILE base;
} hFILE_mem;
759 
/* Backend seek method for in-memory streams: always fails with EINVAL.
   None of the arguments are examined.  */
static off_t mem_seek(hFILE *fpv, off_t offset, int whence)
{
    (void) fpv;
    (void) offset;
    (void) whence;

    errno = EINVAL;
    return (off_t) -1;
}
765 
/* Backend close method for in-memory streams: nothing to do, always
   returns 0.  */
static int mem_close(hFILE *fpv)
{
    (void) fpv;
    return 0;
}
770 
771 static const struct hFILE_backend mem_backend =
772 {
773     NULL, NULL, mem_seek, NULL, mem_close
774 };
775 
/* Compares key against the start of s, lower-casing each character of s
   with tolower_c() before comparing (so key is expected to be lower case).
   Returns 0 if every character of key matches, +1 otherwise.  */
static int cmp_prefix(const char *key, const char *s)
{
    for (; *key != '\0'; key++, s++) {
        if (tolower_c(*s) != *key) return +1;
    }

    return 0;
}
784 
create_hfile_mem(char * buffer,const char * mode,size_t buf_filled,size_t buf_size)785 static hFILE *create_hfile_mem(char* buffer, const char* mode, size_t buf_filled, size_t buf_size)
786 {
787     hFILE_mem *fp = (hFILE_mem *) hfile_init_fixed(sizeof(hFILE_mem), mode, buffer, buf_filled, buf_size);
788     if (fp == NULL)
789         return NULL;
790 
791     fp->base.backend = &mem_backend;
792     return &fp->base;
793 }
794 
hopen_mem(const char * url,const char * mode)795 static hFILE *hopen_mem(const char *url, const char *mode)
796 {
797     size_t length, size;
798     char *buffer;
799     const char *data, *comma = strchr(url, ',');
800     if (comma == NULL) { errno = EINVAL; return NULL; }
801     data = comma+1;
802 
803     // TODO Implement write modes
804     if (strchr(mode, 'r') == NULL) { errno = EROFS; return NULL; }
805 
806     if (comma - url >= 7 && cmp_prefix(";base64", &comma[-7]) == 0) {
807         size = hts_base64_decoded_length(strlen(data));
808         buffer = malloc(size);
809         if (buffer == NULL) return NULL;
810         hts_decode_base64(buffer, &length, data);
811     }
812     else {
813         size = strlen(data) + 1;
814         buffer = malloc(size);
815         if (buffer == NULL) return NULL;
816         hts_decode_percent(buffer, &length, data);
817     }
818     hFILE* hf;
819 
820     if(!(hf = create_hfile_mem(buffer, mode, length, size))){
821         free(buffer);
822         return NULL;
823     }
824 
825     return hf;
826 }
827 
hopenv_mem(const char * filename,const char * mode,va_list args)828 static hFILE *hopenv_mem(const char *filename, const char *mode, va_list args)
829 {
830     char* buffer = va_arg(args, char*);
831     size_t sz = va_arg(args, size_t);
832     va_end(args);
833 
834     hFILE* hf;
835 
836     if(!(hf = create_hfile_mem(buffer, mode, sz, sz))){
837         free(buffer);
838         return NULL;
839     }
840 
841     return hf;
842 }
843 
/* Returns the buffer underlying an in-memory hFILE, which remains owned by
   the stream.  If length is non-NULL it receives the buffer's capacity in
   bytes (limit - buffer).  Fails with EINVAL, returning NULL and leaving
   *length untouched, when file does not use the in-memory backend.  */
char *hfile_mem_get_buffer(hFILE *file, size_t *length) {
    if (file->backend != &mem_backend) {
        errno = EINVAL;
        return NULL;
    }

    // limit points one past the end of the buffer, so it is the minuend;
    // the reverse subtraction would wrap to a huge size_t value
    if (length)
        *length = file->limit - file->buffer;

    return file->buffer;
}
855 
/* As hfile_mem_get_buffer(), but on success also detaches the buffer from
   the stream by setting file->buffer to NULL, so that destroying the stream
   will not free it.  Returns NULL if hfile_mem_get_buffer() fails.  */
char *hfile_mem_steal_buffer(hFILE *file, size_t *length) {
    char *stolen = hfile_mem_get_buffer(file, length);

    if (stolen != NULL)
        file->buffer = NULL;

    return stolen;
}
862 
hfile_plugin_init_mem(struct hFILE_plugin * self)863 int hfile_plugin_init_mem(struct hFILE_plugin *self)
864 {
865     // mem files are declared remote so they work with a tabix index
866     static const struct hFILE_scheme_handler handler =
867             {NULL, hfile_always_remote, "mem", 2000 + 50, hopenv_mem};
868     self->name = "mem";
869     hfile_add_scheme_handler("mem", &handler);
870     return 0;
871 }
872 
873 /**********************************************************************
874  * Dummy crypt4gh plug-in.  Does nothing apart from advise how to get *
875  * the real one.  It will be overridden by the actual plug-in.        *
876  **********************************************************************/
877 
crypt4gh_needed(const char * url,const char * mode)878 static hFILE *crypt4gh_needed(const char *url, const char *mode)
879 {
880     const char *u = strncmp(url, "crypt4gh:", 9) == 0 ? url + 9 : url;
881 #if defined(ENABLE_PLUGINS)
882     const char *enable_plugins = "";
883 #else
884     const char *enable_plugins = "You also need to rebuild HTSlib with plug-ins enabled.\n";
885 #endif
886 
887     hts_log_error("Accessing \"%s\" needs the crypt4gh plug-in.\n"
888                   "It can be found at "
889                   "https://github.com/samtools/htslib-crypt4gh\n"
890                   "%s"
891                   "If you have the plug-in, please ensure it can be "
892                   "found on your HTS_PATH.",
893                   u, enable_plugins);
894 
895     errno = EPROTONOSUPPORT;
896     return NULL;
897 }
898 
hfile_plugin_init_crypt4gh_needed(struct hFILE_plugin * self)899 int hfile_plugin_init_crypt4gh_needed(struct hFILE_plugin *self)
900 {
901     static const struct hFILE_scheme_handler handler =
902         { crypt4gh_needed, NULL, "crypt4gh-needed", 0, NULL };
903     self->name = "crypt4gh-needed";
904     hfile_add_scheme_handler("crypt4gh", &handler);
905     return 0;
906 }
907 
908 
909 /*****************************************
910  * Plugin and hopen() backend dispatcher *
911  *****************************************/
912 
913 #include "htslib/khash.h"
914 
// Instantiate a string-keyed khash map type, "scheme_string", whose values
// are pointers to scheme handlers
KHASH_MAP_INIT_STR(scheme_string, const struct hFILE_scheme_handler *)
// The scheme table itself; initially NULL, and destroyed and reset to NULL
// by hfile_shutdown()
static khash_t(scheme_string) *schemes = NULL;

// Node of the singly-linked list of plug-in records
struct hFILE_plugin_list {
    struct hFILE_plugin plugin;
    struct hFILE_plugin_list *next;
};

// Head of the plug-in list; hfile_shutdown() frees every node
static struct hFILE_plugin_list *plugins = NULL;
// Held by hfile_shutdown() while it tears down schemes and plugins
static pthread_mutex_t plugins_lock = PTHREAD_MUTEX_INITIALIZER;
925 
hfile_shutdown(int do_close_plugin)926 void hfile_shutdown(int do_close_plugin)
927 {
928     pthread_mutex_lock(&plugins_lock);
929 
930     if (schemes) {
931         kh_destroy(scheme_string, schemes);
932         schemes = NULL;
933     }
934 
935     while (plugins != NULL) {
936         struct hFILE_plugin_list *p = plugins;
937         if (p->plugin.destroy) p->plugin.destroy();
938 #ifdef ENABLE_PLUGINS
939         if (p->plugin.obj && do_close_plugin) close_plugin(p->plugin.obj);
940 #endif
941         plugins = p->next;
942         free(p);
943     }
944 
945     pthread_mutex_unlock(&plugins_lock);
946 }
947 
hfile_exit()948 static void hfile_exit()
949 {
950     hfile_shutdown(0);
951     pthread_mutex_destroy(&plugins_lock);
952 }
953 
priority(const struct hFILE_scheme_handler * handler)954 static inline int priority(const struct hFILE_scheme_handler *handler)
955 {
956     return handler->priority % 1000;
957 }
958 
/*
 * Work-around for Windows plug-in DLLs, where the plug-in could be using a
 * different HTSlib library to the executable (for example because the
 * latter was built against a static libhts.a).  When this happens, the
 * plug-in can call the wrong copy of hfile_add_scheme_handler().  If that
 * is detected, this function is called to try to redirect the registration
 * to the hfile_add_scheme_handler() found in the main executable.
 *
 * Returns 0 if the registration was forwarded,
 *        -1 if it could not be (always the case when the Windows plug-in
 *           work-around is not compiled in).
 */
static int try_exe_add_scheme_handler(const char *scheme,
                                      const struct hFILE_scheme_handler *handler)
{
#ifdef USING_WINDOWS_PLUGIN_DLLS
    typedef void add_handler_fn(const char *scheme,
                                const struct hFILE_scheme_handler *handler);
    // Resolved at most once successfully; retried while still NULL
    static add_handler_fn *exe_add = NULL;

    if (exe_add == NULL) {
        // dlopen the main executable and look up its registration function
        void *exe = dlopen(NULL, RTLD_LAZY);
        if (exe == NULL) return -1;
        *(void **) (&exe_add) = dlsym(exe, "hfile_add_scheme_handler");
        dlclose(exe);
    }

    // Give up if the symbol is missing, or if it is the function in this
    // copy of the library (forwarding to it would recurse for ever)
    if (exe_add == NULL || exe_add == hfile_add_scheme_handler)
        return -1;

    exe_add(scheme, handler);
    return 0;
#else
    return -1;
#endif
}
995 
996 HTSLIB_EXPORT
hfile_add_scheme_handler(const char * scheme,const struct hFILE_scheme_handler * handler)997 void hfile_add_scheme_handler(const char *scheme,
998                               const struct hFILE_scheme_handler *handler)
999 {
1000     int absent;
1001     if (!schemes) {
1002         if (try_exe_add_scheme_handler(scheme, handler) != 0) {
1003             hts_log_warning("Couldn't register scheme handler for %s", scheme);
1004         }
1005         return;
1006     }
1007     khint_t k = kh_put(scheme_string, schemes, scheme, &absent);
1008     if (absent < 0) {
1009         hts_log_warning("Couldn't register scheme handler for %s : %s",
1010                         scheme, strerror(errno));
1011         return;
1012     }
1013     if (absent || priority(handler) > priority(kh_value(schemes, k))) {
1014         kh_value(schemes, k) = handler;
1015     }
1016 }
1017 
init_add_plugin(void * obj,int (* init)(struct hFILE_plugin *),const char * pluginname)1018 static int init_add_plugin(void *obj, int (*init)(struct hFILE_plugin *),
1019                            const char *pluginname)
1020 {
1021     struct hFILE_plugin_list *p = malloc (sizeof (struct hFILE_plugin_list));
1022     if (p == NULL) {
1023         hts_log_debug("Failed to allocate memory for plugin \"%s\"", pluginname);
1024         return -1;
1025     }
1026 
1027     p->plugin.api_version = 1;
1028     p->plugin.obj = obj;
1029     p->plugin.name = NULL;
1030     p->plugin.destroy = NULL;
1031 
1032     int ret = (*init)(&p->plugin);
1033 
1034     if (ret != 0) {
1035         hts_log_debug("Initialisation failed for plugin \"%s\": %d", pluginname, ret);
1036         free(p);
1037         return ret;
1038     }
1039 
1040     hts_log_debug("Loaded \"%s\"", pluginname);
1041 
1042     p->next = plugins, plugins = p;
1043     return 0;
1044 }
1045 
1046 /*
1047  * Returns 0 on success,
1048  *        <0 on failure
1049  */
load_hfile_plugins()1050 static int load_hfile_plugins()
1051 {
1052     static const struct hFILE_scheme_handler
1053         data = { hopen_mem, hfile_always_local, "built-in", 80 },
1054         file = { hopen_fd_fileuri, hfile_always_local, "built-in", 80 },
1055         preload = { hopen_preload, is_preload_url_remote, "built-in", 80 };
1056 
1057     schemes = kh_init(scheme_string);
1058     if (schemes == NULL)
1059         return -1;
1060 
1061     hfile_add_scheme_handler("data", &data);
1062     hfile_add_scheme_handler("file", &file);
1063     hfile_add_scheme_handler("preload", &preload);
1064     init_add_plugin(NULL, hfile_plugin_init_mem, "mem");
1065     init_add_plugin(NULL, hfile_plugin_init_crypt4gh_needed, "crypt4gh-needed");
1066 
1067 #ifdef ENABLE_PLUGINS
1068     struct hts_path_itr path;
1069     const char *pluginname;
1070     hts_path_itr_setup(&path, NULL, NULL, "hfile_", 6, NULL, 0);
1071     while ((pluginname = hts_path_itr_next(&path)) != NULL) {
1072         void *obj;
1073         int (*init)(struct hFILE_plugin *) = (int (*)(struct hFILE_plugin *))
1074             load_plugin(&obj, pluginname, "hfile_plugin_init");
1075 
1076         if (init) {
1077             if (init_add_plugin(obj, init, pluginname) != 0)
1078                 close_plugin(obj);
1079         }
1080     }
1081 #else
1082 
1083 #ifdef HAVE_LIBCURL
1084     init_add_plugin(NULL, hfile_plugin_init_libcurl, "libcurl");
1085 #endif
1086 #ifdef ENABLE_GCS
1087     init_add_plugin(NULL, hfile_plugin_init_gcs, "gcs");
1088 #endif
1089 #ifdef ENABLE_S3
1090     init_add_plugin(NULL, hfile_plugin_init_s3, "s3");
1091     init_add_plugin(NULL, hfile_plugin_init_s3_write, "s3w");
1092 #endif
1093 
1094 #endif
1095 
1096     // In the unlikely event atexit() fails, it's better to succeed here and
1097     // carry on; then eventually when the program exits, we'll merely close
1098     // down the plugins uncleanly, as if we had aborted.
1099     (void) atexit(hfile_exit);
1100 
1101     return 0;
1102 }
1103 
1104 /* A filename like "foo:bar" in which we don't recognise the scheme is
1105    either an ordinary file or an indication of a missing or broken plugin.
1106    Try to open it as an ordinary file; but if there's no such file, set
1107    errno distinctively to make the plugin issue apparent.  */
hopen_unknown_scheme(const char * fname,const char * mode)1108 static hFILE *hopen_unknown_scheme(const char *fname, const char *mode)
1109 {
1110     hFILE *fp = hopen_fd(fname, mode);
1111     if (fp == NULL && errno == ENOENT) errno = EPROTONOSUPPORT;
1112     return fp;
1113 }
1114 
1115 /* Returns the appropriate handler, or NULL if the string isn't an URL.  */
find_scheme_handler(const char * s)1116 static const struct hFILE_scheme_handler *find_scheme_handler(const char *s)
1117 {
1118     static const struct hFILE_scheme_handler unknown_scheme =
1119         { hopen_unknown_scheme, hfile_always_local, "built-in", 0 };
1120 
1121     char scheme[12];
1122     int i;
1123 
1124     for (i = 0; i < sizeof scheme; i++)
1125         if (isalnum_c(s[i]) || s[i] == '+' || s[i] == '-' || s[i] == '.')
1126             scheme[i] = tolower_c(s[i]);
1127         else if (s[i] == ':') break;
1128         else return NULL;
1129 
1130     // 1 byte schemes are likely windows C:/foo pathnames
1131     if (i <= 1 || i >= sizeof scheme) return NULL;
1132     scheme[i] = '\0';
1133 
1134     pthread_mutex_lock(&plugins_lock);
1135     if (!schemes && load_hfile_plugins() < 0) {
1136         pthread_mutex_unlock(&plugins_lock);
1137         return NULL;
1138     }
1139     pthread_mutex_unlock(&plugins_lock);
1140 
1141     khint_t k = kh_get(scheme_string, schemes, scheme);
1142     return (k != kh_end(schemes))? kh_value(schemes, k) : &unknown_scheme;
1143 }
1144 
1145 
1146 /***************************
1147  * Library introspection functions
1148  ***************************/
1149 
1150 /*
1151  * Fills out sc_list[] with the list of known URL schemes.
1152  * This can be restricted to just ones from a specific plugin,
1153  * or all (plugin == NULL).
1154  *
1155  * Returns number of schemes found on success;
1156  *        -1 on failure.
1157  */
1158 HTSLIB_EXPORT
hfile_list_schemes(const char * plugin,const char * sc_list[],int * nschemes)1159 int hfile_list_schemes(const char *plugin, const char *sc_list[], int *nschemes)
1160 {
1161     pthread_mutex_lock(&plugins_lock);
1162     if (!schemes && load_hfile_plugins() < 0) {
1163         pthread_mutex_unlock(&plugins_lock);
1164         return -1;
1165     }
1166     pthread_mutex_unlock(&plugins_lock);
1167 
1168     khiter_t k;
1169     int ns = 0;
1170 
1171     for (k = kh_begin(schemes); k != kh_end(schemes); k++) {
1172         if (!kh_exist(schemes, k))
1173             continue;
1174 
1175         const struct hFILE_scheme_handler *s = kh_value(schemes, k);
1176         if (plugin && strcmp(s->provider, plugin) != 0)
1177             continue;
1178 
1179         if (ns < *nschemes)
1180             sc_list[ns] = kh_key(schemes, k);
1181         ns++;
1182     }
1183 
1184     if (*nschemes > ns)
1185         *nschemes = ns;
1186 
1187     return ns;
1188 }
1189 
1190 
1191 /*
1192  * Fills out plist[] with the list of known hFILE plugins.
1193  *
1194  * Returns number of schemes found on success;
1195  *        -1 on failure
1196  */
1197 HTSLIB_EXPORT
hfile_list_plugins(const char * plist[],int * nplugins)1198 int hfile_list_plugins(const char *plist[], int *nplugins)
1199 {
1200     pthread_mutex_lock(&plugins_lock);
1201     if (!schemes && load_hfile_plugins() < 0) {
1202         pthread_mutex_unlock(&plugins_lock);
1203         return -1;
1204     }
1205     pthread_mutex_unlock(&plugins_lock);
1206 
1207     int np = 0;
1208     if (*nplugins)
1209         plist[np++] = "built-in";
1210 
1211     struct hFILE_plugin_list *p = plugins;
1212     while (p) {
1213         if (np < *nplugins)
1214             plist[np] = p->plugin.name;
1215 
1216         p = p->next;
1217         np++;
1218     }
1219 
1220     if (*nplugins > np)
1221         *nplugins = np;
1222 
1223     return np;
1224 }
1225 
1226 
1227 /*
1228  * Tests for the presence of a specific hFILE plugin.
1229  *
1230  * Returns 1 if true
1231  *         0 otherwise
1232  */
1233 HTSLIB_EXPORT
hfile_has_plugin(const char * name)1234 int hfile_has_plugin(const char *name)
1235 {
1236     pthread_mutex_lock(&plugins_lock);
1237     if (!schemes && load_hfile_plugins() < 0) {
1238         pthread_mutex_unlock(&plugins_lock);
1239         return -1;
1240     }
1241     pthread_mutex_unlock(&plugins_lock);
1242 
1243     struct hFILE_plugin_list *p = plugins;
1244     while (p) {
1245         if (strcmp(p->plugin.name, name) == 0)
1246             return 1;
1247         p = p->next;
1248     }
1249 
1250     return 0;
1251 }
1252 
1253 /***************************
1254  * hFILE interface proper
1255  ***************************/
1256 
hopen(const char * fname,const char * mode,...)1257 hFILE *hopen(const char *fname, const char *mode, ...)
1258 {
1259     const struct hFILE_scheme_handler *handler = find_scheme_handler(fname);
1260     if (handler) {
1261         if (strchr(mode, ':') == NULL
1262             || handler->priority < 2000
1263             || handler->vopen == NULL) {
1264             return handler->open(fname, mode);
1265         }
1266         else {
1267             hFILE *fp;
1268             va_list arg;
1269             va_start(arg, mode);
1270             fp = handler->vopen(fname, mode, arg);
1271             va_end(arg);
1272             return fp;
1273         }
1274     }
1275     else if (strcmp(fname, "-") == 0) return hopen_fd_stdinout(mode);
1276     else return hopen_fd(fname, mode);
1277 }
1278 
1279 HTSLIB_EXPORT
int hfile_always_local (const char *fname)
{
    // isremote method for schemes whose files are never remote:
    // answers 0 whatever the name is.
    (void) fname;
    return 0;
}
1281 
1282 HTSLIB_EXPORT
int hfile_always_remote(const char *fname)
{
    // isremote method for schemes whose files are always remote:
    // answers 1 whatever the name is.
    (void) fname;
    return 1;
}
1284 
hisremote(const char * fname)1285 int hisremote(const char *fname)
1286 {
1287     const struct hFILE_scheme_handler *handler = find_scheme_handler(fname);
1288     return handler? handler->isremote(fname) : 0;
1289 }
1290 
// Locate the extension, if any, of the basename part of [start,limit).
// Returns a pointer to the last '.' found after the final '/', or limit
// itself when the basename contains no '.'.
// Note: Doesn't notice percent-encoded '.' and '/' characters. Don't do that.
static const char *strip_extension(const char *start, const char *limit)
{
    const char *p;

    // Walk backwards from the end; the basename ends at the first '/' met
    for (p = limit; p > start; ) {
        const char c = *--p;
        if (c == '/') return limit;
        if (c == '.') return p;
    }
    return limit;
}
1303 
haddextension(struct kstring_t * buffer,const char * filename,int replace,const char * new_extension)1304 char *haddextension(struct kstring_t *buffer, const char *filename,
1305                     int replace, const char *new_extension)
1306 {
1307     const char *trailing, *end;
1308 
1309     if (find_scheme_handler(filename)) {
1310         // URL, so alter extensions before any trailing query or fragment parts
1311         // Allow # symbols in s3 URLs
1312         trailing = filename + ((strncmp(filename, "s3://", 5) && strncmp(filename, "s3+http://", 10) && strncmp(filename, "s3+https://", 11))  ? strcspn(filename, "?#") : strcspn(filename, "?"));
1313     }
1314     else {
1315         // Local path, so alter extensions at the end of the filename
1316         trailing = strchr(filename, '\0');
1317     }
1318 
1319     end = replace? strip_extension(filename, trailing) : trailing;
1320 
1321     buffer->l = 0;
1322     if (kputsn(filename, end - filename, buffer) >= 0 &&
1323         kputs(new_extension, buffer) >= 0 &&
1324         kputs(trailing, buffer) >= 0) return buffer->s;
1325     else return NULL;
1326 }
1327 
1328 
1329 /*
1330  * ----------------------------------------------------------------------
1331  * Minimal stub functions for knet, added after the removal of
1332  * hfile_net.c and knetfile.c.
1333  *
1334  * They exist purely for ABI compatibility, but are simply wrappers to
1335  * hFILE.  API should be compatible except knet_fileno (unused?).
1336  *
1337  * CULL THESE and knetfile.h at the next .so version bump.
1338  */
/* Handle type returned by the knet_* stub functions below. */
typedef struct knetFile_s {
    // As per htslib/knetfile.h.  Duplicated here as we don't wish to
    // have any dependence on the deprecated knetfile.h interface, plus
    // it's hopefully only temporary.
    // Of these fields the stubs in this file only set fd and offset;
    // the rest stay zeroed from calloc().
    int type, fd;
    int64_t offset;
    char *host, *port;
    int ctrl_fd, pasv_ip[4], pasv_port, max_response, no_reconnect, is_ready;
    char *response, *retr, *size_cmd;
    int64_t seek_offset;
    int64_t file_size;
    char *path, *http_host;

    // Our local addition
    // The hFILE stream that every knet_* stub forwards to.
    hFILE *hf;
} knetFile;
1355 
1356 HTSLIB_EXPORT
knet_open(const char * fn,const char * mode)1357 knetFile *knet_open(const char *fn, const char *mode) {
1358     knetFile *fp = calloc(1, sizeof(*fp));
1359     if (!fp) return NULL;
1360     if (!(fp->hf = hopen(fn, mode))) {
1361         free(fp);
1362         return NULL;
1363     }
1364 
1365     // FD backend is the only one implementing knet_fileno
1366     fp->fd = fp->hf->backend == &fd_backend
1367         ? ((hFILE_fd *)fp->hf)->fd
1368         : -1;
1369 
1370     return fp;
1371 }
1372 
1373 HTSLIB_EXPORT
knet_dopen(int fd,const char * mode)1374 knetFile *knet_dopen(int fd, const char *mode) {
1375     knetFile *fp = calloc(1, sizeof(*fp));
1376     if (!fp) return NULL;
1377     if (!(fp->hf = hdopen(fd, mode))) {
1378         free(fp);
1379         return NULL;
1380     }
1381     fp->fd = fd;
1382     return fp;
1383 }
1384 
1385 HTSLIB_EXPORT
knet_read(knetFile * fp,void * buf,size_t len)1386 ssize_t knet_read(knetFile *fp, void *buf, size_t len) {
1387     ssize_t r = hread(fp->hf, buf, len);
1388     fp->offset += r>0?r:0;
1389     return r;
1390 }
1391 
1392 HTSLIB_EXPORT
knet_seek(knetFile * fp,off_t off,int whence)1393 off_t knet_seek(knetFile *fp, off_t off, int whence) {
1394     off_t r = hseek(fp->hf, off, whence);
1395     if (r >= 0)
1396         fp->offset = r;
1397     return r;
1398 }
1399 
1400 HTSLIB_EXPORT
knet_close(knetFile * fp)1401 int knet_close(knetFile *fp) {
1402     int r = hclose(fp->hf);
1403     free(fp);
1404     return r;
1405 }
1406