1 /// @file htslib/hfile.h
2 /// Buffered low-level input/output streams.
3 /*
4     Copyright (C) 2013-2021 Genome Research Ltd.
5 
6     Author: John Marshall <jm18@sanger.ac.uk>
7 
8 Permission is hereby granted, free of charge, to any person obtaining a copy
9 of this software and associated documentation files (the "Software"), to deal
10 in the Software without restriction, including without limitation the rights
11 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 copies of the Software, and to permit persons to whom the Software is
13 furnished to do so, subject to the following conditions:
14 
15 The above copyright notice and this permission notice shall be included in
16 all copies or substantial portions of the Software.
17 
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 DEALINGS IN THE SOFTWARE.  */
25 
26 #ifndef HTSLIB_HFILE_H
27 #define HTSLIB_HFILE_H
28 
29 #include <string.h>
30 
31 #include <sys/types.h>
32 
33 #include "hts_defs.h"
34 
35 #ifdef __cplusplus
36 extern "C" {
37 #endif
38 
39 struct hFILE_backend;
40 struct kstring_t;
41 
/// Low-level input/output stream handle
/** The members are visible here only so that the inline hFILE functions in
this header can reach them.  They may change in future releases.  User code
should never touch them directly; treat hFILE as an opaque incomplete type.
*/
typedef struct hFILE {
    // @cond internal
    char *buffer;       // Start of the buffer; htell() measures from here
    char *begin;        // Position of the next byte to be read or written
    char *end;          // hgetc()/hread() consume bytes while begin < end
    char *limit;        // hputc()/hputs()/hwrite() append while begin < limit
    const struct hFILE_backend *backend;
    off_t offset;       // Stream offset that corresponds to buffer (see htell())
    unsigned at_eof:1;
    unsigned mobile:1;  // If clear: hread() never calls hread2(), and hwrite()
                        // enlarges the buffer via hfile_set_blksize()
    unsigned readonly:1;
    int has_errno;      // Zero, or the errno value reported by herrno()
    // @endcond
} hFILE;
57 
58 /// Open the named file or URL as a stream
59 /** @return An hFILE pointer, or `NULL` (with _errno_ set) if an error occurred.
60 
61 The usual `fopen(3)` _mode_ letters are supported: one of
62 `r` (read), `w` (write), `a` (append), optionally followed by any of
63 `+` (update), `e` (close on `exec(2)`), `x` (create exclusively),
64 `:` (indicates scheme-specific variable arguments follow).
65 */
66 HTSLIB_EXPORT
67 hFILE *hopen(const char *filename, const char *mode, ...) HTS_RESULT_USED;
68 
69 /// Associate a stream with an existing open file descriptor
70 /** @return An hFILE pointer, or `NULL` (with _errno_ set) if an error occurred.
71 
72 Note that the file must be opened in binary mode, or else
73 there will be problems on platforms that make a difference
74 between text and binary mode.
75 
76 For socket descriptors (on Windows), _mode_ should contain `s`.
77 */
78 HTSLIB_EXPORT
79 hFILE *hdopen(int fd, const char *mode) HTS_RESULT_USED;
80 
81 /// Report whether the file name or URL denotes remote storage
82 /** @return  0 if local, 1 if remote.
83 
84 "Remote" means involving e.g. explicit network access, with the implication
85 that callers may wish to cache such files' contents locally.
86 */
87 HTSLIB_EXPORT
88 int hisremote(const char *filename) HTS_RESULT_USED;
89 
90 /// Append an extension or replace an existing extension
91 /** @param buffer     The kstring to be used to store the modified filename
92     @param filename   The filename to be (copied and) adjusted
93     @param replace    If non-zero, one extension (if any) is removed first
94     @param extension  The extension to be added (e.g. ".csi")
95     @return  The modified filename (i.e., `buffer->s`), or NULL on error.
96     @since   1.10
97 
If _filename_ is a URL, alters extensions at the end of the `hier-part`,
99 leaving any trailing `?query` or `#fragment` unchanged.
100 */
101 HTSLIB_EXPORT
102 char *haddextension(struct kstring_t *buffer, const char *filename,
103                     int replace, const char *extension) HTS_RESULT_USED;
104 
105 /// Flush (for output streams) and close the stream
106 /** @return  0 if successful, or `EOF` (with _errno_ set) if an error occurred.
107 */
108 HTSLIB_EXPORT
109 int hclose(hFILE *fp) HTS_RESULT_USED;
110 
111 /// Close the stream, without flushing or propagating errors
112 /** For use while cleaning up after an error only.  Preserves _errno_.
113 */
114 HTSLIB_EXPORT
115 void hclose_abruptly(hFILE *fp);
116 
117 /// Return the stream's error indicator
118 /** @return  Non-zero (in fact, an _errno_ value) if an error has occurred.
119 
120 This would be called `herror()` and return true/false to parallel `ferror(3)`,
121 but a networking-related `herror(3)` function already exists.
122 */
herrno(hFILE * fp)123 static inline int herrno(hFILE *fp)
124 {
125     return fp->has_errno;
126 }
127 
128 /// Clear the stream's error indicator
hclearerr(hFILE * fp)129 static inline void hclearerr(hFILE *fp)
130 {
131     fp->has_errno = 0;
132 }
133 
134 /// Reposition the read/write stream offset
135 /** @return  The resulting offset within the stream (as per `lseek(2)`),
136     or negative if an error occurred.
137 */
138 HTSLIB_EXPORT
139 off_t hseek(hFILE *fp, off_t offset, int whence) HTS_RESULT_USED;
140 
141 /// Report the current stream offset
142 /** @return  The offset within the stream, starting from zero.
143 */
htell(hFILE * fp)144 static inline off_t htell(hFILE *fp)
145 {
146     return fp->offset + (fp->begin - fp->buffer);
147 }
148 
149 /// Read one character from the stream
150 /** @return  The character read, or `EOF` on end-of-file or error.
151 */
hgetc(hFILE * fp)152 static inline int hgetc(hFILE *fp)
153 {
154     extern int hgetc2(hFILE *);
155     return (fp->end > fp->begin)? (unsigned char) *(fp->begin++) : hgetc2(fp);
156 }
157 
158 /// Read from the stream until the delimiter, up to a maximum length
159 /** @param buffer  The buffer into which bytes will be written
160     @param size    The size of the buffer
161     @param delim   The delimiter (interpreted as an `unsigned char`)
162     @param fp      The file stream
163     @return  The number of bytes read, or negative on error.
164     @since   1.4
165 
166 Bytes will be read into the buffer up to and including a delimiter, until
167 EOF is reached, or _size-1_ bytes have been written, whichever comes first.
168 The string will then be terminated with a NUL byte (`\0`).
169 */
170 HTSLIB_EXPORT
171 ssize_t hgetdelim(char *buffer, size_t size, int delim, hFILE *fp)
172     HTS_RESULT_USED;
173 
174 /// Read a line from the stream, up to a maximum length
175 /** @param buffer  The buffer into which bytes will be written
176     @param size    The size of the buffer
177     @param fp      The file stream
178     @return  The number of bytes read, or negative on error.
179     @since   1.4
180 
181 Specialization of hgetdelim() for a `\n` delimiter.
182 */
183 static inline ssize_t HTS_RESULT_USED
hgetln(char * buffer,size_t size,hFILE * fp)184 hgetln(char *buffer, size_t size, hFILE *fp)
185 {
186     return hgetdelim(buffer, size, '\n', fp);
187 }
188 
189 /// Read a line from the stream, up to a maximum length
190 /** @param buffer  The buffer into which bytes will be written
191     @param size    The size of the buffer (must be > 1 to be useful)
192     @param fp      The file stream
193     @return  _buffer_ on success, or `NULL` if an error occurred.
194     @since   1.4
195 
196 This function can be used as a replacement for `fgets(3)`, or together with
197 kstring's `kgetline()` to read arbitrarily-long lines into a _kstring_t_.
198 */
199 HTSLIB_EXPORT
200 char *hgets(char *buffer, int size, hFILE *fp) HTS_RESULT_USED;
201 
202 /// Peek at characters to be read without removing them from buffers
203 /** @param fp      The file stream
204     @param buffer  The buffer to which the peeked bytes will be written
205     @param nbytes  The number of bytes to peek at; limited by the size of the
206                    internal buffer, which could be as small as 4K.
207     @return  The number of bytes peeked, which may be less than _nbytes_
208              if EOF is encountered; or negative, if there was an I/O error.
209 
210 The characters peeked at remain in the stream's internal buffer, and will be
211 returned by later hread() etc calls.
212 */
213 HTSLIB_EXPORT
214 ssize_t hpeek(hFILE *fp, void *buffer, size_t nbytes) HTS_RESULT_USED;
215 
216 /// Read a block of characters from the file
217 /** @return  The number of bytes read, or negative if an error occurred.
218 
219 The full _nbytes_ requested will be returned, except as limited by EOF
220 or I/O errors.
221 */
222 static inline ssize_t HTS_RESULT_USED
hread(hFILE * fp,void * buffer,size_t nbytes)223 hread(hFILE *fp, void *buffer, size_t nbytes)
224 {
225     extern ssize_t hread2(hFILE *, void *, size_t, size_t);
226 
227     size_t n = fp->end - fp->begin;
228     if (n > nbytes) n = nbytes;
229     memcpy(buffer, fp->begin, n);
230     fp->begin += n;
231     return (n == nbytes || !fp->mobile)? (ssize_t) n : hread2(fp, buffer, nbytes, n);
232 }
233 
234 /// Write a character to the stream
235 /** @return  The character written, or `EOF` if an error occurred.
236 */
hputc(int c,hFILE * fp)237 static inline int hputc(int c, hFILE *fp)
238 {
239     extern int hputc2(int, hFILE *);
240     if (fp->begin < fp->limit) *(fp->begin++) = c;
241     else c = hputc2(c, fp);
242     return c;
243 }
244 
245 /// Write a string to the stream
246 /** @return  0 if successful, or `EOF` if an error occurred.
247 */
hputs(const char * text,hFILE * fp)248 static inline int hputs(const char *text, hFILE *fp)
249 {
250     extern int hputs2(const char *, size_t, size_t, hFILE *);
251 
252     size_t nbytes = strlen(text), n = fp->limit - fp->begin;
253     if (n > nbytes) n = nbytes;
254     memcpy(fp->begin, text, n);
255     fp->begin += n;
256     return (n == nbytes)? 0 : hputs2(text, nbytes, n, fp);
257 }
258 
259 /// Write a block of characters to the file
260 /** @return  Either _nbytes_, or negative if an error occurred.
261 
262 In the absence of I/O errors, the full _nbytes_ will be written.
263 */
264 static inline ssize_t HTS_RESULT_USED
hwrite(hFILE * fp,const void * buffer,size_t nbytes)265 hwrite(hFILE *fp, const void *buffer, size_t nbytes)
266 {
267     extern ssize_t hwrite2(hFILE *, const void *, size_t, size_t);
268     extern int hfile_set_blksize(hFILE *fp, size_t bufsiz);
269 
270     if (!fp->mobile) {
271         size_t n = fp->limit - fp->begin;
272         if (n < nbytes) {
273             hfile_set_blksize(fp, fp->limit - fp->buffer + nbytes);
274             fp->end = fp->limit;
275         }
276     }
277 
278     size_t n = fp->limit - fp->begin;
279     if (nbytes >= n && fp->begin == fp->buffer) {
280         // Go straight to hwrite2 if the buffer is empty and the request
281         // won't fit.
282         return hwrite2(fp, buffer, nbytes, 0);
283     }
284 
285     if (n > nbytes) n = nbytes;
286     memcpy(fp->begin, buffer, n);
287     fp->begin += n;
288     return (n==nbytes)? (ssize_t) n : hwrite2(fp, buffer, nbytes, n);
289 }
290 
291 /// For writing streams, flush buffered output to the underlying stream
292 /** @return  0 if successful, or `EOF` if an error occurred.
293 
294 This includes low-level flushing such as via `fdatasync(2)`.
295 */
296 HTSLIB_EXPORT
297 int hflush(hFILE *fp) HTS_RESULT_USED;
298 
/// For hfile_mem: get the internal buffer and its size from an hFILE
300 /** @return  buffer if successful, or NULL if an error occurred
301 
302 The buffer returned should not be freed as this will happen when the
303 hFILE is closed.
304 */
305 HTSLIB_EXPORT
306 char *hfile_mem_get_buffer(hFILE *file, size_t *length);
307 
/// For hfile_mem: get the internal buffer and its size from an hFILE.
309 /** @return  buffer if successful, or NULL if an error occurred
310 
311 This is similar to hfile_mem_get_buffer except that ownership of the
312 buffer is granted to the caller, who now has responsibility for freeing
313 it.  From this point onwards, the hFILE should not be used for any
314 purpose other than closing.
315 */
316 HTSLIB_EXPORT
317 char *hfile_mem_steal_buffer(hFILE *file, size_t *length);
318 
319 /// Fills out sc_list[] with the list of known URL schemes.
320 /**
 * @param plugin   [in]     Restricts schemes to only those from _plugin_.
322  * @param sc_list  [out]    Filled out with the scheme names
323  * @param nschemes [in/out] Size of sc_list (in) and number returned (out)
324  *
325  * Plugin may be passed in as NULL in which case all schemes are returned.
326  * Use plugin "built-in" to list the built in schemes.
327  * The size of sc_list is determined by the input value of *nschemes.
328  * This is updated to return the output size.  It is up to the caller to
329  * determine whether to call again with a larger number if this is too small.
330  *
331  * The return value represents the total number found matching plugin, which
332  * may be larger than *nschemes if too small a value was specified.
333  *
334  * @return the number of schemes found on success.
335  *         -1 on failure
336  */
337 HTSLIB_EXPORT
338 int hfile_list_schemes(const char *plugin, const char *sc_list[], int *nschemes);
339 
340 /// Fills out plist[] with the list of known hFILE plugins.
/**
342  * @param plist    [out]    Filled out with the plugin names
343  * @param nplugins [in/out] Size of plist (in) and number returned (out)
344  *
345  * The size of plist is determined by the input value of *nplugins.
346  * This is updated to return the output size.  It is up to the caller to
347  * determine whether to call again with a larger number if this is too small.
348  *
349  * The return value represents the total number found, which may be
350  * larger than *nplugins if too small a value was specified.
351  *
352  * @return the number of plugins found on success.
353  *         -1 on failure
354  */
355 HTSLIB_EXPORT
356 int hfile_list_plugins(const char *plist[], int *nplugins);
357 
358 /// Tests for the presence of a specific hFILE plugin.
/**
360  * @param name     The name of the plugin to query.
361  *
362  * @return 1 if found, 0 otherwise.
363  */
364 HTSLIB_EXPORT
365 int hfile_has_plugin(const char *name);
366 
367 #ifdef __cplusplus
368 }
369 #endif
370 
371 #endif
372