/*

  VSEARCH: a versatile open source tool for metagenomics

  Copyright (C) 2014-2021, Torbjorn Rognes, Frederic Mahe and Tomas Flouri
  All rights reserved.

  Contact: Torbjorn Rognes <torognes@ifi.uio.no>,
  Department of Informatics, University of Oslo,
  PO Box 1080 Blindern, NO-0316 Oslo, Norway

  This software is dual-licensed and available under a choice
  of one of two licenses, either under the terms of the GNU
  General Public License version 3 or the BSD 2-Clause License.


  GNU General Public License version 3

  This program is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program.  If not, see <http://www.gnu.org/licenses/>.


  The BSD 2-Clause License

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions
  are met:

  1. Redistributions of source code must retain the above copyright
  notice, this list of conditions and the following disclaimer.

  2. Redistributions in binary form must reproduce the above copyright
  notice, this list of conditions and the following disclaimer in the
  documentation and/or other materials provided with the distribution.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

*/
60 
/*
  Character buffer together with three 64-bit bookkeeping counters.

  NOTE(review): the meaning of each counter is inferred from its name
  only; confirm against the buffer_* implementations before relying on
  the descriptions below.
*/
struct fastx_buffer_s
{
  char * data;        /* pointer to the buffer contents */
  uint64_t length;    /* presumably the number of bytes currently stored */
  uint64_t alloc;     /* presumably the number of bytes allocated for data */
  uint64_t position;  /* presumably the current read offset into data */
};
68 
/*
  Helper routines operating on a struct fastx_buffer_s.

  NOTE(review): the definitions are not visible from this header; the
  descriptions are inferred from the names and should be confirmed
  against the implementation.
*/

/* Presumably prepares `buffer` for first use. */
void buffer_init(struct fastx_buffer_s * buffer);

/* Presumably releases whatever storage `buffer` holds. */
void buffer_free(struct fastx_buffer_s * buffer);

/* Presumably appends `len` bytes starting at `source_buf` to `dest_buffer`. */
void buffer_extend(struct fastx_buffer_s * dest_buffer,
                   char * source_buf,
                   uint64_t len);

/* Presumably ensures `buffer` has room for `x` more bytes -- TODO confirm. */
void buffer_makespace(struct fastx_buffer_s * buffer, uint64_t x);
75 
76 struct fastx_s
77 {
78   bool is_pipe;
79   bool is_fastq;
80   bool is_empty;
81 
82   FILE * fp;
83 
84 #ifdef HAVE_ZLIB_H
85   gzFile fp_gz;
86 #endif
87 
88 #ifdef HAVE_BZLIB_H
89   BZFILE * fp_bz;
90 #endif
91 
92   struct fastx_buffer_s file_buffer;
93 
94   struct fastx_buffer_s header_buffer;
95   struct fastx_buffer_s sequence_buffer;
96   struct fastx_buffer_s plusline_buffer;
97   struct fastx_buffer_s quality_buffer;
98 
99   uint64_t file_size;
100   uint64_t file_position;
101 
102   uint64_t lineno;
103   uint64_t lineno_start;
104   int64_t seqno;
105 
106   uint64_t stripped_all;
107   uint64_t stripped[256];
108 
109   int format;
110 };
111 
/* Handle type taken by the fastx_* functions: a pointer to struct fastx_s. */
typedef struct fastx_s * fastx_handle;
113 
114 
115 /* fastx input */
116 
117 bool fastx_is_fastq(fastx_handle h);
118 void fastx_filter_header(fastx_handle h, bool truncateatspace);
119 fastx_handle fastx_open(const char * filename);
120 void fastx_close(fastx_handle h);
121 bool fastx_next(fastx_handle h,
122                 bool truncateatspace,
123                 const unsigned char * char_mapping);
124 uint64_t fastx_get_position(fastx_handle h);
125 uint64_t fastx_get_size(fastx_handle h);
126 uint64_t fastx_get_lineno(fastx_handle h);
127 uint64_t fastx_get_seqno(fastx_handle h);
128 char * fastx_get_header(fastx_handle h);
129 char * fastx_get_sequence(fastx_handle h);
130 uint64_t fastx_get_header_length(fastx_handle h);
131 uint64_t fastx_get_sequence_length(fastx_handle h);
132 
133 char * fastx_get_quality(fastx_handle h);
134 int64_t fastx_get_abundance(fastx_handle h);
135 
136 uint64_t fastx_file_fill_buffer(fastx_handle h);
137