1 /*
2 * Copyright (c) 2008, 2013 Genome Research Ltd.
3 * Author(s): James Bonfield
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above
12 * copyright notice, this list of conditions and the following
13 * disclaimer in the documentation and/or other materials provided
14 * with the distribution.
15 *
16 * 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
17 * Institute nor the names of its contributors may be used to endorse
18 * or promote products derived from this software without specific
19 * prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS
22 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
24 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH
25 * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33
34 #ifdef HAVE_CONFIG_H
35 #include "io_lib_config.h"
36 #endif
37
38 #include <stdio.h>
39 #include <string.h>
40 #include <stdlib.h>
41 #include <errno.h>
42 #include <unistd.h>
43 #include <inttypes.h>
44
45 #include <io_lib/os.h>
46 #include <io_lib/hash_table.h>
47 #include <io_lib/srf.h>
48
/*
 * Command line options, one flag per getopt() switch handled in main().
 * Each field is 0 when the switch was not given and 1 when it was.
 */
typedef struct {
    int long_format;    /* -l: print name, position and sizes for each read */
    int count_only;     /* -c: report totals only, no per-read output */
    int verbose;        /* -v: print a sequence count for every file */
} opts;
55
56 /*
57 * Lists the contents of an SRF file.
58 *
59 * Returns num_reads for success
60 * -1 for failure
61 */
list_file(char * fname,opts * opts)62 int64_t list_file(char *fname, opts *opts) {
63 srf_t *srf;
64 char name[512];
65 int64_t count = 0;
66 int type;
67 uint64_t pos;
68
69 if (NULL == (srf = srf_open(fname, "r"))) {
70 perror(fname);
71 return -1;
72 }
73
74 /* Scan through file gathering the details to index in memory */
75 while ((type = srf_next_block_details(srf, &pos, name)) >= 0) {
76 if (type == SRFB_TRACE_BODY) {
77 count++;
78 if (!opts->count_only) {
79 if (opts->long_format)
80 printf("%-30s %10"PRId64" + %4d + %5d\n",
81 name, pos,
82 srf->tb.trace_size,
83 srf->th.trace_hdr_size);
84 else
85 puts(name);
86 }
87 }
88 }
89
90 srf_destroy(srf, 1);
91
92 return count;
93 }
94
95 /*
96 * Counts the contents of an SRF file.
97 * If the hash index exists it uses this instead.
98 *
99 * Returns num_reads for success
100 * -1 for failure
101 */
count_file(char * fname,opts * opts)102 int64_t count_file(char *fname, opts *opts) {
103 srf_t *srf;
104 srf_index_hdr_t hdr;
105 off_t skip;
106 int item_sz = 9;
107
108 if (NULL == (srf = srf_open(fname, "r"))) {
109 perror(fname);
110 return -1;
111 }
112
113 /* Read the index header */
114 if (0 != srf_read_index_hdr(srf, &hdr, 0)) {
115 srf_destroy(srf, 1);
116 return list_file(fname, opts);
117 }
118
119 /* Compute the remaining size of the index and divide by item_sz */
120 if (hdr.dbh_pos_stored_sep)
121 item_sz += 4;
122 skip = hdr.index_hdr_sz
123 + hdr.n_container * 8
124 + hdr.n_data_block_hdr * 8
125 + hdr.n_buckets * 8;
126
127 srf_destroy(srf, 1);
128
129 return (hdr.size - skip - 16/* footer*/) / item_sz;
130 }
131
/*
 * Prints the command line help and terminates the program.
 *
 * error is used as the process exit status. When it is zero (help was
 * asked for) the text is written to stdout; for any other value it is
 * written to stderr, so that a usage error is not mixed into listing or
 * count output that may be piped elsewhere.
 *
 * Does not return.
 */
void usage(int error) {
    FILE *out = error ? stderr : stdout;

    fputs("Usage: srf_list [options] srf_file ...\n", out);
    fputs("Options: -c\tCount only - do not list filenames\n", out);
    fputs(" -v\tVerbose - gives summary data per file too\n", out);
    fputs(" -l\tList in long format. Lines contain:\n", out);
    fputs(" \t name position body-size header-size\n", out);

    exit(error);
}
141
142 /*
143 * Lists the contents of a .hash file
144 */
main(int argc,char ** argv)145 int main(int argc, char **argv) {
146 opts opts;
147 int i, c;
148 int64_t count = 0;
149
150 opts.long_format = 0;
151 opts.count_only = 0;
152 opts.verbose = 0;
153
154 while ((c = getopt(argc, argv, "lcvh")) != -1) {
155 switch (c) {
156 case 'l':
157 opts.long_format = 1;
158 break;
159
160 case 'c':
161 opts.count_only = 1;
162 break;
163
164 case 'v':
165 opts.verbose = 1;
166 break;
167
168 case 'h':
169 usage(0);
170
171 default:
172 usage(1);
173 }
174 }
175
176 for (i = optind; i < argc; i++) {
177 int64_t c;
178
179 if (opts.count_only)
180 c = count_file(argv[i], &opts);
181 else
182 c = list_file(argv[i], &opts);
183
184 if (c < 0)
185 return 1;
186
187 if (opts.verbose)
188 printf("%s: %"PRId64" sequences\n", argv[i], c);
189 count += c;
190 }
191
192 if (opts.count_only)
193 printf("%"PRId64"\n", count);
194
195 return 0;
196 }
197