1 /*
2  * Copyright (c) 2008, 2013 Genome Research Ltd.
3  * Author(s): James Bonfield
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  *    1. Redistributions of source code must retain the above copyright notice,
9  *       this list of conditions and the following disclaimer.
10  *
11  *    2. Redistributions in binary form must reproduce the above
12  *       copyright notice, this list of conditions and the following
13  *       disclaimer in the documentation and/or other materials provided
14  *       with the distribution.
15  *
16  *    3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
17  *    Institute nor the names of its contributors may be used to endorse
18  *    or promote products derived from this software without specific
19  *    prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS
22  * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
24  * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH
25  * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #ifdef HAVE_CONFIG_H
35 #include "io_lib_config.h"
36 #endif
37 
38 #include <stdio.h>
39 #include <string.h>
40 #include <stdlib.h>
41 #include <errno.h>
42 #include <unistd.h>
43 #include <inttypes.h>
44 
45 #include <io_lib/os.h>
46 #include <io_lib/hash_table.h>
47 #include <io_lib/srf.h>
48 
/*
 * Command line options, filled in by main() and passed to the per-file
 * routines.
 */
typedef struct {
    int long_format;	/* -l: print name, position and sizes per read */
    int count_only;	/* -c: do not list reads, just print the total */
    int verbose;	/* -v: also print a per-file sequence count */
} opts;
55 
56 /*
57  * Lists the contents of an SRF file.
58  *
59  * Returns num_reads for success
60  *        -1 for failure
61  */
list_file(char * fname,opts * opts)62 int64_t list_file(char *fname, opts *opts) {
63     srf_t *srf;
64     char name[512];
65     int64_t count = 0;
66     int type;
67     uint64_t pos;
68 
69     if (NULL == (srf = srf_open(fname, "r"))) {
70 	perror(fname);
71 	return -1;
72     }
73 
74     /* Scan through file gathering the details to index in memory */
75     while ((type = srf_next_block_details(srf, &pos, name)) >= 0) {
76 	if (type == SRFB_TRACE_BODY) {
77 	    count++;
78 	    if (!opts->count_only) {
79 		if (opts->long_format)
80 		    printf("%-30s %10"PRId64" + %4d + %5d\n",
81 			   name, pos,
82 			   srf->tb.trace_size,
83 			   srf->th.trace_hdr_size);
84 		else
85 		    puts(name);
86 	    }
87 	}
88     }
89 
90     srf_destroy(srf, 1);
91 
92     return count;
93 }
94 
95 /*
96  * Counts the contents of an SRF file.
97  * If the hash index exists it uses this instead.
98  *
99  * Returns num_reads for success
100  *        -1 for failure
101  */
count_file(char * fname,opts * opts)102 int64_t count_file(char *fname, opts *opts) {
103     srf_t *srf;
104     srf_index_hdr_t hdr;
105     off_t skip;
106     int item_sz = 9;
107 
108     if (NULL == (srf = srf_open(fname, "r"))) {
109 	perror(fname);
110 	return -1;
111     }
112 
113     /* Read the index header */
114     if (0 != srf_read_index_hdr(srf, &hdr, 0)) {
115 	srf_destroy(srf, 1);
116 	return list_file(fname, opts);
117     }
118 
119     /* Compute the remaining size of the index and divide by item_sz */
120     if (hdr.dbh_pos_stored_sep)
121 	item_sz += 4;
122     skip = hdr.index_hdr_sz
123 	 + hdr.n_container * 8
124 	 + hdr.n_data_block_hdr * 8
125 	 + hdr.n_buckets * 8;
126 
127     srf_destroy(srf, 1);
128 
129     return (hdr.size - skip - 16/* footer*/) / item_sz;
130 }
131 
/*
 * Prints the command line help and exits; never returns.
 *
 * error is used as the process exit status. When it is non-zero the help
 * text is written to stderr, so that it cannot get mixed into listing
 * output being captured from stdout. For an explicit help request
 * (error of 0) it is written to stdout.
 */
void usage(int error) {
    FILE *out = error ? stderr : stdout;

    fputs("Usage: srf_list [options] srf_file ...\n", out);
    fputs("Options:  -c\tCount only - do not list filenames\n", out);
    fputs("          -v\tVerbose - gives summary data per file too\n", out);
    fputs("          -l\tList in long format. Lines contain:\n", out);
    fputs("            \t    name position body-size header-size\n", out);

    exit(error);
}
141 
142 /*
143  * Lists the contents of a .hash file
144  */
main(int argc,char ** argv)145 int main(int argc, char **argv) {
146     opts opts;
147     int i, c;
148     int64_t count = 0;
149 
150     opts.long_format = 0;
151     opts.count_only = 0;
152     opts.verbose = 0;
153 
154     while ((c = getopt(argc, argv, "lcvh")) != -1) {
155 	switch (c) {
156 	case 'l':
157 	    opts.long_format = 1;
158 	    break;
159 
160 	case 'c':
161 	    opts.count_only = 1;
162 	    break;
163 
164 	case 'v':
165 	    opts.verbose = 1;
166 	    break;
167 
168 	case 'h':
169 	    usage(0);
170 
171 	default:
172 	    usage(1);
173 	}
174     }
175 
176     for (i = optind; i < argc; i++) {
177 	int64_t c;
178 
179 	if (opts.count_only)
180 	    c = count_file(argv[i], &opts);
181 	else
182 	    c = list_file(argv[i], &opts);
183 
184 	if (c < 0)
185 	    return 1;
186 
187 	if (opts.verbose)
188 	    printf("%s: %"PRId64" sequences\n", argv[i], c);
189 	count += c;
190     }
191 
192     if (opts.count_only)
193 	printf("%"PRId64"\n", count);
194 
195     return 0;
196 }
197