1 /*
2 * Copyright (c) 2008-2010, 2013 Genome Research Ltd.
3 * Author(s): James Bonfield
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above
12 * copyright notice, this list of conditions and the following
13 * disclaimer in the documentation and/or other materials provided
14 * with the distribution.
15 *
16 * 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
17 * Institute nor the names of its contributors may be used to endorse
18 * or promote products derived from this software without specific
19 * prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS
22 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
24 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH
25 * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33
34 /*
35 * This performs a linear (non-indexed) search for a trace in an SRF archive.
36 *
37 * It's not intended as a suitable production program or as a library of code
38 * to use, but as a test and benchmark statistic.
39 */
40
41 #ifdef HAVE_CONFIG_H
42 #include "io_lib_config.h"
43 #endif
44
45 #include <string.h>
46 #include <stdio.h>
47 #include <unistd.h>
48 #include <ctype.h>
49 #include <sys/stat.h>
50 #include <sys/types.h>
51 #include <io_lib/Read.h>
52 #include <io_lib/misc.h>
53 #include <io_lib/ztr.h>
54 #include <io_lib/srf.h>
55 #include <io_lib/hash_table.h>
56 #include <io_lib/xalloc.h>
57
58 #define LEVEL_READ (1 << 0)
59 #define LEVEL_CHUNK (1 << 1)
60 #define LEVEL_NAME (1 << 2)
61 #define LEVEL_BASE (1 << 3)
62 #define LEVEL_ALL (LEVEL_READ | LEVEL_CHUNK | LEVEL_NAME | LEVEL_BASE );
63
64 /* only checks the first 10 traces */
65 #define LEVEL_CHECK 255
66
67 #define READ_GOOD 0
68 #define READ_BAD 1
69 #define READ_TOTAL 2
70
71 #define NREADS 3
72
73 #define READ_GOOD_STR "GOOD"
74 #define READ_BAD_STR "BAD"
75 #define READ_TOTAL_STR "TOTAL"
76
77 /* see ztr.h for a list of all possible ztr chunk types */
78
79 #define CHUNK_BASE 0
80 #define CHUNK_CNF1 1
81 #define CHUNK_CNF4 2
82 #define CHUNK_SAMP 3
83 #define CHUNK_SMP4 4
84 #define CHUNK_REGN 5
85
86 #define NCHUNKS 6
87
88 #define CHUNK_BASE_TYPE ZTR_TYPE_BASE
89 #define CHUNK_CNF1_TYPE ZTR_TYPE_CNF1
90 #define CHUNK_CNF4_TYPE ZTR_TYPE_CNF4
91 #define CHUNK_SAMP_TYPE ZTR_TYPE_SAMP
92 #define CHUNK_SMP4_TYPE ZTR_TYPE_SMP4
93 #define CHUNK_REGN_TYPE ZTR_TYPE_REGN
94
95 #define KEY_TYPE 0
96 #define KEY_VALTYPE 1
97 #define KEY_GROUP 2
98 #define KEY_OFFS 3
99 #define KEY_SCALE 4
100 #define KEY_COORD 5
101 #define KEY_NAME 6
102
103 #define NKEYS 7
104
105 #define KEY_TYPE_STR "TYPE"
106 #define KEY_VALTYPE_STR "VALTYPE"
107 #define KEY_GROUP_STR "GROUP"
108 #define KEY_OFFS_STR "OFFS"
109 #define KEY_SCALE_STR "SCALE"
110 #define KEY_COORD_STR "COORD"
111 #define KEY_NAME_STR "NAME"
112
113 #define TYPE_PROC 0
114 #define TYPE_SLXI 1
115 #define TYPE_SLXN 2
116 #define TYPE_0FAM 3
117 #define TYPE_1CY3 4
118 #define TYPE_2TXR 5
119 #define TYPE_3CY5 6
120
121 #define NTYPES 7
122
123 #define TYPE_PROC_STR "PROC"
124 #define TYPE_SLXI_STR "SLXI"
125 #define TYPE_SLXN_STR "SLXN"
126 #define TYPE_0FAM_STR "0FAM"
127 #define TYPE_1CY3_STR "1CY3"
128 #define TYPE_2TXR_STR "2TXR"
129 #define TYPE_3CY5_STR "3CY5"
130
131 #define MAX_REGIONS 4
132
133 /* regn chunk */
134 typedef struct {
135 char coord;
136 char *region_names;
137 int nregions;
138 char *name[MAX_REGIONS];
139 char code[MAX_REGIONS];
140 int start[MAX_REGIONS];
141 int length[MAX_REGIONS];
142 int count;
143 } regn_t;
144
145 /* ------------------------------------------------------------------------ */
146 /*
147 * Print usage message to stderr and exit with the given \"code\".
148 */
usage(int code)149 void usage(int code) {
150 fprintf(stderr, "Usage: srf_info [-level level_bitmap] input(s)\n");
151 fprintf(stderr, "Options:\n");
152 fprintf(stderr, " -l level_bitmap \n");
153 fprintf(stderr, " 1\tCount of good/bad reads.\n");
154 fprintf(stderr, " 2\tCounts for selected chunk types.\n");
155 fprintf(stderr, " 4\tTrace count and trace name prefix for each trace_header.\n");
156 fprintf(stderr, " 8\tBase count.\n");
157 fprintf(stderr, "\n");
158
159 exit(code);
160 }
161
162 /*
163 * Parse the REGN chunk, add to regn HASH
164 *
165 * Returns corresponding HashItem * from regn Hash
166 */
parse_regn(ztr_t * z,ztr_chunk_t * chunk,HashTable * regn_hash)167 HashItem *parse_regn(ztr_t *z, ztr_chunk_t *chunk, HashTable *regn_hash) {
168 char key[1024];
169 char *name;
170 HashItem *hi;
171 regn_t *regn;
172 size_t l;
173
174 uncompress_chunk(z, chunk);
175
176 /* the hash key is a combination of the region names and boundaries */
177 name = ztr_lookup_mdata_value(z, chunk, "NAME");
178 l = snprintf(key, sizeof(key), "names=%s", name);
179 if( chunk->dlength ){
180 int nbndy = (chunk->dlength-1)/4;
181 uint4 *bndy = (uint4 *)(chunk->data+1);
182 int ibndy;
183 for (ibndy=0; ibndy<nbndy; ibndy++) {
184 if( ibndy )
185 l += snprintf(key + l, sizeof(key) - l,
186 ";%d", be_int4(bndy[ibndy]));
187 else
188 l += snprintf(key + l, sizeof(key) - l,
189 " boundaries=%d", be_int4(bndy[ibndy]));
190 }
191 }
192
193 if (NULL == (hi = (HashTableSearch(regn_hash, key, strlen(key))))) {
194 int iregion, nregions = 0;
195 char *coord;
196 char *cp1;
197 uint4 bndy[MAX_REGIONS];
198 int ibndy, nbndy = 0;
199 HashData hd;
200
201 if( NULL == (regn = (regn_t *)malloc(sizeof(regn_t)))) {
202 return NULL;
203 }
204
205 coord = ztr_lookup_mdata_value(z, chunk, "COORD");
206 regn->coord = (NULL == coord ? 'B' : *coord );
207
208 regn->region_names = strdup(name);
209
210 cp1 = strtok (regn->region_names,";");
211 while(cp1) {
212 char *cp2;
213 if(NULL == (cp2 = strchr(cp1,':'))) {
214 fprintf(stderr, "Invalid region name/code pair %s\n", cp1);
215 return NULL;
216 }
217 *cp2++ = '\0';
218 regn->name[nregions] = cp1;
219 regn->code[nregions] = *cp2;
220 nregions++;
221 cp1 = strtok (NULL, ";");
222 }
223
224 regn->nregions = nregions;
225
226 if( chunk->dlength ) {
227 nbndy = (chunk->dlength-1)/4;
228 memcpy(bndy, chunk->data+1, chunk->dlength-1);
229 }
230
231 for( iregion=0, ibndy=0; iregion<nregions; iregion++) {
232 /* start = (start + length of previous region) or 0 if no previous region */
233 /* length = (next boundary - start of region) or -1 if no next boundary */
234 if( regn->code[iregion] == 'E' ){
235 /* no sequence, length = 0 */
236 regn->start[iregion] = (iregion ? (regn->start[iregion-1] + regn->length[iregion-1]) : 0);
237 regn->length[iregion] = 0;
238 }else{
239 if( ibndy > nbndy ){
240 fprintf(stderr, "More name/code pairs than boundaries\n");
241 return NULL;
242 }
243 regn->start[iregion] = (iregion ? (regn->start[iregion-1] + regn->length[iregion-1]) : 0);
244 regn->length[iregion] = (ibndy == nbndy ? -1 : (be_int4(bndy[ibndy])-regn->start[iregion]));
245 ibndy++;
246 }
247 }
248
249 regn->count = 1;
250
251 hd.p = regn;
252 if (NULL == (hi = HashTableAdd(regn_hash, key, strlen(key), hd, NULL))) {
253 free(regn->region_names);
254 free(regn);
255 return NULL;
256 }
257 } else {
258 regn = (regn_t *)(hi->data.p);
259 regn->count++;
260 }
261
262 return hi;
263 }
264
265 /*
266 * Parse the sequence
267 *
268 * Returns 0 on success.
269 */
parse_base(ztr_t * z,ztr_chunk_t * chunk,uint64_t * base_count)270 int parse_base(ztr_t *z, ztr_chunk_t *chunk, uint64_t *base_count) {
271 int i;
272
273 uncompress_chunk(z, chunk);
274
275 for (i = 1; i < chunk->dlength; i++) {
276 char base = chunk->data[i];
277 uint1 key;
278 switch (base) {
279 case 'A': case 'a':
280 key = 0;
281 break;
282 case 'C': case 'c':
283 key = 1;
284 break;
285 case 'G': case 'g':
286 key = 2;
287 break;
288 case 'T': case 't':
289 key = 3;
290 break;
291 default:
292 key = 4;
293 break;
294 }
295 base_count[key]++;
296 }
297
298 return 0;
299 }
300
301 /*
302 * count the mdata keys
303 *
304 * Returns 0 on success.
305 */
count_mdata_keys(ztr_t * z,ztr_chunk_t * chunk,int ichunk,long key_count[NCHUNKS][NKEYS],long type_count[NCHUNKS][NTYPES])306 int count_mdata_keys(ztr_t *z, ztr_chunk_t *chunk, int ichunk, long key_count[NCHUNKS][NKEYS], long type_count[NCHUNKS][NTYPES]) {
307 char *keys_str[] = {KEY_TYPE_STR, KEY_VALTYPE_STR, KEY_GROUP_STR, KEY_OFFS_STR, KEY_SCALE_STR, KEY_COORD_STR, KEY_NAME_STR};
308 char *types_str[] = {TYPE_PROC_STR, TYPE_SLXI_STR, TYPE_SLXN_STR, TYPE_0FAM_STR, TYPE_1CY3_STR, TYPE_2TXR_STR, TYPE_3CY5_STR};
309 int ikey, itype;
310
311 if (z->header.version_major > 1 ||
312 z->header.version_minor >= 2) {
313 /* ZTR format 1.2 onwards */
314
315 char *cp = chunk->mdata;
316 int32_t dlen = chunk->mdlength;
317
318 /*
319 * NB: we may wish to rewrite this using a dedicated state machine
320 * instead of strlen/strcmp as this currently assumes the meta-
321 * data is correctly formatted, which we cannot assume as the
322 * metadata is external and outside of our control.
323 * Passing in non-nul terminated strings could crash this code.
324 */
325 while (dlen > 0) {
326 size_t l;
327
328 /* key */
329 l = strlen(cp);
330 for (ikey=0; ikey<NKEYS; ikey++)
331 if(0 == strcmp(cp, keys_str[ikey]))
332 break;
333
334 cp += l+1;
335 dlen -= l+1;
336
337 /* value */
338 if (ikey < NKEYS)
339 key_count[ichunk][ikey]++;
340
341 /* for the type key check the value */
342 if (ikey == KEY_TYPE && (ichunk == CHUNK_SAMP || ichunk == CHUNK_SMP4)) {
343 for (itype=0; itype<NTYPES; itype++)
344 if(0 == strcmp(cp, types_str[itype]))
345 break;
346 if(itype < NTYPES)
347 type_count[ichunk][itype]++;
348 }
349
350 l = strlen(cp);
351 cp += l+1;
352 dlen -= l+1;
353 }
354
355 } else {
356 /* v1.1 and before only supported a few types, specifically coded
357 * per chunk type.
358 */
359
360 switch (chunk->type) {
361 case ZTR_TYPE_SAMP:
362 case ZTR_TYPE_SMP4:
363 key_count[ichunk][KEY_TYPE]++;
364 for (itype=0; itype<NTYPES; itype++)
365 if(0 == strcmp(chunk->mdata, types_str[itype])) {
366 type_count[ichunk][itype]++;
367 }
368 break;
369
370 default:
371 break;
372 }
373 }
374
375 return 0;
376 }
377
378 /*
379 * As per partial_decode_ztr in srf.c, but without uncompress_ztr.
380 */
partial_decode_ztr2(srf_t * srf,mFILE * mf,ztr_t * z)381 ztr_t *partial_decode_ztr2(srf_t *srf, mFILE *mf, ztr_t *z) {
382 ztr_t *ztr;
383 ztr_chunk_t *chunk;
384 long pos = 0;
385
386 if (z) {
387 /* Use existing ZTR object => already loaded header */
388 ztr = z;
389
390 } else {
391 /* Allocate or use existing ztr */
392 if (NULL == (ztr = new_ztr()))
393 return NULL;
394
395 /* Read the header */
396 if (-1 == ztr_read_header(mf, &ztr->header)) {
397 if (!z)
398 delete_ztr(ztr);
399 mrewind(mf);
400 return NULL;
401 }
402
403 /* Check magic number and version */
404 if (memcmp(ztr->header.magic, ZTR_MAGIC, 8) != 0) {
405 if (!z)
406 delete_ztr(ztr);
407 mrewind(mf);
408 return NULL;
409 }
410
411 if (ztr->header.version_major != ZTR_VERSION_MAJOR) {
412 if (!z)
413 delete_ztr(ztr);
414 mrewind(mf);
415 return NULL;
416 }
417 }
418
419 /* Load chunks */
420 pos = mftell(mf);
421 while ((chunk = ztr_read_chunk_hdr(mf))) {
422 chunk->data = (char *)xmalloc(chunk->dlength);
423 if (chunk->dlength != mfread(chunk->data, 1, chunk->dlength, mf))
424 break;
425
426 ztr->nchunks++;
427 ztr->chunk = (ztr_chunk_t *)xrealloc(ztr->chunk, ztr->nchunks *
428 sizeof(ztr_chunk_t));
429 memcpy(&ztr->chunk[ztr->nchunks-1], chunk, sizeof(*chunk));
430 xfree(chunk);
431 pos = mftell(mf);
432 }
433
434 /*
435 * At this stage we're 'pos' into the mFILE mf with any remainder being
436 * a partial block.
437 */
438 if (0 == ztr->nchunks) {
439 if (!z)
440 delete_ztr(ztr);
441 mrewind(mf);
442 return NULL;
443 }
444
445 /* Ensure we exit at the start of a ztr CHUNK */
446 mfseek(mf, pos, SEEK_SET);
447
448 /* If this is the header part, ensure we uncompress and init. data */
449 if (!z) {
450 /* Force caching of huffman code_sets */
451 ztr_find_hcode(ztr, CODE_USER);
452
453 /* And uncompress the rest */
454 uncompress_ztr(ztr);
455 }
456
457 return ztr;
458 }
459
460 /*
461 * Given the archive name and the level_mode
462 * generate information about the archive
463 *
464 * Note the generated srf file is NOT indexed
465 *
466 * Returns 0 on success.
467 */
srf_info(char * input,int level_mode,long * read_count,long * chunk_count,uint64_t * chunk_size,long key_count[NCHUNKS][NKEYS],long type_count[NCHUNKS][NTYPES],HashTable * regn_hash,uint64_t * base_count)468 int srf_info(char *input, int level_mode, long *read_count, long *chunk_count,
469 uint64_t *chunk_size, long key_count[NCHUNKS][NKEYS],
470 long type_count[NCHUNKS][NTYPES], HashTable *regn_hash,
471 uint64_t *base_count) {
472 srf_t *srf;
473 off_t pos;
474 int type;
475 int count = 0;
476 long trace_body_count = 0;
477 char name[1024];
478
479 if (NULL == (srf = srf_open(input, "rb"))) {
480 perror(input);
481 return 1;
482 }
483
484 while ((type = srf_next_block_type(srf)) >= 0) {
485
486 switch (type) {
487 case SRFB_CONTAINER:
488 if( trace_body_count ){
489 if( level_mode & LEVEL_NAME )
490 printf( " ... %s x%ld\n", name+strlen(srf->th.id_prefix), trace_body_count);
491 trace_body_count = 0;
492 }
493 if (0 != srf_read_cont_hdr(srf, &srf->ch)) {
494 fprintf(stderr, "Error reading container header.\nExiting.\n");
495 exit(1);
496 }
497 break;
498
499 case SRFB_XML:
500 if( trace_body_count ){
501 if( level_mode & LEVEL_NAME )
502 printf( " ... %s x%ld\n", name+strlen(srf->th.id_prefix), trace_body_count);
503 trace_body_count = 0;
504 }
505 if (0 != srf_read_xml(srf, &srf->xml)) {
506 fprintf(stderr, "Error reading XML.\nExiting.\n");
507 exit(1);
508 }
509 break;
510
511 case SRFB_TRACE_HEADER:
512 if( trace_body_count ){
513 if( level_mode & LEVEL_NAME )
514 printf( " ... %s x%ld\n", name+strlen(srf->th.id_prefix), trace_body_count);
515 trace_body_count = 0;
516 }
517 if (0 != srf_read_trace_hdr(srf, &srf->th)) {
518 fprintf(stderr, "Error reading trace header.\nExiting.\n");
519 exit(1);
520 }
521
522 if( 0 == (level_mode & (LEVEL_CHUNK | LEVEL_BASE)) )
523 break;
524
525 /* Decode ZTR chunks in the header */
526 if (srf->mf)
527 mfdestroy(srf->mf);
528
529 srf->mf = mfcreate(NULL, 0);
530 if (srf->th.trace_hdr_size)
531 mfwrite(srf->th.trace_hdr, 1, srf->th.trace_hdr_size, srf->mf);
532 if (srf->ztr)
533 delete_ztr(srf->ztr);
534 mrewind(srf->mf);
535
536 if (NULL != (srf->ztr = partial_decode_ztr(srf, srf->mf, NULL))) {
537 srf->mf_pos = mftell(srf->mf);
538 } else {
539 /* Maybe not enough to decode or no headerBlob. */
540 /* So delay until decoding the body. */
541 srf->mf_pos = 0;
542 }
543 mfseek(srf->mf, 0, SEEK_END);
544 srf->mf_end = mftell(srf->mf);
545
546 break;
547
548 case SRFB_TRACE_BODY: {
549 srf_trace_body_t old_tb;
550 ztr_t *ztr_tmp;
551 int no_trace = (level_mode & (LEVEL_CHUNK | LEVEL_BASE) ? 0 : 1);
552
553 if (0 != srf_read_trace_body(srf, &old_tb, no_trace)) {
554 fprintf(stderr, "Error reading trace body.\nExiting.\n");
555 exit(1);
556 }
557
558 if (-1 == construct_trace_name(srf->th.id_prefix,
559 (unsigned char *)old_tb.read_id,
560 old_tb.read_id_length,
561 name, 512)) {
562 fprintf(stderr, "Error constructing trace name.\nExiting.\n");
563 exit(1);
564 }
565
566 trace_body_count++;
567 if( 1 == trace_body_count ){
568 if( level_mode & LEVEL_NAME )
569 printf( "trace_name: %s + %s", srf->th.id_prefix, name+strlen(srf->th.id_prefix));
570 }
571
572 read_count[READ_TOTAL]++;
573
574 if (old_tb.flags & SRF_READ_FLAG_BAD_MASK ){
575 read_count[READ_BAD]++;
576 } else {
577 read_count[READ_GOOD]++;
578 }
579
580 if( 0 == (level_mode & (LEVEL_CHUNK | LEVEL_BASE)) )
581 break;
582
583 if (!srf->mf) {
584 fprintf(stderr, "Error reading trace body.\nExiting.\n");
585 exit(1);
586 }
587
588 mfseek(srf->mf, srf->mf_end, SEEK_SET);
589 if (old_tb.trace_size) {
590 mfwrite(old_tb.trace, 1, old_tb.trace_size, srf->mf);
591 free(old_tb.trace);
592 old_tb.trace = NULL;
593 }
594
595 mftruncate(srf->mf, mftell(srf->mf));
596 mfseek(srf->mf, srf->mf_pos, SEEK_SET);
597
598 if (srf->ztr)
599 ztr_tmp = ztr_dup(srf->ztr); /* inefficient, but simple */
600 else
601 ztr_tmp = NULL;
602
603 if ((ztr_tmp = partial_decode_ztr(srf, srf->mf, ztr_tmp))) {
604 int i;
605 for (i=0; i<ztr_tmp->nchunks; i++) {
606 int ichunk = -1;
607
608 switch (ztr_tmp->chunk[i].type) {
609 case ZTR_TYPE_BASE:
610 ichunk = CHUNK_BASE;
611 chunk_size[ichunk] += ztr_tmp->chunk[i].dlength;
612 if( parse_base(ztr_tmp, &ztr_tmp->chunk[i], base_count) ){
613 delete_ztr(ztr_tmp);
614 return 1;
615 }
616 break;
617 case ZTR_TYPE_CNF1:
618 ichunk = CHUNK_CNF1;
619 chunk_size[ichunk] += ztr_tmp->chunk[i].dlength;
620 break;
621 case ZTR_TYPE_CNF4:
622 ichunk = CHUNK_CNF4;
623 chunk_size[ichunk] += ztr_tmp->chunk[i].dlength;
624 break;
625 case ZTR_TYPE_SAMP:
626 ichunk = CHUNK_SAMP;
627 chunk_size[ichunk] += ztr_tmp->chunk[i].dlength;
628 break;
629 case ZTR_TYPE_SMP4:
630 ichunk = CHUNK_SMP4;
631 chunk_size[ichunk] += ztr_tmp->chunk[i].dlength;
632 break;
633 case ZTR_TYPE_REGN:
634 ichunk = CHUNK_REGN;
635 chunk_size[ichunk] += ztr_tmp->chunk[i].dlength;
636 if( NULL == parse_regn(ztr_tmp, &ztr_tmp->chunk[i], regn_hash) ){
637 delete_ztr(ztr_tmp);
638 return 1;
639 }
640 break;
641 default:
642 break;
643 }
644
645 if( ichunk > -1 ) {
646 chunk_count[ichunk]++;
647 count_mdata_keys(ztr_tmp, &ztr_tmp->chunk[i], ichunk, key_count, type_count);
648 }
649 }
650
651 }
652
653 if( ztr_tmp )
654 delete_ztr(ztr_tmp);
655
656 count++;
657 if( (level_mode == LEVEL_CHECK) && (count == 10) ){
658 printf( " ... %s x%ld\n", name+strlen(srf->th.id_prefix), trace_body_count);
659 srf_destroy(srf, 1);
660 return 0;
661 }
662
663 break;
664 }
665
666 case SRFB_INDEX: {
667 off_t pos = ftell(srf->fp);
668 if( trace_body_count ){
669 if( level_mode & LEVEL_NAME )
670 printf( " ... %s x%ld\n", name+strlen(srf->th.id_prefix), trace_body_count);
671 trace_body_count = 0;
672 }
673 printf( "Reading srf index block\n");
674 if (0 != srf_read_index_hdr(srf, &srf->hdr, 1)) {
675 srf_destroy(srf, 1);
676 fprintf(stderr, "Error reading srf index block header.\nExiting.\n");
677 exit(1);
678 }
679 /* Skip the index body */
680 fseeko(srf->fp, pos + srf->hdr.size, SEEK_SET);
681
682 break;
683 }
684
685 case SRFB_NULL_INDEX: {
686 uint64_t ilen;
687 if( trace_body_count ){
688 if( level_mode & LEVEL_NAME )
689 printf( " ... %s x%ld\n", name+strlen(srf->th.id_prefix), trace_body_count);
690 trace_body_count = 0;
691 }
692 printf( "Reading srf null index block\n");
693 /*
694 * Maybe the last 8 bytes of a the file (or previously was
695 * last 8 bytes prior to concatenating SRF files together).
696 * If so it's the index length and should always be 8 zeros.
697 */
698 if (1 != fread(&ilen, 8, 1, srf->fp)) {
699 srf_destroy(srf, 1);
700 fprintf(stderr, "Error reading srf null index block.\nExiting.\n");
701 exit(1);
702 }
703 if (ilen != 0) {
704 srf_destroy(srf, 1);
705 fprintf(stderr, "Invalid srf null index block.\nExiting.\n");
706 exit(1);
707 }
708
709 break;
710 }
711
712 default:
713 srf_destroy(srf, 1);
714 fprintf(stderr, "Block of unknown type '%c'\nExiting.\n", type);
715 exit(1);
716 }
717
718 }
719
720 if( trace_body_count ){
721 if( level_mode & LEVEL_NAME )
722 printf( " ... %s x%ld\n", name+strlen(srf->th.id_prefix), trace_body_count);
723 trace_body_count = 0;
724 }
725
726 /* the type should be -1 (EOF) */
727 if( type != -1 ) {
728 fprintf(stderr, "Block of unknown type '%c'\nExiting.\n", type);
729 exit(1);
730 }
731
732 /* are we really at the end of the srf file */
733 pos = ftell(srf->fp);
734 fseek(srf->fp, 0, SEEK_END);
735 if( pos != ftell(srf->fp) ){
736 fprintf(stderr, "srf file is corrupt\n");
737 exit(1);
738 }
739
740 srf_destroy(srf, 1);
741 return 0;
742 }
743
744 /* ------------------------------------------------------------------------ */
745
746 /*
747 * Main method.
748 */
main(int argc,char ** argv)749 int main(int argc, char **argv) {
750 int ifile, nfiles;
751 char *input = NULL;
752
753 int c;
754 int errflg = 0;
755 extern char *optarg;
756 extern int optind, optopt;
757
758 int level_mode = LEVEL_ALL;
759
760 long read_count[NREADS];
761 char *read_str[] = {READ_GOOD_STR, READ_BAD_STR, READ_TOTAL_STR};
762 long chunk_count[NCHUNKS];
763 uint64_t chunk_size[NCHUNKS];
764 uint4 chunk_type[] = {CHUNK_BASE_TYPE, CHUNK_CNF1_TYPE, CHUNK_CNF4_TYPE, CHUNK_SAMP_TYPE, CHUNK_SMP4_TYPE, CHUNK_REGN_TYPE};
765 long key_count[NCHUNKS][NKEYS];
766 char *keys_str[] = {KEY_TYPE_STR, KEY_VALTYPE_STR, KEY_GROUP_STR, KEY_OFFS_STR, KEY_SCALE_STR, KEY_COORD_STR, KEY_NAME_STR};
767 long type_count[NCHUNKS][NTYPES];
768 char *types_str[] = {TYPE_PROC_STR, TYPE_SLXI_STR, TYPE_SLXN_STR, TYPE_0FAM_STR, TYPE_1CY3_STR, TYPE_2TXR_STR, TYPE_3CY5_STR};
769 int iread, ichunk, ikey, itype;
770
771 while ((c = getopt(argc, argv, "l:")) != -1) {
772 switch (c) {
773 case 'l':
774 if (1 != sscanf(optarg, "%d", &level_mode)) {
775 fprintf(stderr,
776 "Otion -%c requires an operand\n", optopt);
777 errflg++;
778 }
779 break;
780 case ':': /* -? without operand */
781 fprintf(stderr,
782 "Option -%c requires an operand\n", optopt);
783 errflg++;
784 break;
785 case '?':
786 fprintf(stderr,
787 "Unrecognised option: -%c\n", optopt);
788 errflg++;
789 }
790 }
791
792 if (errflg) {
793 usage(1);
794 }
795
796 nfiles = (argc-optind);
797 if( nfiles < 1 ){
798 fprintf(stderr, "Please specify input archive name(s).\n");
799 usage(1);
800 }
801
802 for (ifile=0; ifile<nfiles; ifile++) {
803 HashTable *regn_hash;
804 char bases[] = "ACGTN";
805 uint64_t base_count[5];
806 char type[5];
807
808 input = argv[optind+ifile];
809 printf("Reading archive %s.\n", input);
810
811 for (iread=0; iread<NREADS; iread++)
812 read_count[iread] = 0;
813
814 for (ichunk=0; ichunk<NCHUNKS; ichunk++) {
815 chunk_count[ichunk] = 0;
816 chunk_size[ichunk] = 0;
817 }
818
819 for (ichunk=0; ichunk<NCHUNKS; ichunk++)
820 for (ikey=0; ikey<NKEYS; ikey++)
821 key_count[ichunk][ikey] = 0;
822
823 for (ichunk=0; ichunk<NCHUNKS; ichunk++)
824 for (itype=0; itype<NTYPES; itype++)
825 type_count[ichunk][itype] = 0;
826
827 if (NULL == (regn_hash = HashTableCreate(0, HASH_DYNAMIC_SIZE|HASH_FUNC_JENKINS3))) {
828 return 1;
829 }
830
831 memset(base_count, 0, 5 * sizeof(uint64_t));
832
833 if( 0 == srf_info(input, level_mode, read_count,
834 chunk_count, chunk_size,
835 key_count, type_count, regn_hash, base_count) ){
836
837 /* read counts */
838 if( level_mode & LEVEL_READ ) {
839 for (iread=0; iread<NREADS; iread++) {
840 if( read_count[iread] )
841 printf("Reads: %s : %ld\n", read_str[iread], read_count[iread]);
842 }
843 }
844
845 /* chunk, key and type counts */
846 if( level_mode & LEVEL_CHUNK ) {
847 for (ichunk=0; ichunk<NCHUNKS; ichunk++) {
848 if( chunk_count[ichunk] ) {
849 printf("Chunk: %s : %ld %"PRId64"\n",
850 ZTR_BE2STR(chunk_type[ichunk], type),
851 chunk_count[ichunk], chunk_size[ichunk]);
852 for (ikey=0; ikey<NKEYS; ikey++) {
853 if(key_count[ichunk][ikey]) {
854 printf(" Mdata key: %s : %ld\n", keys_str[ikey], key_count[ichunk][ikey]);
855 if (ikey == KEY_TYPE && (ichunk == CHUNK_SAMP || ichunk == CHUNK_SMP4)) {
856 for (itype=0; itype<NTYPES; itype++)
857 if(type_count[ichunk][itype])
858 printf(" types: %s : %ld\n", types_str[itype], type_count[ichunk][itype]);
859 }
860 if (ikey == KEY_NAME && (ichunk == CHUNK_REGN)) {
861 int ibucket;
862 for (ibucket=0; ibucket<regn_hash->nbuckets; ibucket++) {
863 HashItem *hi;
864 for (hi = regn_hash->bucket[ibucket]; hi; hi = hi->next) {
865 regn_t *regn = (regn_t *)hi->data.p;
866 printf(" %s x%d\n", hi->key, regn->count);
867 }
868 }
869 }
870 }
871 }
872 }
873 }
874 }
875
876 /* base counts */
877 if( level_mode & LEVEL_BASE ) {
878 uint64_t total = 0;
879 int i;
880 for (i=0; i<5; i++) {
881 if( base_count[i] ){
882 printf("Bases: %c: %"PRId64"\n",
883 bases[i], base_count[i]);
884 total += base_count[i];
885 }
886 }
887 printf("Bases: TOTAL: %"PRId64"\n", total);
888 }
889 }
890 }
891
892 return 0;
893 }
894