1 /*
2  * Copyright (c) 2008-2010, 2013 Genome Research Ltd.
3  * Author(s): James Bonfield
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  *    1. Redistributions of source code must retain the above copyright notice,
9  *       this list of conditions and the following disclaimer.
10  *
11  *    2. Redistributions in binary form must reproduce the above
12  *       copyright notice, this list of conditions and the following
13  *       disclaimer in the documentation and/or other materials provided
14  *       with the distribution.
15  *
16  *    3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
17  *    Institute nor the names of its contributors may be used to endorse
18  *    or promote products derived from this software without specific
19  *    prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS
22  * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
24  * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH
25  * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 /*
35  * This performs a linear (non-indexed) search for a trace in an SRF archive.
36  *
37  * It's not intended as a suitable production program or as a library of code
38  * to use, but as a test and benchmark statistic.
39  */
40 
41 #ifdef HAVE_CONFIG_H
42 #include "io_lib_config.h"
43 #endif
44 
45 #include <string.h>
46 #include <stdio.h>
47 #include <unistd.h>
48 #include <ctype.h>
49 #include <sys/stat.h>
50 #include <sys/types.h>
51 #include <io_lib/Read.h>
52 #include <io_lib/misc.h>
53 #include <io_lib/ztr.h>
54 #include <io_lib/srf.h>
55 #include <io_lib/hash_table.h>
56 #include <io_lib/xalloc.h>
57 
58 #define LEVEL_READ  (1 << 0)
59 #define LEVEL_CHUNK (1 << 1)
60 #define LEVEL_NAME  (1 << 2)
61 #define LEVEL_BASE  (1 << 3)
62 #define LEVEL_ALL   (LEVEL_READ | LEVEL_CHUNK | LEVEL_NAME | LEVEL_BASE );
63 
64 /* only checks the first 10 traces */
65 #define LEVEL_CHECK 255
66 
67 #define READ_GOOD  0
68 #define READ_BAD   1
69 #define READ_TOTAL 2
70 
71 #define NREADS 3
72 
73 #define READ_GOOD_STR   "GOOD"
74 #define READ_BAD_STR    "BAD"
75 #define READ_TOTAL_STR  "TOTAL"
76 
77 /* see ztr.h for a list of all possible ztr chunk types */
78 
79 #define CHUNK_BASE 0
80 #define CHUNK_CNF1 1
81 #define CHUNK_CNF4 2
82 #define CHUNK_SAMP 3
83 #define CHUNK_SMP4 4
84 #define CHUNK_REGN 5
85 
86 #define NCHUNKS 6
87 
88 #define CHUNK_BASE_TYPE ZTR_TYPE_BASE
89 #define CHUNK_CNF1_TYPE ZTR_TYPE_CNF1
90 #define CHUNK_CNF4_TYPE ZTR_TYPE_CNF4
91 #define CHUNK_SAMP_TYPE ZTR_TYPE_SAMP
92 #define CHUNK_SMP4_TYPE ZTR_TYPE_SMP4
93 #define CHUNK_REGN_TYPE ZTR_TYPE_REGN
94 
95 #define KEY_TYPE    0
96 #define KEY_VALTYPE 1
97 #define KEY_GROUP   2
98 #define KEY_OFFS    3
99 #define KEY_SCALE   4
100 #define KEY_COORD   5
101 #define KEY_NAME    6
102 
103 #define NKEYS 7
104 
105 #define KEY_TYPE_STR    "TYPE"
106 #define KEY_VALTYPE_STR "VALTYPE"
107 #define KEY_GROUP_STR   "GROUP"
108 #define KEY_OFFS_STR    "OFFS"
109 #define KEY_SCALE_STR   "SCALE"
110 #define KEY_COORD_STR   "COORD"
111 #define KEY_NAME_STR    "NAME"
112 
113 #define TYPE_PROC 0
114 #define TYPE_SLXI 1
115 #define TYPE_SLXN 2
116 #define TYPE_0FAM 3
117 #define TYPE_1CY3 4
118 #define TYPE_2TXR 5
119 #define TYPE_3CY5 6
120 
121 #define NTYPES 7
122 
123 #define TYPE_PROC_STR "PROC"
124 #define TYPE_SLXI_STR "SLXI"
125 #define TYPE_SLXN_STR "SLXN"
126 #define TYPE_0FAM_STR "0FAM"
127 #define TYPE_1CY3_STR "1CY3"
128 #define TYPE_2TXR_STR "2TXR"
129 #define TYPE_3CY5_STR "3CY5"
130 
131 #define MAX_REGIONS   4
132 
133 /* regn chunk */
134 typedef struct {
135     char coord;
136     char *region_names;
137     int nregions;
138     char *name[MAX_REGIONS];
139     char code[MAX_REGIONS];
140     int start[MAX_REGIONS];
141     int length[MAX_REGIONS];
142     int count;
143 } regn_t;
144 
145 /* ------------------------------------------------------------------------ */
146 /*
147  * Print usage message to stderr and exit with the given \"code\".
148  */
usage(int code)149 void usage(int code) {
150     fprintf(stderr, "Usage: srf_info [-level level_bitmap] input(s)\n");
151     fprintf(stderr, "Options:\n");
152     fprintf(stderr, "    -l level_bitmap \n");
153     fprintf(stderr, "              1\tCount of good/bad reads.\n");
154     fprintf(stderr, "              2\tCounts for selected chunk types.\n");
155     fprintf(stderr, "              4\tTrace count and trace name prefix for each trace_header.\n");
156     fprintf(stderr, "              8\tBase count.\n");
157     fprintf(stderr, "\n");
158 
159     exit(code);
160 }
161 
162 /*
163  * Parse the REGN chunk, add to regn HASH
164  *
165  * Returns corresponding HashItem * from regn Hash
166  */
parse_regn(ztr_t * z,ztr_chunk_t * chunk,HashTable * regn_hash)167 HashItem *parse_regn(ztr_t *z, ztr_chunk_t *chunk, HashTable *regn_hash) {
168     char key[1024];
169     char *name;
170     HashItem *hi;
171     regn_t *regn;
172     size_t l;
173 
174     uncompress_chunk(z, chunk);
175 
176     /* the hash key is a combination of the region names and boundaries */
177     name = ztr_lookup_mdata_value(z, chunk, "NAME");
178     l = snprintf(key, sizeof(key), "names=%s", name);
179     if( chunk->dlength ){
180         int nbndy = (chunk->dlength-1)/4;
181         uint4 *bndy = (uint4 *)(chunk->data+1);
182         int ibndy;
183 	for (ibndy=0; ibndy<nbndy; ibndy++) {
184             if( ibndy )
185                 l += snprintf(key + l, sizeof(key) - l,
186 			      ";%d", be_int4(bndy[ibndy]));
187             else
188                 l += snprintf(key + l, sizeof(key) - l,
189 			      " boundaries=%d", be_int4(bndy[ibndy]));
190         }
191     }
192 
193     if (NULL == (hi = (HashTableSearch(regn_hash, key, strlen(key))))) {
194         int iregion, nregions = 0;
195         char *coord;
196 	char *cp1;
197         uint4 bndy[MAX_REGIONS];
198         int ibndy, nbndy = 0;
199         HashData hd;
200 
201         if( NULL == (regn = (regn_t *)malloc(sizeof(regn_t)))) {
202 	    return NULL;
203 	}
204 
205 	coord = ztr_lookup_mdata_value(z, chunk, "COORD");
206 	regn->coord = (NULL == coord ? 'B' : *coord );
207 
208 	regn->region_names = strdup(name);
209 
210         cp1 = strtok (regn->region_names,";");
211         while(cp1) {
212             char *cp2;
213             if(NULL == (cp2 = strchr(cp1,':'))) {
214                 fprintf(stderr, "Invalid region name/code pair %s\n", cp1);
215                 return NULL;
216             }
217             *cp2++ = '\0';
218             regn->name[nregions] = cp1;
219             regn->code[nregions] = *cp2;
220             nregions++;
221             cp1 = strtok (NULL, ";");
222         }
223 
224         regn->nregions = nregions;
225 
226 	if( chunk->dlength ) {
227             nbndy = (chunk->dlength-1)/4;
228             memcpy(bndy, chunk->data+1, chunk->dlength-1);
229 	}
230 
231         for( iregion=0, ibndy=0; iregion<nregions; iregion++) {
232             /* start = (start + length of previous region) or 0 if no previous region */
233             /* length = (next boundary - start of region) or -1 if no next boundary */
234             if( regn->code[iregion] == 'E' ){
235                 /* no sequence, length = 0 */
236                 regn->start[iregion] = (iregion ? (regn->start[iregion-1] + regn->length[iregion-1]) : 0);
237                 regn->length[iregion] = 0;
238             }else{
239                 if( ibndy > nbndy ){
240                     fprintf(stderr, "More name/code pairs than boundaries\n");
241                     return NULL;
242                 }
243                 regn->start[iregion] = (iregion ? (regn->start[iregion-1] + regn->length[iregion-1]) : 0);
244                 regn->length[iregion] = (ibndy == nbndy ? -1 : (be_int4(bndy[ibndy])-regn->start[iregion]));
245                 ibndy++;
246             }
247         }
248 
249         regn->count = 1;
250 
251 	hd.p = regn;
252 	if (NULL == (hi = HashTableAdd(regn_hash, key, strlen(key), hd, NULL))) {
253 	    free(regn->region_names);
254 	    free(regn);
255 	    return NULL;
256 	}
257     } else {
258 	regn = (regn_t *)(hi->data.p);
259 	regn->count++;
260     }
261 
262     return hi;
263 }
264 
265 /*
266  * Parse the sequence
267  *
268  * Returns 0 on success.
269  */
parse_base(ztr_t * z,ztr_chunk_t * chunk,uint64_t * base_count)270 int parse_base(ztr_t *z, ztr_chunk_t *chunk, uint64_t *base_count) {
271     int i;
272 
273     uncompress_chunk(z, chunk);
274 
275     for (i = 1; i < chunk->dlength; i++) {
276 	char base = chunk->data[i];
277         uint1 key;
278 	switch (base) {
279 	case 'A': case 'a':
280             key = 0;
281             break;
282 	case 'C': case 'c':
283             key = 1;
284             break;
285 	case 'G': case 'g':
286             key = 2;
287             break;
288 	case 'T': case 't':
289             key = 3;
290             break;
291 	default:
292             key = 4;
293             break;
294 	}
295         base_count[key]++;
296     }
297 
298     return 0;
299 }
300 
301 /*
302  * count the mdata keys
303  *
304  * Returns 0 on success.
305  */
count_mdata_keys(ztr_t * z,ztr_chunk_t * chunk,int ichunk,long key_count[NCHUNKS][NKEYS],long type_count[NCHUNKS][NTYPES])306 int count_mdata_keys(ztr_t *z, ztr_chunk_t *chunk, int ichunk, long key_count[NCHUNKS][NKEYS], long type_count[NCHUNKS][NTYPES]) {
307     char *keys_str[] = {KEY_TYPE_STR, KEY_VALTYPE_STR, KEY_GROUP_STR, KEY_OFFS_STR, KEY_SCALE_STR, KEY_COORD_STR, KEY_NAME_STR};
308     char *types_str[] = {TYPE_PROC_STR, TYPE_SLXI_STR, TYPE_SLXN_STR, TYPE_0FAM_STR, TYPE_1CY3_STR, TYPE_2TXR_STR, TYPE_3CY5_STR};
309     int ikey, itype;
310 
311     if (z->header.version_major > 1 ||
312 	z->header.version_minor >= 2) {
313 	/* ZTR format 1.2 onwards */
314 
315 	char *cp = chunk->mdata;
316 	int32_t dlen = chunk->mdlength;
317 
318 	/*
319 	 * NB: we may wish to rewrite this using a dedicated state machine
320 	 * instead of strlen/strcmp as this currently assumes the meta-
321 	 * data is correctly formatted, which we cannot assume as the
322 	 * metadata is external and outside of our control.
323 	 * Passing in non-nul terminated strings could crash this code.
324 	 */
325 	while (dlen > 0) {
326 	    size_t l;
327 
328 	    /* key */
329 	    l = strlen(cp);
330 	    for (ikey=0; ikey<NKEYS; ikey++)
331 		if(0 == strcmp(cp, keys_str[ikey]))
332 		    break;
333 
334 	    cp += l+1;
335 	    dlen -= l+1;
336 
337 	    /* value */
338 	    if (ikey < NKEYS)
339 		key_count[ichunk][ikey]++;
340 
341 	    /* for the type key check the value */
342 	    if (ikey == KEY_TYPE && (ichunk == CHUNK_SAMP || ichunk == CHUNK_SMP4)) {
343 		for (itype=0; itype<NTYPES; itype++)
344 		    if(0 == strcmp(cp, types_str[itype]))
345 			break;
346 		if(itype < NTYPES)
347 		    type_count[ichunk][itype]++;
348 	    }
349 
350 	    l = strlen(cp);
351 	    cp += l+1;
352 	    dlen -= l+1;
353 	}
354 
355     } else {
356 	/* v1.1 and before only supported a few types, specifically coded
357 	 * per chunk type.
358 	 */
359 
360 	switch (chunk->type) {
361 	case ZTR_TYPE_SAMP:
362 	case ZTR_TYPE_SMP4:
363 	    key_count[ichunk][KEY_TYPE]++;
364 	    for (itype=0; itype<NTYPES; itype++)
365 		if(0 == strcmp(chunk->mdata, types_str[itype])) {
366 		    type_count[ichunk][itype]++;
367 		}
368 	    break;
369 
370 	default:
371 	    break;
372 	}
373     }
374 
375     return 0;
376 }
377 
378 /*
379  * As per partial_decode_ztr in srf.c, but without uncompress_ztr.
380  */
partial_decode_ztr2(srf_t * srf,mFILE * mf,ztr_t * z)381 ztr_t *partial_decode_ztr2(srf_t *srf, mFILE *mf, ztr_t *z) {
382     ztr_t *ztr;
383     ztr_chunk_t *chunk;
384     long pos = 0;
385 
386     if (z) {
387 	/* Use existing ZTR object => already loaded header */
388 	ztr = z;
389 
390     } else {
391 	/* Allocate or use existing ztr */
392 	if (NULL == (ztr = new_ztr()))
393 	    return NULL;
394 
395 	/* Read the header */
396 	if (-1 == ztr_read_header(mf, &ztr->header)) {
397 	    if (!z)
398 		delete_ztr(ztr);
399 	    mrewind(mf);
400 	    return NULL;
401 	}
402 
403 	/* Check magic number and version */
404 	if (memcmp(ztr->header.magic, ZTR_MAGIC, 8) != 0) {
405 	    if (!z)
406 		delete_ztr(ztr);
407 	    mrewind(mf);
408 	    return NULL;
409 	}
410 
411 	if (ztr->header.version_major != ZTR_VERSION_MAJOR) {
412 	    if (!z)
413 		delete_ztr(ztr);
414 	    mrewind(mf);
415 	    return NULL;
416 	}
417     }
418 
419     /* Load chunks */
420     pos = mftell(mf);
421     while ((chunk = ztr_read_chunk_hdr(mf))) {
422 	chunk->data = (char *)xmalloc(chunk->dlength);
423 	if (chunk->dlength != mfread(chunk->data, 1, chunk->dlength, mf))
424 	    break;
425 
426 	ztr->nchunks++;
427 	ztr->chunk = (ztr_chunk_t *)xrealloc(ztr->chunk, ztr->nchunks *
428 					     sizeof(ztr_chunk_t));
429 	memcpy(&ztr->chunk[ztr->nchunks-1], chunk, sizeof(*chunk));
430 	xfree(chunk);
431 	pos = mftell(mf);
432     }
433 
434     /*
435      * At this stage we're 'pos' into the mFILE mf with any remainder being
436      * a partial block.
437      */
438     if (0 == ztr->nchunks) {
439 	if (!z)
440 	    delete_ztr(ztr);
441 	mrewind(mf);
442 	return NULL;
443     }
444 
445     /* Ensure we exit at the start of a ztr CHUNK */
446     mfseek(mf, pos, SEEK_SET);
447 
448     /* If this is the header part, ensure we uncompress and init. data */
449     if (!z) {
450 	/* Force caching of huffman code_sets */
451 	ztr_find_hcode(ztr, CODE_USER);
452 
453 	/* And uncompress the rest */
454 	uncompress_ztr(ztr);
455     }
456 
457     return ztr;
458 }
459 
460 /*
461  * Given the archive name and the level_mode
462  * generate information about the archive
463  *
464  * Note the generated srf file is NOT indexed
465  *
466  * Returns 0 on success.
467  */
srf_info(char * input,int level_mode,long * read_count,long * chunk_count,uint64_t * chunk_size,long key_count[NCHUNKS][NKEYS],long type_count[NCHUNKS][NTYPES],HashTable * regn_hash,uint64_t * base_count)468 int srf_info(char *input, int level_mode, long *read_count, long *chunk_count,
469 	     uint64_t *chunk_size, long key_count[NCHUNKS][NKEYS],
470 	     long type_count[NCHUNKS][NTYPES], HashTable *regn_hash,
471 	     uint64_t *base_count) {
472     srf_t *srf;
473     off_t pos;
474     int type;
475     int count = 0;
476     long trace_body_count = 0;
477     char name[1024];
478 
479     if (NULL == (srf = srf_open(input, "rb"))) {
480 	perror(input);
481 	return 1;
482     }
483 
484     while ((type = srf_next_block_type(srf)) >= 0) {
485 
486       switch (type) {
487 	case SRFB_CONTAINER:
488 	    if( trace_body_count ){
489 		if( level_mode & LEVEL_NAME )
490 		    printf( " ... %s x%ld\n", name+strlen(srf->th.id_prefix), trace_body_count);
491 		trace_body_count = 0;
492 	    }
493 	    if (0 != srf_read_cont_hdr(srf, &srf->ch)) {
494 		fprintf(stderr, "Error reading container header.\nExiting.\n");
495 		exit(1);
496 	    }
497 	    break;
498 
499         case SRFB_XML:
500 	    if( trace_body_count ){
501 		if( level_mode & LEVEL_NAME )
502 		    printf( " ... %s x%ld\n", name+strlen(srf->th.id_prefix), trace_body_count);
503 		trace_body_count = 0;
504 	    }
505 	    if (0 != srf_read_xml(srf, &srf->xml)) {
506 		fprintf(stderr, "Error reading XML.\nExiting.\n");
507 		exit(1);
508 	    }
509 	    break;
510 
511 	case SRFB_TRACE_HEADER:
512 	    if( trace_body_count ){
513 		if( level_mode & LEVEL_NAME )
514 		    printf( " ... %s x%ld\n", name+strlen(srf->th.id_prefix), trace_body_count);
515 		trace_body_count = 0;
516 	    }
517 	    if (0 != srf_read_trace_hdr(srf, &srf->th)) {
518 		fprintf(stderr, "Error reading trace header.\nExiting.\n");
519 		exit(1);
520 	    }
521 
522 	    if( 0 == (level_mode & (LEVEL_CHUNK | LEVEL_BASE)) )
523 		break;
524 
525 	    /* Decode ZTR chunks in the header */
526 	    if (srf->mf)
527 		mfdestroy(srf->mf);
528 
529 	    srf->mf = mfcreate(NULL, 0);
530 	    if (srf->th.trace_hdr_size)
531 		mfwrite(srf->th.trace_hdr, 1, srf->th.trace_hdr_size, srf->mf);
532 	    if (srf->ztr)
533 		delete_ztr(srf->ztr);
534 	    mrewind(srf->mf);
535 
536 	    if (NULL != (srf->ztr = partial_decode_ztr(srf, srf->mf, NULL))) {
537 		srf->mf_pos = mftell(srf->mf);
538 	    } else {
539 		/* Maybe not enough to decode or no headerBlob. */
540 		/* So delay until decoding the body. */
541 		srf->mf_pos = 0;
542 	    }
543 	    mfseek(srf->mf, 0, SEEK_END);
544 	    srf->mf_end = mftell(srf->mf);
545 
546 	    break;
547 
548 	case SRFB_TRACE_BODY: {
549 	    srf_trace_body_t old_tb;
550 	    ztr_t *ztr_tmp;
551             int no_trace = (level_mode & (LEVEL_CHUNK | LEVEL_BASE) ? 0 : 1);
552 
553 	    if (0 != srf_read_trace_body(srf, &old_tb, no_trace)) {
554 		fprintf(stderr, "Error reading trace body.\nExiting.\n");
555 		exit(1);
556 	    }
557 
558 	    if (-1 == construct_trace_name(srf->th.id_prefix,
559 					   (unsigned char *)old_tb.read_id,
560 					   old_tb.read_id_length,
561 					   name, 512)) {
562 		fprintf(stderr, "Error constructing trace name.\nExiting.\n");
563 		exit(1);
564 	    }
565 
566 	    trace_body_count++;
567 	    if( 1 == trace_body_count ){
568 		if( level_mode & LEVEL_NAME )
569 		    printf( "trace_name: %s + %s", srf->th.id_prefix, name+strlen(srf->th.id_prefix));
570 	    }
571 
572 	    read_count[READ_TOTAL]++;
573 
574 	    if (old_tb.flags & SRF_READ_FLAG_BAD_MASK ){
575 		read_count[READ_BAD]++;
576 	    } else {
577 		read_count[READ_GOOD]++;
578 	    }
579 
580 	    if( 0 == (level_mode & (LEVEL_CHUNK | LEVEL_BASE)) )
581 		break;
582 
583 	    if (!srf->mf) {
584 		fprintf(stderr, "Error reading trace body.\nExiting.\n");
585 		exit(1);
586 	    }
587 
588 	    mfseek(srf->mf, srf->mf_end, SEEK_SET);
589 	    if (old_tb.trace_size) {
590 		mfwrite(old_tb.trace, 1, old_tb.trace_size, srf->mf);
591 		free(old_tb.trace);
592 		old_tb.trace = NULL;
593 	    }
594 
595 	    mftruncate(srf->mf, mftell(srf->mf));
596 	    mfseek(srf->mf, srf->mf_pos, SEEK_SET);
597 
598 	    if (srf->ztr)
599 		ztr_tmp = ztr_dup(srf->ztr); /* inefficient, but simple */
600 	    else
601 		ztr_tmp = NULL;
602 
603 	    if ((ztr_tmp = partial_decode_ztr(srf, srf->mf, ztr_tmp))) {
604 		int i;
605 		for (i=0; i<ztr_tmp->nchunks; i++) {
606 		    int ichunk = -1;
607 
608 		    switch (ztr_tmp->chunk[i].type) {
609 		    case ZTR_TYPE_BASE:
610 			ichunk = CHUNK_BASE;
611 			chunk_size[ichunk] += ztr_tmp->chunk[i].dlength;
612 			if( parse_base(ztr_tmp, &ztr_tmp->chunk[i], base_count) ){
613 			    delete_ztr(ztr_tmp);
614 			    return 1;
615 			}
616 			break;
617 		    case ZTR_TYPE_CNF1:
618 			ichunk = CHUNK_CNF1;
619 			chunk_size[ichunk] += ztr_tmp->chunk[i].dlength;
620 			break;
621 		    case ZTR_TYPE_CNF4:
622 			ichunk = CHUNK_CNF4;
623 			chunk_size[ichunk] += ztr_tmp->chunk[i].dlength;
624 			break;
625 		    case ZTR_TYPE_SAMP:
626 			ichunk = CHUNK_SAMP;
627 			chunk_size[ichunk] += ztr_tmp->chunk[i].dlength;
628 			break;
629 		    case ZTR_TYPE_SMP4:
630 			ichunk = CHUNK_SMP4;
631 			chunk_size[ichunk] += ztr_tmp->chunk[i].dlength;
632 			break;
633 		    case ZTR_TYPE_REGN:
634 			ichunk = CHUNK_REGN;
635 			chunk_size[ichunk] += ztr_tmp->chunk[i].dlength;
636 			if( NULL == parse_regn(ztr_tmp, &ztr_tmp->chunk[i], regn_hash) ){
637 			    delete_ztr(ztr_tmp);
638 			    return 1;
639 			}
640 			break;
641 		    default:
642 			break;
643 		    }
644 
645 		    if( ichunk > -1 ) {
646 			chunk_count[ichunk]++;
647 			count_mdata_keys(ztr_tmp, &ztr_tmp->chunk[i], ichunk, key_count, type_count);
648 		    }
649 		}
650 
651 	    }
652 
653 	    if( ztr_tmp )
654 		delete_ztr(ztr_tmp);
655 
656 	    count++;
657 	    if( (level_mode == LEVEL_CHECK) && (count == 10) ){
658 		printf( " ... %s x%ld\n", name+strlen(srf->th.id_prefix), trace_body_count);
659 		srf_destroy(srf, 1);
660 		return 0;
661 	    }
662 
663 	    break;
664         }
665 
666 	case SRFB_INDEX: {
667             off_t pos = ftell(srf->fp);
668 	    if( trace_body_count ){
669 		if( level_mode & LEVEL_NAME )
670 		    printf( " ... %s x%ld\n", name+strlen(srf->th.id_prefix), trace_body_count);
671 		trace_body_count = 0;
672 	    }
673             printf( "Reading srf index block\n");
674 	    if (0 != srf_read_index_hdr(srf, &srf->hdr, 1)) {
675 		srf_destroy(srf, 1);
676 		fprintf(stderr, "Error reading srf index block header.\nExiting.\n");
677 		exit(1);
678 	    }
679             /* Skip the index body */
680 	    fseeko(srf->fp, pos + srf->hdr.size, SEEK_SET);
681 
682             break;
683         }
684 
685 	case SRFB_NULL_INDEX: {
686             uint64_t ilen;
687 	    if( trace_body_count ){
688 		if( level_mode & LEVEL_NAME )
689 		    printf( " ... %s x%ld\n", name+strlen(srf->th.id_prefix), trace_body_count);
690 		trace_body_count = 0;
691 	    }
692             printf( "Reading srf null index block\n");
693 	    /*
694 	     * Maybe the last 8 bytes of a the file (or previously was
695 	     * last 8 bytes prior to concatenating SRF files together).
696 	     * If so it's the index length and should always be 8 zeros.
697 	     */
698             if (1 != fread(&ilen, 8, 1, srf->fp)) {
699 		srf_destroy(srf, 1);
700 		fprintf(stderr, "Error reading srf null index block.\nExiting.\n");
701 		exit(1);
702             }
703             if (ilen != 0) {
704 		srf_destroy(srf, 1);
705 		fprintf(stderr, "Invalid srf null index block.\nExiting.\n");
706 		exit(1);
707             }
708 
709             break;
710         }
711 
712         default:
713             srf_destroy(srf, 1);
714 	    fprintf(stderr, "Block of unknown type '%c'\nExiting.\n", type);
715 	    exit(1);
716 	}
717 
718     }
719 
720     if( trace_body_count ){
721         if( level_mode & LEVEL_NAME )
722             printf( " ... %s x%ld\n", name+strlen(srf->th.id_prefix), trace_body_count);
723         trace_body_count = 0;
724     }
725 
726     /* the type should be -1 (EOF) */
727     if( type != -1 ) {
728         fprintf(stderr, "Block of unknown type '%c'\nExiting.\n", type);
729 	exit(1);
730     }
731 
732     /* are we really at the end of the srf file */
733     pos = ftell(srf->fp);
734     fseek(srf->fp, 0, SEEK_END);
735     if( pos != ftell(srf->fp) ){
736         fprintf(stderr, "srf file is corrupt\n");
737 	exit(1);
738     }
739 
740     srf_destroy(srf, 1);
741     return 0;
742 }
743 
744 /* ------------------------------------------------------------------------ */
745 
746 /*
747  * Main method.
748  */
main(int argc,char ** argv)749 int main(int argc, char **argv) {
750     int ifile, nfiles;
751     char *input = NULL;
752 
753     int c;
754     int errflg = 0;
755     extern char *optarg;
756     extern int optind, optopt;
757 
758     int level_mode = LEVEL_ALL;
759 
760     long read_count[NREADS];
761     char *read_str[] = {READ_GOOD_STR, READ_BAD_STR, READ_TOTAL_STR};
762     long chunk_count[NCHUNKS];
763     uint64_t chunk_size[NCHUNKS];
764     uint4 chunk_type[] = {CHUNK_BASE_TYPE, CHUNK_CNF1_TYPE, CHUNK_CNF4_TYPE, CHUNK_SAMP_TYPE, CHUNK_SMP4_TYPE, CHUNK_REGN_TYPE};
765     long key_count[NCHUNKS][NKEYS];
766     char *keys_str[] = {KEY_TYPE_STR, KEY_VALTYPE_STR, KEY_GROUP_STR, KEY_OFFS_STR, KEY_SCALE_STR, KEY_COORD_STR, KEY_NAME_STR};
767     long type_count[NCHUNKS][NTYPES];
768     char *types_str[] = {TYPE_PROC_STR, TYPE_SLXI_STR, TYPE_SLXN_STR, TYPE_0FAM_STR, TYPE_1CY3_STR, TYPE_2TXR_STR, TYPE_3CY5_STR};
769     int iread, ichunk, ikey, itype;
770 
771     while ((c = getopt(argc, argv, "l:")) != -1) {
772         switch (c) {
773         case 'l':
774             if (1 != sscanf(optarg, "%d", &level_mode)) {
775                 fprintf(stderr,
776                         "Otion -%c requires an operand\n", optopt);
777                 errflg++;
778             }
779 	    break;
780         case ':':       /* -? without operand */
781             fprintf(stderr,
782                     "Option -%c requires an operand\n", optopt);
783             errflg++;
784             break;
785         case '?':
786             fprintf(stderr,
787                     "Unrecognised option: -%c\n", optopt);
788             errflg++;
789         }
790     }
791 
792     if (errflg) {
793 	usage(1);
794     }
795 
796     nfiles = (argc-optind);
797     if( nfiles < 1 ){
798         fprintf(stderr, "Please specify input archive name(s).\n");
799         usage(1);
800     }
801 
802     for (ifile=0; ifile<nfiles; ifile++) {
803         HashTable *regn_hash;
804         char bases[] = "ACGTN";
805         uint64_t base_count[5];
806         char type[5];
807 
808         input = argv[optind+ifile];
809         printf("Reading archive %s.\n", input);
810 
811         for (iread=0; iread<NREADS; iread++)
812 	    read_count[iread] = 0;
813 
814         for (ichunk=0; ichunk<NCHUNKS; ichunk++) {
815 	    chunk_count[ichunk] = 0;
816 	    chunk_size[ichunk] = 0;
817 	}
818 
819         for (ichunk=0; ichunk<NCHUNKS; ichunk++)
820             for (ikey=0; ikey<NKEYS; ikey++)
821                 key_count[ichunk][ikey] = 0;
822 
823         for (ichunk=0; ichunk<NCHUNKS; ichunk++)
824             for (itype=0; itype<NTYPES; itype++)
825                 type_count[ichunk][itype] = 0;
826 
827         if (NULL == (regn_hash = HashTableCreate(0, HASH_DYNAMIC_SIZE|HASH_FUNC_JENKINS3))) {
828 	    return 1;
829         }
830 
831         memset(base_count, 0, 5 * sizeof(uint64_t));
832 
833         if( 0 == srf_info(input, level_mode, read_count,
834 			  chunk_count, chunk_size,
835 			  key_count, type_count, regn_hash, base_count) ){
836 
837             /* read counts */
838             if( level_mode & LEVEL_READ ) {
839                 for (iread=0; iread<NREADS; iread++) {
840                     if( read_count[iread] )
841 			printf("Reads: %s : %ld\n", read_str[iread], read_count[iread]);
842                 }
843             }
844 
845             /* chunk, key and type counts */
846             if( level_mode & LEVEL_CHUNK ) {
847                 for (ichunk=0; ichunk<NCHUNKS; ichunk++) {
848                     if( chunk_count[ichunk] ) {
849                         printf("Chunk: %s : %ld %"PRId64"\n",
850 			       ZTR_BE2STR(chunk_type[ichunk], type),
851 			       chunk_count[ichunk], chunk_size[ichunk]);
852                         for (ikey=0; ikey<NKEYS; ikey++) {
853                             if(key_count[ichunk][ikey]) {
854                                 printf("  Mdata key: %s : %ld\n", keys_str[ikey], key_count[ichunk][ikey]);
855                                 if (ikey == KEY_TYPE && (ichunk == CHUNK_SAMP || ichunk == CHUNK_SMP4)) {
856                                     for (itype=0; itype<NTYPES; itype++)
857                                         if(type_count[ichunk][itype])
858                                             printf("    types: %s : %ld\n", types_str[itype], type_count[ichunk][itype]);
859                                 }
860                                 if (ikey == KEY_NAME && (ichunk == CHUNK_REGN)) {
861                                     int ibucket;
862                                     for (ibucket=0; ibucket<regn_hash->nbuckets; ibucket++) {
863                                         HashItem *hi;
864                                         for (hi = regn_hash->bucket[ibucket]; hi; hi = hi->next) {
865                                             regn_t *regn = (regn_t *)hi->data.p;
866                                             printf("    %s x%d\n", hi->key, regn->count);
867                                         }
868                                     }
869                                 }
870                             }
871                         }
872                     }
873                 }
874             }
875 
876             /* base counts */
877             if( level_mode & LEVEL_BASE ) {
878                 uint64_t total = 0;
879                 int i;
880                 for (i=0; i<5; i++) {
881                     if( base_count[i] ){
882                         printf("Bases: %c: %"PRId64"\n",
883 			       bases[i], base_count[i]);
884                         total += base_count[i];
885                     }
886                 }
887                 printf("Bases: TOTAL: %"PRId64"\n", total);
888             }
889         }
890     }
891 
892     return 0;
893 }
894