1 /* 2 * Copyright (c) 2007-2009 Genome Research Ltd. 3 * Author(s): James Bonfield 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are met: 7 * 8 * 1. Redistributions of source code must retain the above copyright notice, 9 * this list of conditions and the following disclaimer. 10 * 11 * 2. Redistributions in binary form must reproduce the above 12 * copyright notice, this list of conditions and the following 13 * disclaimer in the documentation and/or other materials provided 14 * with the distribution. 15 * 16 * 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger 17 * Institute nor the names of its contributors may be used to endorse 18 * or promote products derived from this software without specific 19 * prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS 22 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 23 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 24 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH 25 * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #ifndef _SRF_H_ 35 #define _SRF_H_ 36 37 #include "io_lib/hash_table.h" 38 #include "io_lib/ztr.h" 39 #include "io_lib/mFILE.h" 40 41 #define SRF_MAGIC "SSRF" 42 #define SRF_VERSION "1.3" 43 44 #define SRFB_CONTAINER 'S' 45 #define SRFB_XML 'X' 46 #define SRFB_TRACE_HEADER 'H' 47 #define SRFB_TRACE_BODY 'R' 48 #define SRFB_INDEX 'I' 49 50 /* Lack of index => 8 zero bytes at end of file to indicate zero length */ 51 #define SRFB_NULL_INDEX '\0' 52 53 /*--- Public structures */ 54 55 /* Container header - several per file */ 56 typedef struct { 57 int block_type; 58 char version[256]; 59 char container_type; 60 char base_caller[256]; 61 char base_caller_version[256]; 62 } srf_cont_hdr_t; 63 64 /* Trace header - several per container */ 65 typedef struct { 66 int block_type; 67 char read_prefix_type; 68 char id_prefix[256]; 69 uint32_t trace_hdr_size; 70 unsigned char *trace_hdr; 71 } srf_trace_hdr_t; 72 73 /* Trace body - several per trace header */ 74 typedef struct { 75 int block_type; 76 int read_id_length; 77 char read_id[256]; 78 unsigned char flags; 79 uint32_t trace_size; 80 unsigned char *trace; 81 } srf_trace_body_t; 82 83 /* XML - NCBI TraceInfo data block */ 84 typedef struct { 85 uint32_t xml_len; 86 char *xml; 87 } srf_xml_t; 88 89 #define SRF_READ_FLAG_BAD_MASK (1<<0) 90 #define SRF_READ_FLAG_WITHDRAWN_MASK (1<<1) 91 #define SRF_READ_FLAG_USER_MASK (7<<5) 92 93 /* Indexing */ 94 typedef struct { 95 char magic[4]; 96 char version[4]; 97 uint64_t size; 98 uint32_t n_container; 99 uint32_t n_data_block_hdr; 100 uint64_t n_buckets; 101 int8_t index_type; 102 int8_t dbh_pos_stored_sep; 103 char dbh_file[256]; 104 char cont_file[256]; 105 int index_hdr_sz; /* size of the above data on disk */ 106 } srf_index_hdr_t; 107 108 /* In-memory index itself */ 109 #define SRF_INDEX_NAME_BLOCK_SIZE 10000000 110 111 typedef struct { 112 size_t used; 113 size_t space; 114 char *names; 115 } srf_name_block_t; 116 117 typedef struct { 118 char ch_file[PATH_MAX+1]; 119 char th_file[PATH_MAX+1]; 120 Array ch_pos; 121 Array th_pos; 122 Array name_blocks; 123 int dbh_pos_stored_sep; 124 HashTable *db_hash; 125 } srf_index_t; 126 127 /* Master SRF object */ 128 typedef struct { 129 FILE *fp; 130 131 /* Cached copies of each of the most recent chunk types loaded */ 132 srf_cont_hdr_t ch; 133 srf_trace_hdr_t th; 134 srf_trace_body_t tb; 135 srf_xml_t xml; 136 srf_index_hdr_t hdr; 137 138 /* Private: cached data for use by srf_next_ztr */ 139 ztr_t *ztr; 140 mFILE *mf; 141 long mf_pos, mf_end; 142 } srf_t; 143 144 #define SRF_INDEX_MAGIC "Ihsh" 145 #define SRF_INDEX_VERSION "1.01" 146 147 148 /*--- Initialisation */ 149 srf_t *srf_create(FILE *fp); 150 srf_t *srf_open(char *fn, char *mode); 151 void srf_destroy(srf_t *srf, int auto_close); 152 153 /*--- Base type I/O methods */ 154 155 int srf_write_pstring(srf_t *srf, char *str); 156 int srf_write_pstringb(srf_t *srf, char *str, int length); 157 int srf_read_pstring(srf_t *srf, char *str); 158 159 int srf_read_uint32(srf_t *srf, uint32_t *val); 160 int srf_write_uint32(srf_t *srf, uint32_t val); 161 162 int srf_read_uint64(srf_t *srf, uint64_t *val); 163 int srf_write_uint64(srf_t *srf, uint64_t val); 164 165 166 /*--- Mid level I/O - srf block */ 167 srf_cont_hdr_t *srf_construct_cont_hdr(srf_cont_hdr_t *ch, 168 char *bc, 169 char *bc_version); 170 void srf_destroy_cont_hdr(srf_cont_hdr_t *ch); 171 int srf_read_cont_hdr(srf_t *srf, srf_cont_hdr_t *ch); 172 int srf_write_cont_hdr(srf_t *srf, srf_cont_hdr_t *ch); 173 174 int srf_read_xml(srf_t *srf, srf_xml_t *xml); 175 int srf_write_xml(srf_t *srf, srf_xml_t *xml); 176 177 srf_trace_hdr_t *srf_construct_trace_hdr(srf_trace_hdr_t *th, 178 char *prefix, 179 unsigned char *header, 180 uint32_t header_sz); 181 void srf_destroy_trace_hdr(srf_trace_hdr_t *th); 182 int srf_read_trace_hdr(srf_t *srf, srf_trace_hdr_t *th); 183 int srf_write_trace_hdr(srf_t *srf, srf_trace_hdr_t *th); 184 185 srf_trace_body_t *srf_construct_trace_body(srf_trace_body_t *th, 186 char *suffix, 187 int suffix_len, 188 unsigned char *body, 189 uint32_t body_size, 190 unsigned char flags); 191 void srf_destroy_trace_body(srf_trace_body_t *th); 192 int srf_write_trace_body(srf_t *srf, srf_trace_body_t *th); 193 int srf_read_trace_body(srf_t *srf, srf_trace_body_t *th, int no_trace); 194 195 int srf_read_index_hdr(srf_t *srf, srf_index_hdr_t *hdr, int no_seek); 196 int srf_write_index_hdr(srf_t *srf, srf_index_hdr_t *hdr); 197 srf_index_t *srf_index_create(char *ch_file, char *th_file, int dbh_sep); 198 void srf_index_destroy(srf_index_t *idx); 199 void srf_index_stats(srf_index_t *idx, FILE *fp); 200 int srf_index_add_cont_hdr(srf_index_t *idx, uint64_t pos); 201 int srf_index_add_trace_hdr(srf_index_t *idx, uint64_t pos); 202 int srf_index_add_trace_body(srf_index_t *idx, char *name, uint64_t pos); 203 int srf_index_write(srf_t *srf, srf_index_t *idx); 204 205 /*--- Higher level I/O functions */ 206 mFILE *srf_next_trace(srf_t *srf, char *name); 207 ztr_t *srf_next_ztr_flags(srf_t *srf, char *name, int filter_mask, int *flags); 208 ztr_t *srf_next_ztr(srf_t *srf, char *name, int filter_mask); 209 210 ztr_t *partial_decode_ztr(srf_t *srf, mFILE *mf, ztr_t *z); 211 ztr_t *ztr_dup(ztr_t *src); 212 213 int srf_next_block_type(srf_t *srf); /* peek ahead */ 214 int srf_next_block_details(srf_t *srf, uint64_t *pos, char *name); 215 216 int srf_find_trace(srf_t *srf, char *trace, 217 uint64_t *cpos, uint64_t *hpos, uint64_t *dpos); 218 219 int construct_trace_name(char *fmt, 220 unsigned char *suffix, int suffix_len, 221 char *name, int name_len); 222 223 #endif /* _SRF_H_ */ 224