1 /* Unaligned ncbi sequence file i/o. 2 */ 3 #ifndef eslSQIO_NCBI_INCLUDED 4 #define eslSQIO_NCBI_INCLUDED 5 #include "esl_config.h" 6 7 #include <stdio.h> 8 #ifdef HAVE_SYS_TYPES_H 9 #include <sys/types.h> 10 #endif 11 12 #include "esl_sq.h" 13 #include "esl_sqio.h" 14 15 /* forward declaration */ 16 struct esl_sqio_s; 17 18 /* set the max residue count to 1 meg when reading a block */ 19 #define MAX_RESIDUE_COUNT (1024 * 1024) 20 21 #define MAX_DB_VOLUMES 100 22 23 /* ESL_SQNCBI_VOLUME: 24 * Information for the volume 25 */ 26 typedef struct esl_sqncbi_vol_s { 27 char *name; /* name of the volume */ 28 29 uint32_t start_seq; /* starting sequence number */ 30 uint32_t end_seq; /* ending sequence number */ 31 32 uint32_t hdr_off; /* disk offset in .pin to header index */ 33 uint32_t seq_off; /* disk offset to .pin to sequence index */ 34 uint32_t amb_off; /* disk offset to .pin to ambiguous index */ 35 } ESL_SQNCBI_VOLUME; 36 37 /* ESL_SQNCBI: 38 * An open sequence file for reading. 39 */ 40 typedef struct esl_sqncbi_s { 41 FILE *fppin; /* Open .pin file ptr */ 42 FILE *fpphr; /* Open .phr file ptr */ 43 FILE *fppsq; /* Open .psq file ptr */ 44 char errbuf[eslERRBUFSIZE];/* parse error mesg. Size must match msa.h */ 45 46 char *title; /* database title */ 47 int version; /* database version */ 48 char *timestamp; /* time stamp of database creation */ 49 50 uint32_t num_seq; /* number of sequences in the database */ 51 uint64_t total_res; /* total number of residues */ 52 uint32_t max_seq; /* longest sequence in the database */ 53 54 uint32_t hdr_off; /* disk offset in .pin to header index */ 55 uint32_t seq_off; /* disk offset to .pin to sequence index */ 56 uint32_t amb_off; /* disk offset to .pin to ambiguous index */ 57 58 int index; /* current sequence index in the database */ 59 uint32_t vol_index; /* current volume index (-1 if no volumes) */ 60 uint32_t roff; /* record offset (start of header) */ 61 uint32_t hoff; /* offset to last byte of header */ 62 uint32_t doff; /* data offset (start of sequence data) */ 63 uint32_t eoff; /* offset to last byte of sequence */ 64 65 uint32_t index_start; /* start of indexes currently loaded */ 66 uint32_t index_end; /* end of indexes currently loaded */ 67 uint32_t *hdr_indexes; /* block of header indexes from .pin */ 68 uint32_t *seq_indexes; /* block of header indexes from .pin */ 69 uint32_t *amb_indexes; /* block of header indexes from .pin */ 70 71 /* volume information */ 72 uint32_t volumes; /* number of volumes */ 73 ESL_SQNCBI_VOLUME vols[MAX_DB_VOLUMES]; 74 75 /* information for the current header */ 76 unsigned char *hdr_buf; /* buffer for holding unparsed header */ 77 unsigned char *hdr_ptr; /* current parser position */ 78 int hdr_alloced; /* size of the allocated buffer */ 79 80 char *name_ptr; /* pointer to name NOT NULL TERMINATED */ 81 int32_t name_size; /* length of the name */ 82 char *acc_ptr; /* pointer to accession NOT NULL TERMINATED */ 83 int32_t acc_size; /* length of the accession */ 84 int32_t int_id; /* integer sequence id */ 85 char *str_id_ptr; /* pointer to id NOT NULL TERMINATED */ 86 int32_t str_id_size; /* length of the id */ 87 88 /* information on the current sequence */ 89 uint32_t seq_apos; /* position of ambiguity table */ 90 uint32_t seq_alen; /* size of ambiguity table */ 91 uint32_t seq_cpos; /* current position in ambiguity table */ 92 int32_t seq_L; /* true sequence length */ 93 94 /* alphabet used to convert ncbi to hmmer to ascii */ 95 int alphatype; /* amino or dna */ 96 char *alphasym; /* string of residues */ 97 98 } ESL_SQNCBI_DATA; 99 100 101 extern int esl_sqncbi_Open(char *seqfile, int format, struct esl_sqio_s *sqfp); 102 103 #endif /*eslSQIO_NCBI_INCLUDED*/ 104 105